tlx
Loading...
Searching...
No Matches
sample_sort_tools.hpp
Go to the documentation of this file.
1/*******************************************************************************
2 * tlx/sort/strings/sample_sort_tools.hpp
3 *
4 * Helpers for (Parallel) Super Scalar String Sample Sort
5 *
6 * See also Timo Bingmann, Andreas Eberle, and Peter Sanders. "Engineering
7 * parallel string sorting." Algorithmica 77.1 (2017): 235-286.
8 *
9 * Part of tlx - http://panthema.net/tlx
10 *
11 * Copyright (C) 2013-2019 Timo Bingmann <tb@panthema.net>
12 *
13 * All rights reserved. Published under the Boost Software License, Version 1.0
14 ******************************************************************************/
15
16#ifndef TLX_SORT_STRINGS_SAMPLE_SORT_TOOLS_HEADER
17#define TLX_SORT_STRINGS_SAMPLE_SORT_TOOLS_HEADER
18
20
22#include <tlx/die/core.hpp>
23#include <tlx/logger/core.hpp>
24#include <tlx/math/clz.hpp>
25#include <tlx/math/ctz.hpp>
27
28#include <algorithm>
29#include <cassert>
30#include <cstddef>
31#include <cstdint>
32
33namespace tlx {
34namespace sort_strings_detail {
35
36/******************************************************************************/
37
38//! represent binary digits of large integer datatypes
39template <typename Type>
40static inline
41std::string to_binary(Type v, const size_t width = (8 * sizeof(Type))) {
42 std::string str(width, ' ');
43 for (size_t i = 0; i < width; i++) {
44 str[width - i - 1] = (v & 1) ? '1' : '0';
45 v /= 2;
46 }
47 return str;
48}
49
50//! Class to transform in-order to level-order indexes in a perfect binary tree
51template <size_t TreeBits>
53 static const bool debug = false;
54
55 static const size_t treebits = TreeBits;
56 static const size_t num_nodes = (1 << treebits) - 1;
57
58 static inline unsigned int level_to_preorder(unsigned int id) {
59 assert(id > 0);
60 TLX_LOG << "index: " << id << " = " << to_binary(id);
61
62 static const int bitmask = num_nodes;
63
64 int hi = treebits - 32 + clz<std::uint32_t>(id);
65 TLX_LOG << "high zero: " << hi;
66
67 unsigned int bkt = ((id << (hi + 1)) & bitmask) | (1 << hi);
68
69 TLX_LOG << "bkt: " << bkt << " = " << to_binary(bkt);
70 return bkt;
71 }
72
73 static inline unsigned int pre_to_levelorder(unsigned int id) {
74 assert(id > 0);
75 TLX_LOG << "index: " << id << " = " << to_binary(id);
76
77 static const int bitmask = num_nodes;
78
79 int lo = ctz<std::uint32_t>(id) + 1;
80 TLX_LOG << "low zero: " << lo;
81
82 unsigned int bkt = ((id >> lo) & bitmask) | (1 << (treebits - lo));
83
84 TLX_LOG << "bkt: " << bkt << " = " << to_binary(bkt);
85 return bkt;
86 }
87
88 static inline void self_verify() {
89 for (size_t i = 1; i <= num_nodes; ++i) {
90 TLX_LOG << to_binary(i, treebits) << " -> ";
91
92 size_t id = level_to_preorder(i);
93 TLX_LOG << to_binary(id, treebits) << " -> ";
94
95 id = pre_to_levelorder(id);
97
98 TLX_LOG << "";
99 tlx_die_unequal(id, i);
100 }
101 }
102};
103
115}
116
117/******************************************************************************/
118
119//! Recursive TreeBuilder for full-descent and unrolled variants, constructs a
120//! both a pre-order and level-order array of splitters and the corresponding
121//! LCPs.
122template <typename key_type, size_t num_splitters>
124{
125 static const bool debug_splitter = false;
126
127public:
128 // build tree: splitter[num_splitters], splitter_tree[num_splitters + 1],
129 // and splitter_lcp[num_splitters + 1].
130 SSTreeBuilderPreAndLevelOrder(key_type splitter[num_splitters],
131 key_type tree[num_splitters + 1],
132 unsigned char splitter_lcp[num_splitters + 1],
133 const key_type* samples, size_t samplesize)
134 : splitter_(splitter), tree_(tree),
135 lcp_iter_(splitter_lcp), samples_(samples) {
136 key_type sentinel = 0;
137 recurse(samples, samples + samplesize, 1, sentinel);
138
139 assert(splitter_ == splitter + num_splitters);
140 assert(lcp_iter_ == splitter_lcp + num_splitters);
141 // overwrite sentinel lcp for first < everything bucket
142 splitter_lcp[0] &= 0x80;
143 // sentinel for > everything bucket
144 splitter_lcp[num_splitters] = 0;
145 }
146
147 ptrdiff_t snum(const key_type* s) const {
148 return static_cast<ptrdiff_t>(s - samples_);
149 }
150
151 key_type recurse(const key_type* lo, const key_type* hi,
152 unsigned int treeidx, key_type& rec_prevkey) {
154 << "rec_buildtree(" << snum(lo) << "," << snum(hi)
155 << ", treeidx=" << treeidx << ")";
156
157 // pick middle element as splitter
158 const key_type* mid = lo + static_cast<ptrdiff_t>(hi - lo) / 2;
159
161 << "tree[" << treeidx << "] = samples[" << snum(mid) << "] = "
162 << hexdump_type(*mid);
163
164 key_type mykey = tree_[treeidx] = *mid;
165#if 1
166 const key_type* midlo = mid;
167 while (lo < midlo && *(midlo - 1) == mykey) midlo--;
168
169 const key_type* midhi = mid;
170 while (midhi + 1 < hi && *midhi == mykey) midhi++;
171
172 if (midhi - midlo > 1)
173 TLX_LOG0 << "key range = [" << snum(midlo) << "," << snum(midhi) << ")";
174#else
175 const key_type* midlo = mid, * midhi = mid + 1;
176#endif
177 if (2 * treeidx < num_splitters)
178 {
179 key_type prevkey = recurse(lo, midlo, 2 * treeidx + 0, rec_prevkey);
180
181 key_type xorSplit = prevkey ^ mykey;
182
184 << " lcp: " << hexdump_type(prevkey)
185 << " XOR " << hexdump_type(mykey)
186 << " = " << hexdump_type(xorSplit)
187 << " - " << clz(xorSplit)
188 << " bits = " << clz(xorSplit) / 8
189 << " chars lcp";
190
191 *splitter_++ = mykey;
192
193 *lcp_iter_++ =
194 (clz(xorSplit) / 8) |
195 // marker for done splitters
196 ((mykey & 0xFF) ? 0 : 0x80);
197
198 return recurse(midhi, hi, 2 * treeidx + 1, mykey);
199 }
200 else
201 {
202 key_type xorSplit = rec_prevkey ^ mykey;
203
205 << " lcp: " << hexdump_type(rec_prevkey)
206 << " XOR " << hexdump_type(mykey)
207 << " = " << hexdump_type(xorSplit)
208 << " - " << clz(xorSplit)
209 << " bits = " << clz(xorSplit) / 8
210 << " chars lcp";
211
212 *splitter_++ = mykey;
213
214 *lcp_iter_++ =
215 (clz(xorSplit) / 8) |
216 // marker for done splitters
217 ((mykey & 0xFF) ? 0 : 0x80);
218
219 return mykey;
220 }
221 }
222
223private:
224 key_type* splitter_;
225 key_type* tree_;
226 unsigned char* lcp_iter_;
227 const key_type* samples_;
228};
229
230//! Recursive TreeBuilder for full-descent and unrolled variants, constructs
231//! only a level-order binary tree of splitters
232template <typename key_type, size_t num_splitters>
234{
235 static const bool debug_splitter = false;
236
237public:
238 //! build tree, sizes: splitter_tree[num_splitters + 1] and
239 SSTreeBuilderLevelOrder(key_type tree[num_splitters],
240 unsigned char splitter_lcp[num_splitters + 1],
241 const key_type* samples, size_t samplesize)
242 : tree_(tree),
243 lcp_iter_(splitter_lcp),
244 samples_(samples) {
245 key_type sentinel = 0;
246 recurse(samples, samples + samplesize, 1, sentinel);
247
248 assert(lcp_iter_ == splitter_lcp + num_splitters);
249 // overwrite sentinel lcp for first < everything bucket
250 splitter_lcp[0] &= 0x80;
251 // sentinel for > everything bucket
252 splitter_lcp[num_splitters] = 0;
253 }
254
255 ptrdiff_t snum(const key_type* s) const {
256 return static_cast<ptrdiff_t>(s - samples_);
257 }
258
259 key_type recurse(const key_type* lo, const key_type* hi,
260 unsigned int treeidx, key_type& rec_prevkey) {
262 << "rec_buildtree(" << snum(lo) << "," << snum(hi)
263 << ", treeidx=" << treeidx << ")";
264
265 // pick middle element as splitter
266 const key_type* mid = lo + static_cast<ptrdiff_t>(hi - lo) / 2;
267
269 << "tree[" << treeidx << "] = samples[" << snum(mid) << "] = "
270 << hexdump_type(*mid);
271
272 key_type mykey = tree_[treeidx] = *mid;
273#if 1
274 const key_type* midlo = mid;
275 while (lo < midlo && *(midlo - 1) == mykey) midlo--;
276
277 const key_type* midhi = mid;
278 while (midhi + 1 < hi && *midhi == mykey) midhi++;
279
280 if (midhi - midlo > 1)
281 TLX_LOG0 << "key range = [" << snum(midlo) << "," << snum(midhi) << ")";
282#else
283 const key_type* midlo = mid, * midhi = mid + 1;
284#endif
285 if (2 * treeidx < num_splitters)
286 {
287 key_type prevkey = recurse(lo, midlo, 2 * treeidx + 0, rec_prevkey);
288
289 key_type xorSplit = prevkey ^ mykey;
290
292 << " lcp: " << hexdump_type(prevkey)
293 << " XOR " << hexdump_type(mykey)
294 << " = " << hexdump_type(xorSplit)
295 << " - " << clz(xorSplit)
296 << " bits = " << clz(xorSplit) / 8
297 << " chars lcp";
298
299 *lcp_iter_++ =
300 (clz(xorSplit) / 8) |
301 // marker for done splitters
302 ((mykey & 0xFF) ? 0 : 0x80);
303
304 return recurse(midhi, hi, 2 * treeidx + 1, mykey);
305 }
306 else
307 {
308 key_type xorSplit = rec_prevkey ^ mykey;
309
311 << " lcp: " << hexdump_type(rec_prevkey)
312 << " XOR " << hexdump_type(mykey)
313 << " = " << hexdump_type(xorSplit)
314 << " - " << clz(xorSplit)
315 << " bits = " << clz(xorSplit) / 8
316 << " chars lcp";
317
318 *lcp_iter_++ =
319 (clz(xorSplit) / 8) |
320 // marker for done splitters
321 ((mykey & 0xFF) ? 0 : 0x80);
322
323 return mykey;
324 }
325 }
326
327private:
328 key_type* tree_;
329 unsigned char* lcp_iter_;
330 const key_type* samples_;
331};
332
333/******************************************************************************/
334
335//! Sample Sort Classification Tree Unrolled and Interleaved
336template <typename key_type, size_t TreeBits, size_t Rollout = 4>
338{
339public:
340 static const size_t treebits = TreeBits;
341 static const size_t num_splitters = (1 << treebits) - 1;
342
343 //! build tree and splitter array from sample
344 void build(key_type* samples, size_t samplesize,
345 unsigned char* splitter_lcp) {
347 splitter_, splitter_tree_, splitter_lcp,
348 samples, samplesize);
349 }
350
351 //! binary search on splitter array for bucket number
352 unsigned int find_bkt(const key_type& key) const {
353 // binary tree traversal without left branch
354
355 unsigned int i = 1;
356 while (i <= num_splitters) {
357 // in gcc-4.6.3 this produces a SETA, LEA sequence
358 i = 2 * i + (key <= splitter_tree_[i] ? 0 : 1);
359 }
360 i -= num_splitters + 1;
361
362 size_t b = i * 2; // < bucket
363 if (i < num_splitters && splitter_[i] == key) b += 1; // equal bucket
364
365 return b;
366 }
367
368 // in gcc-4.6.3 this produces a SETA, LEA sequence
369#define TLX_CLASSIFY_TREE_STEP \
370 for (size_t u = 0; u < Rollout; ++u) { \
371 i[u] = 2 * i[u] + (key[u] <= splitter_tree_[i[u]] ? 0 : 1); \
372 } \
373 TLX_ATTRIBUTE_FALLTHROUGH;
374
375 //! search in splitter tree for bucket number, unrolled for Rollout keys at
376 //! once.
378 const key_type key[Rollout], std::uint16_t obkt[Rollout]) const {
379 // binary tree traversal without left branch
380
381 unsigned int i[Rollout];
382 std::fill(i, i + Rollout, 1u);
383
384 switch (treebits)
385 {
386 default:
387 abort();
388 case 15:
390 case 14:
392 case 13:
394 case 12:
396 case 11:
398
399 case 10:
401 case 9:
403 case 8:
405 case 7:
407 case 6:
409
410 case 5:
412 case 4:
414 case 3:
416 case 2:
418 case 1:
420 }
421
422 for (size_t u = 0; u < Rollout; ++u)
423 i[u] -= num_splitters + 1;
424
425 for (size_t u = 0; u < Rollout; ++u) {
426 // < bucket
427 obkt[u] = i[u] * 2;
428 }
429
430 for (size_t u = 0; u < Rollout; ++u) {
431 // equal bucket
432 if (i[u] < num_splitters && splitter_[i[u]] == key[u])
433 obkt[u] += 1;
434 }
435 }
436
437#undef TLX_CLASSIFY_TREE_STEP
438
439 //! classify all strings in area by walking tree and saving bucket id
440 template <typename StringSet>
441 // __attribute__ ((optimize("unroll-all-loops")))
443 const StringSet& strset,
444 typename StringSet::Iterator begin, typename StringSet::Iterator end,
445 std::uint16_t* bktout, size_t depth) const {
446 while (begin != end)
447 {
448 if (begin + Rollout <= end)
449 {
450 key_type key[Rollout];
451 for (size_t u = 0; u < Rollout; ++u)
452 key[u] = get_key<key_type>(strset, begin[u], depth);
453
454 find_bkt_unroll(key, bktout);
455
456 begin += Rollout, bktout += Rollout;
457 }
458 else
459 {
460 key_type key = get_key<key_type>(strset, *begin++, depth);
461 *bktout++ = this->find_bkt(key);
462 }
463 }
464 }
465
466 //! return a splitter
467 key_type get_splitter(unsigned int i) const
468 { return splitter_[i]; }
469
470private:
473};
474
475/******************************************************************************/
476
477//! Sample Sort Classification Tree Unrolled with Equal Comparisons
478template <typename key_type, size_t TreeBits>
480{
481public:
482 static const size_t treebits = TreeBits;
483 static const size_t num_splitters = (1 << treebits) - 1;
484
485 //! build tree and splitter array from sample
486 void build(key_type* samples, size_t samplesize, unsigned char* splitter_lcp) {
488 splitter_tree_, splitter_lcp, samples, samplesize);
489 }
490
491#define TLX_CLASSIFY_TREE_STEP \
492 if (TLX_UNLIKELY(key == splitter_tree_[i])) { \
493 return \
494 2 * PerfectTreeCalculations<treebits>::level_to_preorder(i) - 1; \
495 } \
496 i = 2 * i + (key < splitter_tree_[i] ? 0 : 1); \
497 TLX_ATTRIBUTE_FALLTHROUGH;
498
499 //! binary search on splitter array for bucket number
500 unsigned int find_bkt(const key_type& key) const {
501 // binary tree traversal without left branch
502
503 unsigned int i = 1;
504
505 switch (treebits)
506 {
507 default:
508 abort();
509 case 15:
511 case 14:
513 case 13:
515 case 12:
517 case 11:
519
520 case 10:
522 case 9:
524 case 8:
526 case 7:
528 case 6:
530
531 case 5:
533 case 4:
535 case 3:
537 case 2:
539 case 1:
541 }
542
543 i -= num_splitters + 1;
544 return 2 * i; // < or > bucket
545 }
546
547#undef TLX_CLASSIFY_TREE_STEP
548
549 //! classify all strings in area by walking tree and saving bucket id
550 template <typename StringSet>
552 const StringSet& strset,
553 typename StringSet::Iterator begin, typename StringSet::Iterator end,
554 std::uint16_t* bktout, size_t depth) const {
555 while (begin != end)
556 {
557 key_type key = get_key<key_type>(strset, *begin++, depth);
558 *bktout++ = find_bkt(key);
559 }
560 }
561
562 //! return a splitter
563 key_type get_splitter(unsigned int i) const {
564 return splitter_tree_[
566 }
567
568private:
570};
571
572/******************************************************************************/
573
574//! Sample Sort Classification Tree Unrolled, Interleaved, and with Perfect Tree
575//! Index Calculations
576template <typename key_type, size_t TreeBits, unsigned Rollout = 4>
578{
579public:
580 static const size_t treebits = TreeBits;
581 static const size_t num_splitters = (1 << treebits) - 1;
582
583 //! build tree and splitter array from sample
584 void build(key_type* samples, size_t samplesize,
585 unsigned char* splitter_lcp) {
587 splitter_tree_, splitter_lcp, samples, samplesize);
588 }
589
590 //! binary search on splitter array for bucket number
591 unsigned int find_bkt(const key_type& key) const {
592 // binary tree traversal without left branch
593
594 unsigned int i = 1;
595
596 while (i <= num_splitters) {
597 // in gcc-4.6.3 this produces a SETA, LEA sequence
598 i = 2 * i + (key <= splitter_tree_[i] ? 0 : 1);
599 }
600
601 i -= num_splitters + 1;
602
603 // < bucket
604 size_t b = i * 2;
605 if (i < num_splitters && get_splitter(i) == key) {
606 // equal bucket
607 b += 1;
608 }
609
610 return b;
611 }
612
613 // in gcc-4.6.3 this produces a SETA, LEA sequence
614#define TLX_CLASSIFY_TREE_STEP \
615 for (size_t u = 0; u < Rollout; ++u) { \
616 i[u] = 2 * i[u] + (key[u] <= splitter_tree_[i[u]] ? 0 : 1); \
617 } \
618 TLX_ATTRIBUTE_FALLTHROUGH;
619
620 //! search in splitter tree for bucket number, unrolled for Rollout keys at
621 //! once.
623 const key_type key[Rollout], std::uint16_t obkt[Rollout]) const {
624 // binary tree traversal without left branch
625
626 unsigned int i[Rollout];
627 std::fill(i + 0, i + Rollout, 1);
628
629 switch (treebits)
630 {
631 default:
632 abort();
633
634 case 15:
636 case 14:
638 case 13:
640 case 12:
642 case 11:
644
645 case 10:
647 case 9:
649 case 8:
651 case 7:
653 case 6:
655
656 case 5:
658 case 4:
660 case 3:
662 case 2:
664 case 1:
666 }
667
668 for (unsigned u = 0; u < Rollout; ++u)
669 i[u] -= num_splitters + 1;
670
671 for (unsigned u = 0; u < Rollout; ++u) {
672 // < bucket
673 obkt[u] = i[u] * 2;
674 }
675
676 for (unsigned u = 0; u < Rollout; ++u) {
677 // equal bucket
678 if (i[u] < num_splitters && get_splitter(i[u]) == key[u])
679 obkt[u] += 1;
680 }
681 }
682
683#undef TLX_CLASSIFY_TREE_STEP
684
685 //! classify all strings in area by walking tree and saving bucket id
686 template <typename StringSet>
687 // __attribute__ ((optimize("unroll-all-loops")))
689 const StringSet& strset,
690 typename StringSet::Iterator begin, typename StringSet::Iterator end,
691 std::uint16_t* bktout, size_t depth) const {
692 while (begin != end)
693 {
694 if (begin + Rollout <= end)
695 {
696 key_type key[Rollout];
697 for (size_t u = 0; u < Rollout; ++u)
698 key[u] = get_key<key_type>(strset, begin[u], depth);
699
700 find_bkt_unroll(key, bktout);
701
702 begin += Rollout, bktout += Rollout;
703 }
704 else
705 {
706 key_type key = get_key<key_type>(strset, *begin++, depth);
707 *bktout++ = this->find_bkt(key);
708 }
709 }
710 }
711
712 //! return a splitter
713 key_type get_splitter(unsigned int i) const {
714 return splitter_tree_[
716 }
717
718private:
720};
721
722/******************************************************************************/
723
724} // namespace sort_strings_detail
725} // namespace tlx
726
727#endif // !TLX_SORT_STRINGS_SAMPLE_SORT_TOOLS_HEADER
728
729/******************************************************************************/
Sample Sort Classification Tree Unrolled with Equal Comparisons.
void build(key_type *samples, size_t samplesize, unsigned char *splitter_lcp)
build tree and splitter array from sample
key_type get_splitter(unsigned int i) const
return a splitter
void classify(const StringSet &strset, typename StringSet::Iterator begin, typename StringSet::Iterator end, std::uint16_t *bktout, size_t depth) const
classify all strings in area by walking tree and saving bucket id
unsigned int find_bkt(const key_type &key) const
binary search on splitter array for bucket number
Sample Sort Classification Tree Unrolled, Interleaved, and with Perfect Tree Index Calculations.
void find_bkt_unroll(const key_type key[Rollout], std::uint16_t obkt[Rollout]) const
search in splitter tree for bucket number, unrolled for Rollout keys at once.
void build(key_type *samples, size_t samplesize, unsigned char *splitter_lcp)
build tree and splitter array from sample
key_type get_splitter(unsigned int i) const
return a splitter
void classify(const StringSet &strset, typename StringSet::Iterator begin, typename StringSet::Iterator end, std::uint16_t *bktout, size_t depth) const
classify all strings in area by walking tree and saving bucket id
unsigned int find_bkt(const key_type &key) const
binary search on splitter array for bucket number
Sample Sort Classification Tree Unrolled and Interleaved.
void find_bkt_unroll(const key_type key[Rollout], std::uint16_t obkt[Rollout]) const
search in splitter tree for bucket number, unrolled for Rollout keys at once.
void build(key_type *samples, size_t samplesize, unsigned char *splitter_lcp)
build tree and splitter array from sample
key_type get_splitter(unsigned int i) const
return a splitter
void classify(const StringSet &strset, typename StringSet::Iterator begin, typename StringSet::Iterator end, std::uint16_t *bktout, size_t depth) const
classify all strings in area by walking tree and saving bucket id
unsigned int find_bkt(const key_type &key) const
binary search on splitter array for bucket number
Recursive TreeBuilder for full-descent and unrolled variants, constructs only a level-order binary tr...
SSTreeBuilderLevelOrder(key_type tree[num_splitters], unsigned char splitter_lcp[num_splitters+1], const key_type *samples, size_t samplesize)
build tree, sizes: splitter_tree[num_splitters + 1] and
key_type recurse(const key_type *lo, const key_type *hi, unsigned int treeidx, key_type &rec_prevkey)
Recursive TreeBuilder for full-descent and unrolled variants, constructs a both a pre-order and level...
key_type recurse(const key_type *lo, const key_type *hi, unsigned int treeidx, key_type &rec_prevkey)
SSTreeBuilderPreAndLevelOrder(key_type splitter[num_splitters], key_type tree[num_splitters+1], unsigned char splitter_lcp[num_splitters+1], const key_type *samples, size_t samplesize)
#define tlx_die_unequal(X, Y)
Check that X == Y or die miserably, but output the values of X and Y for better debugging.
Definition core.hpp:132
unsigned clz(Integral x)
std::string hexdump_type(const Type &t)
Dump a (binary) item as a sequence of uppercase hexadecimal pairs.
Definition hexdump.hpp:52
#define TLX_LOGC(cond)
Explicitly specify the condition for logging.
Definition core.hpp:137
#define TLX_LOG
Default logging method: output if the local debug variable is true.
Definition core.hpp:141
#define TLX_LOG0
Override default output: never or always output log.
Definition core.hpp:144
static void perfect_tree_calculations_self_verify()
static std::string to_binary(Type v, const size_t width=(8 *sizeof(Type)))
represent binary digits of large integer datatypes
#define TLX_CLASSIFY_TREE_STEP
Class to transform in-order to level-order indexes in a perfect binary tree.
static unsigned int level_to_preorder(unsigned int id)
static unsigned int pre_to_levelorder(unsigned int id)