tlx
Loading...
Searching...
No Matches
string_set.hpp
Go to the documentation of this file.
1/*******************************************************************************
2 * tlx/sort/strings/string_set.hpp
3 *
4 * Implementations of a StringSet concept. This is an internal implementation
5 * header, see tlx/sort/strings.hpp for public front-end functions.
6 *
7 * A StringSet abstracts from arrays of strings, we provide four abstractions:
8 *
9 * - UCharStringSet: (const) unsigned char**
10 * - StdStringSet: std::string*
11 * - UPtrStdStringSet: std::unique_ptr<std::string>*
12 * - StringSuffixSet: suffix sorting indexes of a std::string text
13 *
14 * Part of tlx - http://panthema.net/tlx
15 *
16 * Copyright (C) 2015-2019 Timo Bingmann <tb@panthema.net>
17 *
18 * All rights reserved. Published under the Boost Software License, Version 1.0
19 ******************************************************************************/
20
21#ifndef TLX_SORT_STRINGS_STRING_SET_HEADER
22#define TLX_SORT_STRINGS_STRING_SET_HEADER
23
24#include <cassert>
25#include <cstdint>
26#include <memory>
27#include <string>
28#include <utility>
29#include <vector>
30
31#include <tlx/logger/core.hpp>
32#include <tlx/math/bswap.hpp>
34
35namespace tlx {
36
37//! \addtogroup tlx_sort
38//! \{
39
40namespace sort_strings_detail {
41
42/******************************************************************************/
43
44/*!
45 * Base class for common string set functions, included via CRTP.
46 */
47template <typename StringSet, typename Traits>
49{
50public:
51 //! index-based array access (readable and writable) to String objects.
52 typename Traits::String& at(size_t i) const {
53 const StringSet& ss = *static_cast<const StringSet*>(this);
54 return *(ss.begin() + i);
55 }
56
57 //! \name CharIterator Comparisons
58 //! \{
59
60 //! check equality of two strings a and b at char iterators ai and bi.
61 bool is_equal(const typename Traits::String& a,
62 const typename Traits::CharIterator& ai,
63 const typename Traits::String& b,
64 const typename Traits::CharIterator& bi) const {
65 const StringSet& ss = *static_cast<const StringSet*>(this);
66 return !ss.is_end(a, ai) && !ss.is_end(b, bi) && (*ai == *bi);
67 }
68
69 //! check if string a is less or equal to string b at iterators ai and bi.
70 bool is_less(const typename Traits::String& a,
71 const typename Traits::CharIterator& ai,
72 const typename Traits::String& b,
73 const typename Traits::CharIterator& bi) const {
74 const StringSet& ss = *static_cast<const StringSet*>(this);
75 return ss.is_end(a, ai) ||
76 (!ss.is_end(a, ai) && !ss.is_end(b, bi) && *ai < *bi);
77 }
78
79 //! check if string a is less or equal to string b at iterators ai and bi.
80 bool is_leq(const typename Traits::String& a,
81 const typename Traits::CharIterator& ai,
82 const typename Traits::String& b,
83 const typename Traits::CharIterator& bi) const {
84 const StringSet& ss = *static_cast<const StringSet*>(this);
85 return ss.is_end(a, ai) ||
86 (!ss.is_end(a, ai) && !ss.is_end(b, bi) && *ai <= *bi);
87 }
88
89 //! \}
90
91 //! \name Character Extractors
92 //! \{
93
94 typename Traits::Char
95 get_char(const typename Traits::String& s, size_t depth) const {
96 const StringSet& ss = *static_cast<const StringSet*>(this);
97 return *ss.get_chars(s, depth);
98 }
99
100 //! Return up to 1 characters of string s at iterator i packed into a
101 //! uint8_t (only works correctly for 8-bit characters)
102 std::uint8_t get_uint8(
103 const typename Traits::String& s, typename Traits::CharIterator i) const {
104 const StringSet& ss = *static_cast<const StringSet*>(this);
105
106 if (ss.is_end(s, i)) return 0;
107 return std::uint8_t(*i);
108 }
109
110 //! Return up to 2 characters of string s at iterator i packed into a
111 //! uint16_t (only works correctly for 8-bit characters)
112 std::uint16_t get_uint16(
113 const typename Traits::String& s, typename Traits::CharIterator i) const {
114 const StringSet& ss = *static_cast<const StringSet*>(this);
115
116 std::uint16_t v = 0;
117 if (ss.is_end(s, i)) return v;
118 v = (std::uint16_t(*i) << 8);
119 ++i;
120 if (ss.is_end(s, i)) return v;
121 v |= (std::uint16_t(*i) << 0);
122 return v;
123 }
124
125 //! Return up to 4 characters of string s at iterator i packed into a
126 //! uint32_t (only works correctly for 8-bit characters)
127 std::uint32_t get_uint32(
128 const typename Traits::String& s, typename Traits::CharIterator i) const {
129 const StringSet& ss = *static_cast<const StringSet*>(this);
130
131 std::uint32_t v = 0;
132 if (ss.is_end(s, i)) return v;
133 v = (std::uint32_t(*i) << 24);
134 ++i;
135 if (ss.is_end(s, i)) return v;
136 v |= (std::uint32_t(*i) << 16);
137 ++i;
138 if (ss.is_end(s, i)) return v;
139 v |= (std::uint32_t(*i) << 8);
140 ++i;
141 if (ss.is_end(s, i)) return v;
142 v |= (std::uint32_t(*i) << 0);
143 return v;
144 }
145
146 //! Return up to 8 characters of string s at iterator i packed into a
147 //! uint64_t (only works correctly for 8-bit characters)
148 std::uint64_t get_uint64(
149 const typename Traits::String& s, typename Traits::CharIterator i) const {
150 const StringSet& ss = *static_cast<const StringSet*>(this);
151
152 std::uint64_t v = 0;
153 if (ss.is_end(s, i)) return v;
154 v = (std::uint64_t(*i) << 56);
155 ++i;
156 if (ss.is_end(s, i)) return v;
157 v |= (std::uint64_t(*i) << 48);
158 ++i;
159 if (ss.is_end(s, i)) return v;
160 v |= (std::uint64_t(*i) << 40);
161 ++i;
162 if (ss.is_end(s, i)) return v;
163 v |= (std::uint64_t(*i) << 32);
164 ++i;
165 if (ss.is_end(s, i)) return v;
166 v |= (std::uint64_t(*i) << 24);
167 ++i;
168 if (ss.is_end(s, i)) return v;
169 v |= (std::uint64_t(*i) << 16);
170 ++i;
171 if (ss.is_end(s, i)) return v;
172 v |= (std::uint64_t(*i) << 8);
173 ++i;
174 if (ss.is_end(s, i)) return v;
175 v |= (std::uint64_t(*i) << 0);
176 return v;
177 }
178
179 std::uint8_t get_uint8(const typename Traits::String& s, size_t depth) const {
180 const StringSet& ss = *static_cast<const StringSet*>(this);
181 return get_uint8(s, ss.get_chars(s, depth));
182 }
183
184 std::uint16_t get_uint16(const typename Traits::String& s, size_t depth) const {
185 const StringSet& ss = *static_cast<const StringSet*>(this);
186 return get_uint16(s, ss.get_chars(s, depth));
187 }
188
189 std::uint32_t get_uint32(const typename Traits::String& s, size_t depth) const {
190 const StringSet& ss = *static_cast<const StringSet*>(this);
191 return get_uint32(s, ss.get_chars(s, depth));
192 }
193
194 std::uint64_t get_uint64(const typename Traits::String& s, size_t depth) const {
195 const StringSet& ss = *static_cast<const StringSet*>(this);
196 return get_uint64(s, ss.get_chars(s, depth));
197 }
198
199 //! \}
200
201 //! Subset this string set using index range.
202 StringSet subi(size_t begin, size_t end) const {
203 const StringSet& ss = *static_cast<const StringSet*>(this);
204 return ss.sub(ss.begin() + begin, ss.begin() + end);
205 }
206
207 bool check_order(const typename Traits::String& s1,
208 const typename Traits::String& s2) const {
209 const StringSet& ss = *static_cast<const StringSet*>(this);
210
211 typename StringSet::CharIterator c1 = ss.get_chars(s1, 0);
212 typename StringSet::CharIterator c2 = ss.get_chars(s2, 0);
213
214 while (ss.is_equal(s1, c1, s2, c2))
215 ++c1, ++c2;
216
217 return ss.is_leq(s1, c1, s2, c2);
218 }
219
220 bool check_order() const {
221 const StringSet& ss = *static_cast<const StringSet*>(this);
222
223 for (typename Traits::Iterator pi = ss.begin();
224 pi + 1 != ss.end(); ++pi)
225 {
226 if (!check_order(*pi, *(pi + 1))) {
227 TLX_LOG1 << "check_order() failed at position " << pi - ss.begin();
228 return false;
229 }
230 }
231 return true;
232 }
233
234 void print() const {
235 const StringSet& ss = *static_cast<const StringSet*>(this);
236 size_t i = 0;
237 for (typename Traits::Iterator pi = ss.begin(); pi != ss.end(); ++pi)
238 {
239 TLX_LOG1 << "[" << i++ << "] = " << *pi
240 << " = " << ss.get_string(*pi, 0);
241 }
242 }
243};
244
245template <typename Type, typename StringSet>
246inline
247typename enable_if<sizeof(Type) == 4, std::uint32_t>::type
248get_key(const StringSet& strset,
249 const typename StringSet::String& s, size_t depth) {
250 return strset.get_uint32(s, depth);
251}
252
253template <typename Type, typename StringSet>
254inline
255typename enable_if<sizeof(Type) == 8, std::uint64_t>::type
256get_key(const StringSet& strset,
257 const typename StringSet::String& s, size_t depth) {
258 return strset.get_uint64(s, depth);
259}
260
261template <typename Type, typename StringSet>
262inline
263Type get_key_at(const StringSet& strset, size_t idx, size_t depth) {
264 return get_key<Type>(strset, strset.at(idx), depth);
265}
266
267/******************************************************************************/
268
269/*!
270 * Traits class implementing StringSet concept for char* and unsigned char*
271 * strings.
272 */
273template <typename CharType>
275{
276public:
277 //! exported alias for character type
278 typedef CharType Char;
279
280 //! String reference: pointer to first character
281 typedef Char* String;
282
283 //! Iterator over string references: pointer over pointers
284 typedef String* Iterator;
285
286 //! iterator of characters in a string
287 typedef const Char* CharIterator;
288
289 //! exported alias for assumed string container
290 typedef std::pair<Iterator, size_t> Container;
291};
292
293/*!
294 * Class implementing StringSet concept for char* and unsigned char* strings.
295 */
296template <typename CharType>
298 : public GenericCharStringSetTraits<CharType>,
299 public StringSetBase<GenericCharStringSet<CharType>,
300 GenericCharStringSetTraits<CharType> >
301{
302public:
304
305 typedef typename Traits::Char Char;
306 typedef typename Traits::String String;
307 typedef typename Traits::Iterator Iterator;
309 typedef typename Traits::Container Container;
310
311 //! Construct from begin and end string pointers
313 : begin_(begin), end_(end)
314 { }
315
316 //! Construct from a string container
318 : begin_(c.first), end_(c.first + c.second)
319 { }
320
321 //! Return size of string array
322 size_t size() const { return end_ - begin_; }
323 //! Iterator representing first String position
324 Iterator begin() const { return begin_; }
325 //! Iterator representing beyond last String position
326 Iterator end() const { return end_; }
327
328 //! Iterator-based array access (readable and writable) to String objects.
330 { return *i; }
331
332 //! Return CharIterator for referenced string, which belong to this set.
333 CharIterator get_chars(const String& s, size_t depth) const
334 { return s + depth; }
335
336 //! Returns true if CharIterator is at end of the given String
337 bool is_end(const String&, const CharIterator& i) const
338 { return (*i == 0); }
339
340 //! Return complete string (for debugging purposes)
341 std::string get_string(const String& s, size_t depth = 0) const
342 { return std::string(reinterpret_cast<const char*>(s) + depth); }
343
344 //! Subset this string set using iterator range.
346 { return GenericCharStringSet(begin, end); }
347
348 //! Allocate a new temporary string container with n empty Strings
349 static Container allocate(size_t n)
350 { return std::make_pair(new String[n], n); }
351
352 //! Deallocate a temporary string container
353 static void deallocate(Container& c)
354 { delete[] c.first; c.first = nullptr; }
355
356protected:
357 //! array of string pointers
359};
360
363
366
367/******************************************************************************/
368
369/*!
370 * Class implementing StringSet concept for a std::string objects.
371 */
373{
374public:
375 //! exported alias for character type
376 typedef std::uint8_t Char;
377
378 //! String reference: std::string, which should be reference counted.
379 typedef std::string String;
380
381 //! Iterator over string references: pointer to std::string.
382 typedef String* Iterator;
383
384 //! iterator of characters in a string
385 typedef const Char* CharIterator;
386
387 //! exported alias for assumed string container
388 typedef std::pair<Iterator, size_t> Container;
389};
390
391/*!
392 * Class implementing StringSet concept for arrays of std::string objects.
393 */
395 : public StdStringSetTraits,
396 public StringSetBase<StdStringSet, StdStringSetTraits>
397{
398public:
399 //! Construct from begin and end string pointers
401 : begin_(begin), end_(end)
402 { }
403
404 //! Construct from a string container
406 : begin_(c.first), end_(c.first + c.second)
407 { }
408
409 //! Return size of string array
410 size_t size() const { return end_ - begin_; }
411 //! Iterator representing first String position
412 Iterator begin() const { return begin_; }
413 //! Iterator representing beyond last String position
414 Iterator end() const { return end_; }
415
416 //! Array access (readable and writable) to String objects.
417 String& operator [] (const Iterator& i) const
418 { return *i; }
419
420 //! Return CharIterator for referenced string, which belongs to this set.
421 CharIterator get_chars(const String& s, size_t depth) const
422 { return reinterpret_cast<CharIterator>(s.data()) + depth; }
423
424 //! Returns true if CharIterator is at end of the given String
425 bool is_end(const String& s, const CharIterator& i) const
426 { return (i >= reinterpret_cast<CharIterator>(s.data()) + s.size()); }
427
428 //! Return complete string (for debugging purposes)
429 std::string get_string(const String& s, size_t depth = 0) const
430 { return s.substr(depth); }
431
432 //! Subset this string set using iterator range.
434 { return StdStringSet(begin, end); }
435
436 //! Allocate a new temporary string container with n empty Strings
437 static Container allocate(size_t n)
438 { return std::make_pair(new String[n], n); }
439
440 //! Deallocate a temporary string container
441 static void deallocate(Container& c)
442 { delete[] c.first; c.first = nullptr; }
443
444protected:
445 //! pointers to std::string objects
447};
448
449/******************************************************************************/
450
451/*!
452 * Class implementing StringSet concept for a std::unique_ptr<std::string
453 * objects, which are non-copyable.
454 */
456{
457public:
458 //! exported alias for character type
459 typedef std::uint8_t Char;
460
461 //! String reference: std::string, which should be reference counted.
462 typedef std::unique_ptr<std::string> String;
463
464 //! Iterator over string references: using std::vector's iterator
465 typedef String* Iterator;
466
467 //! iterator of characters in a string
468 typedef const Char* CharIterator;
469
470 //! exported alias for assumed string container
471 typedef std::pair<Iterator, size_t> Container;
472};
473
474/*!
475 * Class implementing StringSet concept for a std::vector containing std::string
476 * objects.
477 */
479 : public UPtrStdStringSetTraits,
480 public StringSetBase<UPtrStdStringSet, UPtrStdStringSetTraits>
481{
482public:
483 //! Construct from begin and end string pointers
485 : begin_(begin), end_(end)
486 { }
487
488 //! Construct from a string container
490 : begin_(c.first), end_(c.first + c.second)
491 { }
492
493 //! Return size of string array
494 size_t size() const { return end_ - begin_; }
495 //! Iterator representing first String position
496 Iterator begin() const { return begin_; }
497 //! Iterator representing beyond last String position
498 Iterator end() const { return end_; }
499
500 //! Array access (readable and writable) to String objects.
501 String& operator [] (const Iterator& i) const
502 { return *i; }
503
504 //! Return CharIterator for referenced string, which belongs to this set.
505 CharIterator get_chars(const String& s, size_t depth) const
506 { return reinterpret_cast<CharIterator>(s->data()) + depth; }
507
508 //! Returns true if CharIterator is at end of the given String
509 bool is_end(const String& s, const CharIterator& i) const
510 { return (i >= reinterpret_cast<CharIterator>(s->data()) + s->size()); }
511
512 //! Return complete string (for debugging purposes)
513 std::string get_string(const String& s, size_t depth = 0) const
514 { return s->substr(depth); }
515
516 //! Subset this string set using iterator range.
518 { return UPtrStdStringSet(begin, end); }
519
520 //! Allocate a new temporary string container with n empty Strings
521 static Container allocate(size_t n)
522 { return std::make_pair(new String[n], n); }
523
524 //! Deallocate a temporary string container
525 static void deallocate(Container& c)
526 { delete[] c.first; c.first = nullptr; }
527
528 void print() const {
529 size_t i = 0;
530 for (Iterator pi = begin(); pi != end(); ++pi)
531 {
532 TLX_LOG1 << "[" << i++ << "] = " << pi->get()
533 << " = " << get_string(*pi, 0);
534 }
535 }
536
537protected:
538 //! vector of std::string objects
540};
541
542/******************************************************************************/
543
544/*!
545 * Class implementing StringSet concept for suffix sorting indexes of a
546 * std::string text object.
547 */
549{
550public:
551 //! exported alias for assumed string container
552 typedef std::string Text;
553
554 //! exported alias for character type
555 typedef std::uint8_t Char;
556
557 //! String reference: suffix index of the text.
558 typedef typename Text::size_type String;
559
560 //! Iterator over string references: using std::vector's iterator over
561 //! suffix array vector
562 typedef typename std::vector<String>::iterator Iterator;
563
564 //! iterator of characters in a string
565 typedef const Char* CharIterator;
566
567 //! exported alias for assumed string container
568 typedef std::pair<Text, std::vector<String> > Container;
569};
570
571/*!
572 * Class implementing StringSet concept for suffix sorting indexes of a
573 * std::string text object.
574 */
576 : public StringSuffixSetTraits,
577 public StringSetBase<StringSuffixSet, StringSuffixSetTraits>
578{
579public:
580 //! Construct from begin and end string pointers
582 const Iterator& begin, const Iterator& end)
583 : text_(&text),
585 { }
586
587 //! Initializing constructor which fills output vector sa with indices.
588 static StringSuffixSet
589 Initialize(const Text& text, std::vector<String>& sa) {
590 sa.resize(text.size());
591 for (size_t i = 0; i < text.size(); ++i)
592 sa[i] = i;
593 return StringSuffixSet(text, sa.begin(), sa.end());
594 }
595
596 //! Return size of string array
597 size_t size() const { return end_ - begin_; }
598 //! Iterator representing first String position
599 Iterator begin() const { return begin_; }
600 //! Iterator representing beyond last String position
601 Iterator end() const { return end_; }
602
603 //! Array access (readable and writable) to String objects.
604 String& operator [] (const Iterator& i) const
605 { return *i; }
606
607 //! Return CharIterator for referenced string, which belongs to this set.
608 CharIterator get_chars(const String& s, size_t depth) const
609 { return reinterpret_cast<CharIterator>(text_->data()) + s + depth; }
610
611 //! Returns true if CharIterator is at end of the given String
612 bool is_end(const String&, const CharIterator& i) const
613 { return (i >= reinterpret_cast<CharIterator>(text_->data()) + text_->size()); }
614
615 //! Return complete string (for debugging purposes)
616 std::string get_string(const String& s, size_t depth = 0) const
617 { return text_->substr(s + depth); }
618
619 //! Subset this string set using iterator range.
621 { return StringSuffixSet(*text_, begin, end); }
622
623 //! Allocate a new temporary string container with n empty Strings
624 Container allocate(size_t n) const
625 { return std::make_pair(*text_, std::vector<String>(n)); }
626
627 //! Deallocate a temporary string container
628 static void deallocate(Container& c)
629 { std::vector<String> v; v.swap(c.second); }
630
631 //! Construct from a string container
633 : text_(&c.first),
634 begin_(c.second.begin()), end_(c.second.end())
635 { }
636
637protected:
638 //! reference to base text
639 const Text* text_;
640
641 //! iterators inside the output suffix array.
643};
644
645/******************************************************************************/
646
647} // namespace sort_strings_detail
648
649//! \}
650
651} // namespace tlx
652
653#endif // !TLX_SORT_STRINGS_STRING_SET_HEADER
654
655/******************************************************************************/
Traits class implementing StringSet concept for char* and unsigned char* strings.
Char * String
String reference: pointer to first character.
const Char * CharIterator
iterator of characters in a string
String * Iterator
Iterator over string references: pointer over pointers.
CharType Char
exported alias for character type
std::pair< Iterator, size_t > Container
exported alias for assumed string container
Class implementing StringSet concept for char* and unsigned char* strings.
GenericCharStringSetTraits< CharType > Traits
Iterator begin() const
Iterator representing first String position.
size_t size() const
Return size of string array.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belong to this set.
static void deallocate(Container &c)
Deallocate a temporary string container.
String & operator[](Iterator i) const
Iterator-based array access (readable and writable) to String objects.
GenericCharStringSet(const Container &c)
Construct from a string container.
GenericCharStringSet(Iterator begin, Iterator end)
Construct from begin and end string pointers.
Iterator end() const
Iterator representing beyond last String position.
bool is_end(const String &, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
GenericCharStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
Iterator begin_
array of string pointers
Class implementing StringSet concept for a std::string objects.
const Char * CharIterator
iterator of characters in a string
String * Iterator
Iterator over string references: pointer to std::string.
std::pair< Iterator, size_t > Container
exported alias for assumed string container
std::string String
String reference: std::string, which should be reference counted.
std::uint8_t Char
exported alias for character type
Class implementing StringSet concept for arrays of std::string objects.
Iterator begin() const
Iterator representing first String position.
size_t size() const
Return size of string array.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
static void deallocate(Container &c)
Deallocate a temporary string container.
StdStringSet(Container &c)
Construct from a string container.
Iterator end() const
Iterator representing beyond last String position.
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
bool is_end(const String &s, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
StdStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
StdStringSet(const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
Iterator begin_
pointers to std::string objects
String & operator[](const Iterator &i) const
Array access (readable and writable) to String objects.
Base class for common string set functions, included via CRTP.
Traits::String & at(size_t i) const
index-based array access (readable and writable) to String objects.
std::uint8_t get_uint8(const typename Traits::String &s, size_t depth) const
bool is_less(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check if string a is less or equal to string b at iterators ai and bi.
std::uint64_t get_uint64(const typename Traits::String &s, size_t depth) const
std::uint16_t get_uint16(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 2 characters of string s at iterator i packed into a uint16_t (only works correctly for ...
bool is_equal(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check equality of two strings a and b at char iterators ai and bi.
Traits::Char get_char(const typename Traits::String &s, size_t depth) const
StringSet subi(size_t begin, size_t end) const
Subset this string set using index range.
bool check_order(const typename Traits::String &s1, const typename Traits::String &s2) const
bool is_leq(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check if string a is less or equal to string b at iterators ai and bi.
std::uint16_t get_uint16(const typename Traits::String &s, size_t depth) const
std::uint8_t get_uint8(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 1 characters of string s at iterator i packed into a uint8_t (only works correctly for 8...
std::uint64_t get_uint64(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 8 characters of string s at iterator i packed into a uint64_t (only works correctly for ...
std::uint32_t get_uint32(const typename Traits::String &s, size_t depth) const
std::uint32_t get_uint32(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 4 characters of string s at iterator i packed into a uint32_t (only works correctly for ...
Class implementing StringSet concept for suffix sorting indexes of a std::string text object.
std::string Text
exported alias for assumed string container
std::pair< Text, std::vector< String > > Container
exported alias for assumed string container
Text::size_type String
String reference: suffix index of the text.
const Char * CharIterator
iterator of characters in a string
std::vector< String >::iterator Iterator
Iterator over string references: using std::vector's iterator over suffix array vector.
std::uint8_t Char
exported alias for character type
Class implementing StringSet concept for suffix sorting indexes of a std::string text object.
Iterator begin() const
Iterator representing first String position.
size_t size() const
Return size of string array.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
static void deallocate(Container &c)
Deallocate a temporary string container.
Iterator end() const
Iterator representing beyond last String position.
bool is_end(const String &, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
StringSuffixSet(const Text &text, const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
StringSuffixSet(Container &c)
Construct from a string container.
Container allocate(size_t n) const
Allocate a new temporary string container with n empty Strings.
const Text * text_
reference to base text
Iterator begin_
iterators inside the output suffix array.
StringSuffixSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
static StringSuffixSet Initialize(const Text &text, std::vector< String > &sa)
Initializing constructor which fills output vector sa with indices.
String & operator[](const Iterator &i) const
Array access (readable and writable) to String objects.
Class implementing StringSet concept for a std::unique_ptr<std::string objects, which are non-copyabl...
const Char * CharIterator
iterator of characters in a string
String * Iterator
Iterator over string references: using std::vector's iterator.
std::unique_ptr< std::string > String
String reference: std::string, which should be reference counted.
std::pair< Iterator, size_t > Container
exported alias for assumed string container
std::uint8_t Char
exported alias for character type
Class implementing StringSet concept for a std::vector containing std::string objects.
Iterator begin() const
Iterator representing first String position.
size_t size() const
Return size of string array.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
UPtrStdStringSet(Container &c)
Construct from a string container.
static void deallocate(Container &c)
Deallocate a temporary string container.
Iterator end() const
Iterator representing beyond last String position.
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
UPtrStdStringSet(const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
bool is_end(const String &s, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
UPtrStdStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
Iterator begin_
vector of std::string objects
String & operator[](const Iterator &i) const
Array access (readable and writable) to String objects.
#define TLX_LOG1
Definition core.hpp:145
Type get_key_at(const StringSet &strset, size_t idx, size_t depth)
GenericCharStringSet< const char > CCharStringSet
enable_if< sizeof(Type)==4, std::uint32_t >::type get_key(const StringSet &strset, const typename StringSet::String &s, size_t depth)
GenericCharStringSet< char > CharStringSet
GenericCharStringSet< const unsigned char > CUCharStringSet
GenericCharStringSet< unsigned char > UCharStringSet
SFINAE enable_if – copy of std::enable_if<> with less extra cruft.
Definition enable_if.hpp:22