qxLib
string_utils.h
Go to the documentation of this file.
1 /**
2 
3  @file string_utils.h
4  @author Khrapov
5  @date 17.10.2020
6  @copyright © Nick Khrapov, 2021. All rights reserved.
7 
8 **/
9 #pragma once
10 
#include <qx/macros/config.h>
#include <qx/typedefs.h>

#include <cstddef>
#include <cstdint>
#include <type_traits>
13 
14 namespace qx
15 {
16 
/**
    @brief   djb2a hash of a character sequence with an explicit length
    @details Starting from the seed, every character is mixed in as
             hash = (hash * 33) ^ character.
             Characters take part through the usual integral conversions, so
             negative values of a signed character type are sign-extended
             before the xor.
    @tparam  value_t - char type
    @param   pszStr  - string for hashing (only dereferenced when nLen > 0)
    @param   nSeed   - seed for hashing (initial hash value)
    @param   nLen    - number of characters to hash
    @retval          - hash value, size_t wide (equals nSeed when nLen is 0)
**/
template<class value_t>
constexpr size_t djb2a_hash(const value_t* pszStr, size_t nSeed, size_t nLen) noexcept
{
    size_t nHash = nSeed;

    // '*' binds tighter than '^'; the parentheses only make that explicit
    for (size_t i = 0; i < nLen; ++i)
        nHash = (nHash * 33) ^ pszStr[i];

    return nHash;
}
35 
/**
    @brief   djb2a hash of a zero terminated string
    @details Starting from the seed, every character up to (not including)
             the terminating zero is mixed in as hash = (hash * 33) ^ character.
             Characters take part through the usual integral conversions, so
             negative values of a signed character type are sign-extended
             before the xor.
    @tparam  value_t - char type
    @param   pszStr  - string for hashing (zero terminated),
                       nullptr is treated as an empty string
    @param   nSeed   - seed for hashing (initial hash value)
    @retval          - hash value, size_t wide (equals nSeed for an empty string)
**/
template<class value_t>
constexpr size_t djb2a_hash(const value_t* pszStr, size_t nSeed) noexcept
{
    size_t nHash = nSeed;

    // nothing to hash: avoid dereferencing a null pointer
    if (!pszStr)
        return nHash;

    // '*' binds tighter than '^'; the parentheses only make that explicit
    for (size_t i = 0; pszStr[i]; ++i)
        nHash = (nHash * 33) ^ pszStr[i];

    return nHash;
}
53 
/**
    @brief   MurmurHash3 (x86, 32 bit variant)
    @details https://en.wikipedia.org/wiki/MurmurHash
             https://softwareengineering.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed

             The characters are hashed as if they were serialized to bytes in
             little-endian order, so the result does not depend on the host
             byte order or on the signedness of the character type.
             All arithmetic is done in 32 bits; only the low 32 bits of the
             seed are used and the byte length is reduced modulo 2^32.
    @tparam  value_t - char type (1, 2 or 4 bytes wide)
    @param   pStr    - string for hashing (only dereferenced when nLen > 0)
    @param   nSeed   - seed for hashing
    @param   nLen    - string length in characters
    @retval          - 32bit unsigned value (stored in size_t)
**/
template<class value_t>
constexpr size_t murmur_32_hash(const value_t* pStr, size_t nSeed, size_t nLen) noexcept
{
    static_assert(
        sizeof(value_t) == 1 || sizeof(value_t) == 2 || sizeof(value_t) == 4,
        "murmur_32_hash supports 1, 2 and 4 byte character types only");

    using hash_t = std::uint32_t;

    constexpr size_t   nCharsPerBlock = sizeof(hash_t) / sizeof(value_t);
    constexpr unsigned nCharBits      = static_cast<unsigned>(8 * sizeof(value_t));
    // keeps only the bits of one character (drops sign extension)
    constexpr hash_t   nCharMask      = static_cast<hash_t>(0xffffffffu >> (32u - nCharBits));

    constexpr hash_t nC1 = 0xcc9e2d51u;
    constexpr hash_t nC2 = 0x1b873593u;

    hash_t nHash = static_cast<hash_t>(nSeed);

    // body: consume full 4 byte blocks
    for (size_t nBlocks = nLen / nCharsPerBlock; nBlocks > 0; --nBlocks)
    {
        // constexpr version of std::memcpy(&k, pStr, sizeof(u32)) with a
        // fixed little-endian layout: the first character ends up lowest
        hash_t k = 0;
        for (size_t j = nCharsPerBlock; j > 0; --j)
        {
            // a 4 byte character fills the block alone (a 32 bit shift would be UB)
            if constexpr (nCharsPerBlock > 1)
                k <<= nCharBits;

            k |= static_cast<hash_t>(pStr[j - 1]) & nCharMask;
        }
        pStr += nCharsPerBlock;

        k *= nC1;
        k = (k << 15) | (k >> 17);
        k *= nC2;

        nHash ^= k;
        nHash = (nHash << 13) | (nHash >> 19);
        nHash = nHash * 5u + 0xe6546b64u;
    }

    // tail: the characters that do not fill a whole block
    // (impossible for 4 byte characters)
    if constexpr (nCharsPerBlock > 1)
    {
        const size_t nTail = nLen % nCharsPerBlock;
        if (nTail != 0)
        {
            hash_t k = 0;
            for (size_t j = nTail; j > 0; --j)
            {
                k <<= nCharBits;
                k |= static_cast<hash_t>(pStr[j - 1]) & nCharMask;
            }

            k *= nC1;
            k = (k << 15) | (k >> 17);
            k *= nC2;
            nHash ^= k;
        }
    }

    // finalization: mix in the length in bytes and avalanche
    nHash ^= static_cast<hash_t>(nLen * sizeof(value_t));
    nHash ^= nHash >> 16;
    nHash *= 0x85ebca6bu;
    nHash ^= nHash >> 13;
    nHash *= 0xc2b2ae35u;
    nHash ^= nHash >> 16;

    return nHash;
}
136 
/**
    @brief  Compares two character ranges the way strcmp compares C strings
    @tparam fwd_it_1_t - iterator type of the first range
    @tparam fwd_it_2_t - iterator type of the second range
    @param  itBegin1   - first range begin iterator
    @param  itEnd1     - first range end iterator
    @param  itBegin2   - second range begin iterator
    @param  itEnd2     - second range end iterator
    @retval            - 0 when both ranges end at the same time without a mismatch,
                         minus the next character of range 2 when range 1 ends first,
                         the next character of range 1 when range 2 ends first,
                         otherwise the difference (range 1 - range 2) of the
                         first pair of characters that do not match
**/
template<class fwd_it_1_t, class fwd_it_2_t>
constexpr int iter_strcmp(fwd_it_1_t itBegin1, fwd_it_1_t itEnd1, fwd_it_2_t itBegin2, fwd_it_2_t itEnd2) noexcept
{
    // the iterators are passed by value, so they can be advanced in place
    for (;;)
    {
        const bool bLeftDone  = (itBegin1 == itEnd1);
        const bool bRightDone = (itBegin2 == itEnd2);

        if (bLeftDone && bRightDone)
            return 0;

        if (bLeftDone)
            return -static_cast<int>(*itBegin2);

        if (bRightDone)
            return static_cast<int>(*itBegin1);

        // both ranges still have a character: stop at the first mismatch
        if (!(*itBegin1 == *itBegin2))
            return static_cast<int>(*itBegin1) - static_cast<int>(*itBegin2);

        ++itBegin1;
        ++itBegin2;
    }
}
172 
/**
    @brief   Constexpr compare two strings
    @details The strings are walked in parallel until the left one ends or
             the characters differ; the result is the difference of the two
             characters at that position, each converted to int first.
    @tparam  value_t  - char type
    @param   pszLeft  - first string (zero terminated, must not be nullptr)
    @param   pszRight - second string (zero terminated, must not be nullptr)
    @retval           - < 0 the first character that does not match has a lower value in pszLeft than in pszRight
                          0 the contents of both strings are equal
                        > 0 the first character that does not match has a greater value in pszLeft than in pszRight
**/
template<class value_t>
constexpr int strcmp(const value_t* pszLeft, const value_t* pszRight) noexcept
{
    while (*pszLeft && (*pszLeft == *pszRight))
    {
        ++pszLeft;
        ++pszRight;
    }

    // convert before subtracting (same as iter_strcmp) so that unsigned
    // character types do not wrap around inside the subtraction
    return static_cast<int>(*pszLeft) - static_cast<int>(*pszRight);
}
192 
namespace details
{

/**
    @brief  Pick the string whose character type matches value_t
    @tparam value_t - char type, only the char and wchar_t specializations are defined
    @retval         - the narrow string for char, the wide string for wchar_t
**/
template<class value_t>
constexpr const value_t* choose_str_prefix(const char*, const wchar_t*) noexcept;

// char: hand back the narrow string, the wide one is ignored
template<>
constexpr const char* choose_str_prefix<char>(const char* pszNarrow, const wchar_t*) noexcept
{
    return pszNarrow;
}

// wchar_t: hand back the wide string, the narrow one is ignored
template<>
constexpr const wchar_t* choose_str_prefix<wchar_t>(const char*, const wchar_t* pszWide) noexcept
{
    return pszWide;
}

/**
    @brief  Pick the character whose type matches value_t
    @tparam value_t - char type, only the char and wchar_t specializations are defined
    @retval         - the narrow character for char, the wide character for wchar_t
**/
template<class value_t>
constexpr value_t choose_char_prefix(char, wchar_t) noexcept;

// char: hand back the narrow character, the wide one is ignored
template<>
constexpr char choose_char_prefix<char>(char chNarrow, wchar_t) noexcept
{
    return chNarrow;
}

// wchar_t: hand back the wide character, the narrow one is ignored
template<>
constexpr wchar_t choose_char_prefix<wchar_t>(char, wchar_t chWide) noexcept
{
    return chWide;
}

} // namespace details
227 
228 } // namespace qx
229 
230 //==============================================================================
231 
// Pastes the L prefix in front of the given token (character or string literal).
#define _QX_TO_WCHAR(x) L##x

// Implementation of QX_TO_WSTRING: MSVC builds go through __LPREFIX, all other
// builds paste the L prefix via _QX_TO_WCHAR.
#if QX_MSVC
    #define _QX_TO_WSTRING(x) __LPREFIX(x)
#else
    #define _QX_TO_WSTRING(x) _QX_TO_WCHAR(x)
#endif

/**
    @def    QX_TO_WSTRING
    @brief  Convert a narrow string literal to a wide string literal
    @param  str - string literal to apply
**/
#define QX_TO_WSTRING(str) _QX_TO_WSTRING(str)

/**
    @def    QX_STR_PREFIX
    @brief  Choose which prefix to add to a string literal: L or none
    @param  value_t - char type
    @param  str     - string literal to apply
**/
#define QX_STR_PREFIX(value_t, str) qx::details::choose_str_prefix<value_t>(str, QX_TO_WSTRING(str))

/**
    @def    QX_CHAR_PREFIX
    @brief  Choose which prefix to add to a character literal: L or none
    @param  value_t - char type
    @param  ch      - character literal to apply
**/
#define QX_CHAR_PREFIX(value_t, ch) qx::details::choose_char_prefix<value_t>(ch, _QX_TO_WCHAR(ch))
262 
263 //==============================================================================
264 
/*
    Static string comparisons

    Each macro is a static_assert on the result of qx::strcmp(a, b), so both
    arguments have to be usable in a constant expression.
    The suffix names the relation that must hold between a and b:
    EQ ==, NE !=, LT <, LE <=, GT >, GE >=
*/
#define QX_STATIC_ASSERT_STR_EQ(a, b) static_assert(qx::strcmp((a), (b)) == 0)
#define QX_STATIC_ASSERT_STR_NE(a, b) static_assert(qx::strcmp((a), (b)) != 0)
#define QX_STATIC_ASSERT_STR_LT(a, b) static_assert(qx::strcmp((a), (b)) < 0)
#define QX_STATIC_ASSERT_STR_LE(a, b) static_assert(qx::strcmp((a), (b)) <= 0)
#define QX_STATIC_ASSERT_STR_GT(a, b) static_assert(qx::strcmp((a), (b)) > 0)
#define QX_STATIC_ASSERT_STR_GE(a, b) static_assert(qx::strcmp((a), (b)) >= 0)
274 
275 //==============================================================================
276 
277 namespace qx
278 {
279 
280 /**
281  @brief Get format specifier for type
282  @tparam value_t - char type
283  @tparam T - target type
284  @retval - format specifier or nullptr
285 **/
286 template<class value_t, class T>
287 constexpr auto get_format_specifier() noexcept
288 {
289  const value_t* pszFormat = nullptr;
290 
291  using test_type = std::remove_cvref_t<T>;
292 
293  if constexpr (std::is_same_v<test_type, char>)
294  {
295  pszFormat = QX_STR_PREFIX(value_t, "%hhd");
296  }
297  else if constexpr (std::is_same_v<test_type, unsigned char>)
298  {
299  pszFormat = QX_STR_PREFIX(value_t, "%hhu");
300  }
301  else if constexpr (std::is_same_v<test_type, short>)
302  {
303  pszFormat = QX_STR_PREFIX(value_t, "%hd");
304  }
305  else if constexpr (std::is_same_v<test_type, unsigned short>)
306  {
307  pszFormat = QX_STR_PREFIX(value_t, "%hu");
308  }
309  else if constexpr (std::is_same_v<test_type, int>)
310  {
311  pszFormat = QX_STR_PREFIX(value_t, "%d");
312  }
313  else if constexpr (std::is_same_v<test_type, unsigned int>)
314  {
315  pszFormat = QX_STR_PREFIX(value_t, "%u");
316  }
317  else if constexpr (std::is_same_v<test_type, long>)
318  {
319  pszFormat = QX_STR_PREFIX(value_t, "%ld");
320  }
321  else if constexpr (std::is_same_v<test_type, unsigned long>)
322  {
323  pszFormat = QX_STR_PREFIX(value_t, "%lu");
324  }
325  else if constexpr (std::is_same_v<test_type, long long>)
326  {
327  pszFormat = QX_STR_PREFIX(value_t, "%lld");
328  }
329  else if constexpr (std::is_same_v<test_type, unsigned long long>)
330  {
331  pszFormat = QX_STR_PREFIX(value_t, "%llu");
332  }
333  else if constexpr (std::is_same_v<test_type, float>)
334  {
335  pszFormat = QX_STR_PREFIX(value_t, "%f");
336  }
337  else if constexpr (std::is_same_v<test_type, double>)
338  {
339  pszFormat = QX_STR_PREFIX(value_t, "%lf");
340  }
341  else if constexpr (std::is_same_v<test_type, long double>)
342  {
343  pszFormat = QX_STR_PREFIX(value_t, "%Lf");
344  }
345  else if constexpr (std::is_same_v<std::remove_cv_t<std::remove_pointer_t<test_type>>, char>)
346  {
347  pszFormat = QX_STR_PREFIX(value_t, "%s");
348  }
349  else if constexpr (std::is_same_v<std::remove_cv_t<std::remove_pointer_t<test_type>>, wchar_t>)
350  {
351  pszFormat = QX_STR_PREFIX(value_t, "%ls");
352  }
353  else if constexpr (std::is_pointer_v<test_type>)
354  {
355 #if QX_MSVC
356  pszFormat = QX_STR_PREFIX(value_t, "0x%p");
357 #else
358  pszFormat = QX_STR_PREFIX(value_t, "%p");
359 #endif
360  }
361 
362  return pszFormat;
363 }
364 
/**
    @brief  Naive but constexpr string length algorithm,
            for runtime prefer std::strlen as it may be heavily optimized
    @tparam value_t - char type (any type whose value-initialized value is
                      the string terminator, e.g. char, wchar_t, char16_t)
    @param  psz     - pointer to zero terminated string, may be nullptr
    @retval         - string length in characters (0 for nullptr)
**/
template<class value_t>
constexpr std::size_t strlen(const value_t* psz) noexcept
{
    if (!psz)
        return 0;

    // value_t{} is the zero terminator for every character type, so no
    // narrow/wide literal selection is needed here
    std::size_t nLen = 0;
    while (psz[nLen] != value_t{})
        ++nLen;

    return nLen;
}
387 
388 } // namespace qx
constexpr size_t djb2a_hash(const value_t *pszStr, size_t nSeed, size_t nLen)
djb2a hash
Definition: string_utils.h:26
#define QX_CHAR_PREFIX(value_t, ch)
Choose which prefix to add to a char: L or none.
Definition: string_utils.h:261
constexpr int iter_strcmp(fwd_it_1_t itBegin1, fwd_it_1_t itEnd1, fwd_it_2_t itBegin2, fwd_it_2_t itEnd2) noexcept
Compares string 1 with string 2.
Definition: string_utils.h:150
constexpr auto get_format_specifier() noexcept
Get format specifier for type.
Definition: string_utils.h:287
constexpr size_t murmur_32_hash(const value_t *pStr, size_t nSeed, size_t nLen) noexcept
Murmur nHash.
Definition: string_utils.h:65
constexpr int strcmp(const value_t *pszLeft, const value_t *pszRight)
Constexpr compare two strings.
Definition: string_utils.h:183
constexpr std::size_t strlen(const value_t *psz)
Naive but constexpr string length algorithm, for runtime prefer std::strlen as there are may be a lot...
Definition: string_utils.h:373
#define QX_STR_PREFIX(value_t, str)
Choose which prefix to add to a string: L or none.
Definition: string_utils.h:253
std::uint8_t u8
0 .. 255
Definition: typedefs.h:19
std::uint32_t u32
0 .. 4 294 967 295
Definition: typedefs.h:23
std::uint16_t u16
0 .. 65 535
Definition: typedefs.h:21