qxLib
string_utils.h
Go to the documentation of this file.
1 /**
2 
3  @file string_utils.h
4  @author Khrapov
5  @date 17.10.2020
6  @copyright © Nick Khrapov, 2021. All right reserved.
7 
8 **/
9 #pragma once
10 
#include <qx/macros/config.h>
#include <qx/typedefs.h>

#include <cstddef>
#include <cstdint>
#include <type_traits>
13 
14 namespace qx
15 {
16 
/**
    @brief  djb2a hash (hash = hash * 33 xor character) over an explicit length
    @tparam value_t - char type
    @param  pszStr  - string for hashing, exactly nLen characters are read
    @param  nSeed   - initial hash value
    @param  nLen    - number of characters to hash
    @retval         - hash value, equal to nSeed when nLen is 0.
                      All arithmetic is done in size_t, so the result is as wide as size_t.
                      Characters are converted to size_t as-is, so negative values
                      of a signed char type are sign-extended before the xor
**/
template<class value_t>
constexpr size_t djb2a_hash(const value_t* pszStr, size_t nSeed, size_t nLen)
{
    size_t nResult = nSeed;

    while (nLen != 0)
    {
        nResult = (nResult * 33) ^ *pszStr;
        ++pszStr;
        --nLen;
    }

    return nResult;
}
35 
/**
    @brief  djb2a hash (hash = hash * 33 xor character) of a zero terminated string
    @tparam value_t - char type
    @param  pszStr  - string for hashing (zero terminated), the terminator is not hashed
    @param  nSeed   - initial hash value
    @retval         - hash value, equal to nSeed for an empty string.
                      All arithmetic is done in size_t, so the result is as wide as size_t.
                      Characters are converted to size_t as-is, so negative values
                      of a signed char type are sign-extended before the xor
**/
template<class value_t>
constexpr size_t djb2a_hash(const value_t* pszStr, size_t nSeed)
{
    size_t nResult = nSeed;

    for (const value_t* pCurrent = pszStr; *pCurrent; ++pCurrent)
        nResult = (nResult * 33) ^ *pCurrent;

    return nResult;
}
53 
namespace details
{

/**
    @brief  Rotate a 32 bit value left
    @param  nValue - value to rotate
    @param  nBits  - rotation distance, must be in range (0, 32)
    @retval        - rotated value
**/
constexpr std::uint32_t murmur_32_rotl(std::uint32_t nValue, unsigned nBits) noexcept
{
    return (nValue << nBits) | (nValue >> (32u - nBits));
}

/**
    @brief  MurmurHash3 mixing step applied to every 4 byte block and to the tail
    @param  k - block value
    @retval   - mixed block value (0 stays 0)
**/
constexpr std::uint32_t murmur_32_scramble(std::uint32_t k) noexcept
{
    k *= 0xcc9e2d51u;
    k = murmur_32_rotl(k, 15);
    k *= 0x1b873593u;
    return k;
}

/**
    @brief  Get one byte of the string as if its characters were stored little-endian
    @tparam value_t - char type
    @param  pStr    - string
    @param  nByte   - byte index, must be less than string length * sizeof(value_t)
    @retval         - byte value in range [0, 255]
**/
template<class value_t>
constexpr std::uint32_t murmur_32_byte(const value_t* pStr, size_t nByte) noexcept
{
    // the cast may sign-extend, but only bytes below sizeof(value_t) are ever extracted
    const auto nChar = static_cast<std::uint32_t>(pStr[nByte / sizeof(value_t)]);
    return (nChar >> (8 * (nByte % sizeof(value_t)))) & 0xffu;
}

} // namespace details

/**
    @brief   MurmurHash3 (x86 32 bit variant)
    @details https://en.wikipedia.org/wiki/MurmurHash
             https://softwareengineering.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed

             The string is hashed as the sequence of bytes obtained by storing every
             character little-endian, so the result does not depend on the byte order
             or char signedness of the target. For 1 byte characters the result equals
             the reference MurmurHash3_x86_32 of the same buffer.

             All arithmetic is performed on 32 bit unsigned values. Previous revisions
             rotated size_t values and shifted by sizeof() instead of a bit count, so
             hashes produced by this version differ from the ones produced before.
    @tparam  value_t - char type (1, 2 or 4 bytes wide)
    @param   pStr    - string for hashing, may be nullptr only if nLen is 0
    @param   nSeed   - seed for hashing, only the low 32 bits are used
    @param   nLen    - string length in characters
    @retval          - 32bit unsigned value
**/
template<class value_t>
constexpr size_t murmur_32_hash(const value_t* pStr, size_t nSeed, size_t nLen) noexcept
{
    static_assert(sizeof(value_t) == 1 || sizeof(value_t) == 2 || sizeof(value_t) == 4);

    const size_t nBytes  = nLen * sizeof(value_t);
    const size_t nBlocks = nBytes / 4;

    std::uint32_t nHash = static_cast<std::uint32_t>(nSeed);

    // body: full 4 byte blocks, assembled little-endian
    // (constexpr replacement for std::memcpy(&k, pStr, sizeof(u32)))
    for (size_t nBlock = 0; nBlock < nBlocks; ++nBlock)
    {
        const size_t nFirst = nBlock * 4;

        const std::uint32_t k = details::murmur_32_byte(pStr, nFirst)
                                | (details::murmur_32_byte(pStr, nFirst + 1) << 8)
                                | (details::murmur_32_byte(pStr, nFirst + 2) << 16)
                                | (details::murmur_32_byte(pStr, nFirst + 3) << 24);

        nHash ^= details::murmur_32_scramble(k);
        nHash = details::murmur_32_rotl(nHash, 13);
        nHash = nHash * 5u + 0xe6546b64u;
    }

    // tail: up to 3 remaining bytes, the first of them ends up in the lowest byte
    std::uint32_t nTail = 0;
    for (size_t nByte = nBytes; nByte > nBlocks * 4; --nByte)
        nTail = (nTail << 8) | details::murmur_32_byte(pStr, nByte - 1);

    // scrambling 0 gives 0, so this is a no-op when there is no tail
    nHash ^= details::murmur_32_scramble(nTail);

    // finalization: mix in the length in bytes and avalanche the bits
    nHash ^= static_cast<std::uint32_t>(nBytes);
    nHash ^= nHash >> 16;
    nHash *= 0x85ebca6bu;
    nHash ^= nHash >> 13;
    nHash *= 0xc2b2ae35u;
    nHash ^= nHash >> 16;

    return nHash;
}
136 
/**
    @brief  Lexicographically compares string 1 with string 2
    @details Neither end iterator is ever dereferenced. If one string is a proper
             prefix of the other, the shorter one is considered less (this includes
             an empty string compared with a non-empty one).
    @tparam fwd_it_1_t - string 1 iterator type
    @tparam fwd_it_2_t - string 2 iterator type
    @param  itBegin1   - string 1 begin iterator
    @param  itEnd1     - string 1 end iterator
    @param  itBegin2   - string 2 begin iterator
    @param  itEnd2     - string 2 end iterator
    @retval            - < 0 the first character that does not match has a lower value in str 1 than in str 2,
                             or str 1 is a proper prefix of str 2
                           0 the contents of both strings are equal
                         > 0 the first character that does not match has a greater value in str 1 than in str 2,
                             or str 2 is a proper prefix of str 1
**/
template<class fwd_it_1_t, class fwd_it_2_t>
constexpr int iter_strcmp(fwd_it_1_t itBegin1, fwd_it_1_t itEnd1, fwd_it_2_t itBegin2, fwd_it_2_t itEnd2) noexcept
{
    // walk the common part, both iterators are checked before being dereferenced
    while (itBegin1 != itEnd1 && itBegin2 != itEnd2)
    {
        if (!(*itBegin1 == *itBegin2))
            return *itBegin1 - *itBegin2;

        ++itBegin1;
        ++itBegin2;
    }

    // at least one string is exhausted: the longer one (if any) is greater
    if (itBegin1 != itEnd1)
        return 1;

    if (itBegin2 != itEnd2)
        return -1;

    return 0;
}
168 
/**
    @brief  Constexpr comparison of two zero terminated strings
    @tparam value_t  - char type
    @param  pszLeft  - first string (zero terminated), must not be nullptr
    @param  pszRight - second string (zero terminated), must not be nullptr
    @retval          - difference between the first pair of characters that do not match
                       (the terminator takes part in the comparison):
                       < 0 the character in pszLeft has a lower value than in pszRight
                         0 the contents of both strings are equal
                       > 0 the character in pszLeft has a greater value than in pszRight
**/
template<class value_t>
constexpr int strcmp(const value_t* pszLeft, const value_t* pszRight)
{
    for (;; ++pszLeft, ++pszRight)
    {
        // stop at the end of the left string or at the first mismatch
        if (!*pszLeft || !(*pszLeft == *pszRight))
            break;
    }

    return *pszLeft - *pszRight;
}
188 
namespace details
{

/**
    @brief  Select the string that matches the requested char type
    @details Only declared here, definitions exist for char and wchar_t only
    @tparam value_t - char type
    @retval         - the first argument for char, the second argument for wchar_t
**/
template<class value_t>
constexpr const value_t* choose_str_prefix(const char*, const wchar_t*) noexcept;

// char: return the narrow string, the wide one is ignored
template<>
constexpr const char* choose_str_prefix<char>(const char* pszNarrow, const wchar_t* /*pszWide*/) noexcept
{
    return pszNarrow;
}

// wchar_t: return the wide string, the narrow one is ignored
template<>
constexpr const wchar_t* choose_str_prefix<wchar_t>(const char* /*pszNarrow*/, const wchar_t* pszWide) noexcept
{
    return pszWide;
}

/**
    @brief  Select the character that matches the requested char type
    @details Only declared here, definitions exist for char and wchar_t only
    @tparam value_t - char type
    @retval         - the first argument for char, the second argument for wchar_t
**/
template<class value_t>
constexpr value_t choose_char_prefix(char, wchar_t) noexcept;

// char: return the narrow character, the wide one is ignored
template<>
constexpr char choose_char_prefix<char>(char chNarrow, wchar_t /*chWide*/) noexcept
{
    return chNarrow;
}

// wchar_t: return the wide character, the narrow one is ignored
template<>
constexpr wchar_t choose_char_prefix<wchar_t>(char /*chNarrow*/, wchar_t chWide) noexcept
{
    return chWide;
}

} // namespace details
223 
224 } // namespace qx
225 
226 //==============================================================================
227 
// Pastes the wide prefix L onto the token x. Because x is an operand of ##,
// it is NOT macro-expanded first: pass a literal, not a macro name.
228 #define _QX_TO_WCHAR(x) L##x
229 
// Implementation of QX_TO_WSTRING. On the non-MSVC path x is not an operand of ##
// in this macro, so it is fully macro-expanded before _QX_TO_WCHAR pastes the prefix.
// NOTE(review): __LPREFIX is presumably the MSVC CRT macro doing the same job - confirm.
230 #if QX_MSVC
231  #define _QX_TO_WSTRING(x) __LPREFIX(x)
232 #else
233  #define _QX_TO_WSTRING(x) _QX_TO_WCHAR(x)
234 #endif
235 
236 /**
237  @def QX_TO_WSTRING
238  @brief Turn a narrow string literal into a wide string literal at preprocessing time
238         (this only adds the L prefix to the literal, no runtime conversion is performed)
239  @param str - string literal, or a macro that expands to one
240 **/
241 #define QX_TO_WSTRING(str) _QX_TO_WSTRING(str)
242 
243 /**
244  @def QX_STR_PREFIX
245  @brief Choose which prefix to add to a string literal: L or none.
245         Expands to a call of qx::details::choose_str_prefix<value_t> with both
245         the narrow and the wide form of the literal
246  @param value_t - char type (choose_str_prefix is defined for char and wchar_t)
247  @param str - string literal to apply the prefix to
248 **/
249 #define QX_STR_PREFIX(value_t, str) qx::details::choose_str_prefix<value_t>(str, QX_TO_WSTRING(str))
250 
251 /**
252  @def QX_CHAR_PREFIX
253  @brief Choose which prefix to add to a character literal: L or none.
253         Expands to a call of qx::details::choose_char_prefix<value_t> with both
253         the narrow and the wide form of the literal
254  @param value_t - char type (choose_char_prefix is defined for char and wchar_t)
255  @param ch - character literal to apply the prefix to (pasted with ##, so it must
255              be a literal token, not a macro)
256 **/
257 #define QX_CHAR_PREFIX(value_t, ch) qx::details::choose_char_prefix<value_t>(ch, _QX_TO_WCHAR(ch))
258 
259 //==============================================================================
260 
261 /*
262  Static string comparisons
262 
262  Each macro expands to a static_assert on the result of qx::strcmp(a, b),
262  so both arguments must be zero terminated strings usable in a constant expression.
262  EQ: a == b, NE: a != b, LT: a < b, LE: a <= b, GT: a > b, GE: a >= b
263 */
264 #define QX_STATIC_ASSERT_STR_EQ(a, b) static_assert(qx::strcmp((a), (b)) == 0)
265 #define QX_STATIC_ASSERT_STR_NE(a, b) static_assert(qx::strcmp((a), (b)) != 0)
266 #define QX_STATIC_ASSERT_STR_LT(a, b) static_assert(qx::strcmp((a), (b)) < 0)
267 #define QX_STATIC_ASSERT_STR_LE(a, b) static_assert(qx::strcmp((a), (b)) <= 0)
268 #define QX_STATIC_ASSERT_STR_GT(a, b) static_assert(qx::strcmp((a), (b)) > 0)
269 #define QX_STATIC_ASSERT_STR_GE(a, b) static_assert(qx::strcmp((a), (b)) >= 0)
270 
271 //==============================================================================
272 
273 namespace qx
274 {
275 
276 /**
277  @brief Get format specifier for type
278  @tparam value_t - char type
279  @tparam T - target type
280  @retval - format specifier or nullptr
281 **/
282 template<class value_t, class T>
283 constexpr auto get_format_specifier() noexcept
284 {
285  const value_t* pszFormat = nullptr;
286 
287  using test_type = std::remove_cvref_t<T>;
288 
289  if constexpr (std::is_same_v<test_type, char>)
290  {
291  pszFormat = QX_STR_PREFIX(value_t, "%hhd");
292  }
293  else if constexpr (std::is_same_v<test_type, unsigned char>)
294  {
295  pszFormat = QX_STR_PREFIX(value_t, "%hhu");
296  }
297  else if constexpr (std::is_same_v<test_type, short>)
298  {
299  pszFormat = QX_STR_PREFIX(value_t, "%hd");
300  }
301  else if constexpr (std::is_same_v<test_type, unsigned short>)
302  {
303  pszFormat = QX_STR_PREFIX(value_t, "%hu");
304  }
305  else if constexpr (std::is_same_v<test_type, int>)
306  {
307  pszFormat = QX_STR_PREFIX(value_t, "%d");
308  }
309  else if constexpr (std::is_same_v<test_type, unsigned int>)
310  {
311  pszFormat = QX_STR_PREFIX(value_t, "%u");
312  }
313  else if constexpr (std::is_same_v<test_type, long>)
314  {
315  pszFormat = QX_STR_PREFIX(value_t, "%ld");
316  }
317  else if constexpr (std::is_same_v<test_type, unsigned long>)
318  {
319  pszFormat = QX_STR_PREFIX(value_t, "%lu");
320  }
321  else if constexpr (std::is_same_v<test_type, long long>)
322  {
323  pszFormat = QX_STR_PREFIX(value_t, "%lld");
324  }
325  else if constexpr (std::is_same_v<test_type, unsigned long long>)
326  {
327  pszFormat = QX_STR_PREFIX(value_t, "%llu");
328  }
329  else if constexpr (std::is_same_v<test_type, float>)
330  {
331  pszFormat = QX_STR_PREFIX(value_t, "%f");
332  }
333  else if constexpr (std::is_same_v<test_type, double>)
334  {
335  pszFormat = QX_STR_PREFIX(value_t, "%lf");
336  }
337  else if constexpr (std::is_same_v<test_type, long double>)
338  {
339  pszFormat = QX_STR_PREFIX(value_t, "%Lf");
340  }
341  else if constexpr (std::is_same_v<std::remove_cv_t<std::remove_pointer_t<test_type>>, char>)
342  {
343  pszFormat = QX_STR_PREFIX(value_t, "%s");
344  }
345  else if constexpr (std::is_same_v<std::remove_cv_t<std::remove_pointer_t<test_type>>, wchar_t>)
346  {
347  pszFormat = QX_STR_PREFIX(value_t, "%ls");
348  }
349  else if constexpr (std::is_pointer_v<test_type>)
350  {
351 #if QX_MSVC
352  pszFormat = QX_STR_PREFIX(value_t, "0x%p");
353 #else
354  pszFormat = QX_STR_PREFIX(value_t, "%p");
355 #endif
356  }
357 
358  return pszFormat;
359 }
360 
/**
    @brief  Naive but constexpr string length algorithm,
            for runtime prefer std::strlen as there may be a lot of optimizations
    @details Works for any character type (char, wchar_t, char8_t, char16_t, char32_t, ...):
             the terminator is the value-initialized value_t
    @tparam value_t - char type
    @param  psz     - pointer to zero terminated string, may be nullptr
    @retval         - string length in characters without the terminator, 0 for nullptr
**/
template<class value_t>
constexpr std::size_t strlen(const value_t* psz)
{
    if (!psz)
        return 0;

    std::size_t nLen = 0;
    while (psz[nLen] != value_t())
        ++nLen;

    return nLen;
}
383 
384 } // namespace qx
constexpr size_t djb2a_hash(const value_t *pszStr, size_t nSeed, size_t nLen)
djb2a hash
Definition: string_utils.h:26
#define QX_CHAR_PREFIX(value_t, ch)
Choose which prefix to add to a char literal: L or none.
Definition: string_utils.h:257
constexpr int iter_strcmp(fwd_it_1_t itBegin1, fwd_it_1_t itEnd1, fwd_it_2_t itBegin2, fwd_it_2_t itEnd2) noexcept
Compares string 1 with string 2.
Definition: string_utils.h:150
constexpr auto get_format_specifier() noexcept
Get format specifier for type.
Definition: string_utils.h:283
constexpr size_t murmur_32_hash(const value_t *pStr, size_t nSeed, size_t nLen) noexcept
Murmur hash.
Definition: string_utils.h:65
constexpr int strcmp(const value_t *pszLeft, const value_t *pszRight)
Constexpr compare two strings.
Definition: string_utils.h:179
constexpr std::size_t strlen(const value_t *psz)
Naive but constexpr string length algorithm, for runtime prefer std::strlen as there are may be a lot...
Definition: string_utils.h:369
#define QX_STR_PREFIX(value_t, str)
Choose which prefix to add to a string literal: L or none.
Definition: string_utils.h:249
uint16_t u16
0 .. 65 535
Definition: typedefs.h:22
uint32_t u32
0 .. 4 294 967 295
Definition: typedefs.h:24
uint8_t u8
0 .. 255
Definition: typedefs.h:20