qxLib
string_utils.h
Go to the documentation of this file.
1 /**
2 
3  @file string_utils.h
4  @author Khrapov
5  @date 17.10.2020
6  @copyright © Nick Khrapov, 2021. All rights reserved.
7 
8 **/
9 #pragma once
10 
#include <qx/macros/config.h>
#include <qx/typedefs.h>

#include <cstdint>
13 
14 namespace qx
15 {
16 
/**
    @brief  djb2a hash (multiply by 33, then xor) of a string with explicit length
    @tparam value_t - char type
    @param  pszStr  - string for hashing, at least nLen characters must be readable
    @param  nSeed   - initial hash value
    @param  nLen    - number of characters to hash
    @retval         - hash value, computed and returned with the full width of size_t
**/
template<class value_t>
constexpr size_t djb2a_hash(const value_t* pszStr, size_t nSeed, size_t nLen)
{
    size_t nResult = nSeed;

    const value_t* const pEnd = pszStr + nLen;
    for (const value_t* pCur = pszStr; pCur != pEnd; ++pCur)
        nResult = (nResult * 33) ^ *pCur;

    return nResult;
}
35 
/**
    @brief  djb2a hash (multiply by 33, then xor) of a zero terminated string
    @tparam value_t - char type
    @param  pszStr  - string for hashing (zero terminated, the terminator is not hashed)
    @param  nSeed   - initial hash value
    @retval         - hash value, computed and returned with the full width of size_t
**/
template<class value_t>
constexpr size_t djb2a_hash(const value_t* pszStr, size_t nSeed = 0)
{
    size_t nResult = nSeed;

    for (const value_t* pCur = pszStr; *pCur; ++pCur)
        nResult = (nResult * 33) ^ *pCur;

    return nResult;
}
53 
namespace details
{

/**
    @brief  MurmurHash3 (x86, 32 bit) per-block mixing step
    @param  k - 32 bit block
    @retval   - mixed block
**/
constexpr std::uint32_t murmur_32_scramble(std::uint32_t k) noexcept
{
    k *= 0xcc9e2d51u;
    k = (k << 15) | (k >> 17);
    k *= 0x1b873593u;
    return k;
}

/**
    @brief  Assemble up to one 32 bit block from code units, first unit in the lowest bits
    @details constexpr replacement for std::memcpy on a little-endian target.
             Each unit is masked to its own width so that negative values
             of signed char types do not leak into the neighbouring units.
    @tparam value_t - char type (1, 2 or 4 bytes wide)
    @param  pStr    - first code unit to read
    @param  nUnits  - number of code units to read, must fit into 32 bits
    @retval         - assembled block
**/
template<class value_t>
constexpr std::uint32_t murmur_32_load(const value_t* pStr, size_t nUnits) noexcept
{
    constexpr size_t        nUnitBits = 8 * sizeof(value_t);
    constexpr std::uint32_t nUnitMask = 0xffffffffu >> (32 - nUnitBits);

    std::uint32_t nBlock = 0;
    for (size_t i = 0; i < nUnits; ++i)
        nBlock |= (static_cast<std::uint32_t>(pStr[i]) & nUnitMask) << (i * nUnitBits);

    return nBlock;
}

} // namespace details

/**
    @brief  Murmur hash (MurmurHash3, x86 32 bit variant)
    @details https://en.wikipedia.org/wiki/MurmurHash
             https://softwareengineering.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed
             The string is hashed as its little-endian byte image, i.e. for 1 byte
             char types the result equals the reference MurmurHash3_x86_32 of the
             same bytes; wider char types contribute sizeof(value_t) bytes per
             character. All arithmetic is performed in 32 bit unsigned integers
             independently of the width of size_t.
    @tparam value_t - char type (1, 2 or 4 bytes wide)
    @param  pStr    - string for hashing, may be nullptr only if nLen is 0
    @param  nSeed   - seed for hashing, only the low 32 bits are used
    @param  nLen    - string length in characters
    @retval         - 32bit unsigned value
**/
template<class value_t>
constexpr size_t murmur_32_hash(const value_t* pStr, size_t nSeed, size_t nLen) noexcept
{
    static_assert(sizeof(value_t) == 1 || sizeof(value_t) == 2 || sizeof(value_t) == 4);

    // number of characters that make up one 32 bit block
    constexpr size_t nUnitsPerBlock = sizeof(std::uint32_t) / sizeof(value_t);

    std::uint32_t nHash = static_cast<std::uint32_t>(nSeed);

    // body: all complete 32 bit blocks
    for (size_t nBlocks = nLen / nUnitsPerBlock; nBlocks != 0; --nBlocks)
    {
        nHash ^= details::murmur_32_scramble(details::murmur_32_load(pStr, nUnitsPerBlock));
        nHash = (nHash << 13) | (nHash >> 19);
        nHash = nHash * 5u + 0xe6546b64u;

        pStr += nUnitsPerBlock;
    }

    // tail: remaining characters that do not fill a whole block
    if (const size_t nTail = nLen % nUnitsPerBlock; nTail != 0)
        nHash ^= details::murmur_32_scramble(details::murmur_32_load(pStr, nTail));

    // finalization: mix in the length in bytes and avalanche
    nHash ^= static_cast<std::uint32_t>(nLen * sizeof(value_t));
    nHash ^= nHash >> 16;
    nHash *= 0x85ebca6bu;
    nHash ^= nHash >> 13;
    nHash *= 0xc2b2ae35u;
    nHash ^= nHash >> 16;

    return nHash;
}
136 
/**
    @brief  Compare two character ranges the way strcmp compares strings
    @details A range that ends first is treated as if it continued with a
             character of value 0.
    @tparam fwd_it_1_t - string 1 iterator type
    @tparam fwd_it_2_t - string 2 iterator type
    @param  itBegin1   - string 1 begin iterator
    @param  itEnd1     - string 1 end iterator
    @param  itBegin2   - string 2 begin iterator
    @param  itEnd2     - string 2 end iterator
    @retval            - < 0 the first character that does not match has a lower value in str 1 than in str 2
                           0 the contents of both strings are equal
                         > 0 the first character that does not match has a greater value in str 1 than in str 2
**/
template<class fwd_it_1_t, class fwd_it_2_t>
constexpr int iter_strcmp(fwd_it_1_t itBegin1, fwd_it_1_t itEnd1, fwd_it_2_t itBegin2, fwd_it_2_t itEnd2) noexcept
{
    // skip the common prefix
    for (;;)
    {
        if (!(itBegin1 != itEnd1 && itBegin2 != itEnd2 && *itBegin1 == *itBegin2))
            break;

        ++itBegin1;
        ++itBegin2;
    }

    // an exhausted range contributes 0 to the difference
    const bool bFinished1 = itBegin1 == itEnd1;
    const bool bFinished2 = itBegin2 == itEnd2;

    const int nLeft  = bFinished1 ? 0 : static_cast<int>(*itBegin1);
    const int nRight = bFinished2 ? 0 : static_cast<int>(*itBegin2);

    return nLeft - nRight;
}
172 
/**
    @brief  Constexpr comparison of two zero terminated strings
    @tparam value_t  - char type
    @param  pszLeft  - first string (zero terminated)
    @param  pszRight - second string (zero terminated)
    @retval          - < 0 the first character that does not match has a lower value in pszLeft than in pszRight
                         0 the contents of both strings are equal
                       > 0 the first character that does not match has a greater value in pszLeft than in pszRight
**/
template<class value_t>
constexpr int strcmp(const value_t* pszLeft, const value_t* pszRight)
{
    // advance while the left string has not ended and both characters agree
    for (; *pszLeft; ++pszLeft, ++pszRight)
    {
        if (*pszLeft != *pszRight)
            break;
    }

    return *pszLeft - *pszRight;
}
192 
namespace details
{

/**
    @brief  Select the narrow or the wide spelling of the same string literal
    @tparam value_t - requested char type
    @tparam N       - array size shared by both literals (deduced)
    @param  c       - narrow literal
    @param  w       - wide literal
    @retval         - reference to c if value_t is char, reference to w otherwise
**/
template<class value_t, size_t N>
constexpr const auto& choose_str_prefix(const char (&c)[N], const wchar_t (&w)[N]) noexcept
{
    if constexpr (!std::is_same_v<value_t, char>)
        return w;
    else
        return c;
}

/**
    @brief  Select the narrow or the wide spelling of the same character literal
    @details Only declared here: the definitions are the explicit
             specializations for char and wchar_t below.
    @tparam value_t - requested char type
    @retval         - the argument whose type is value_t
**/
template<class value_t>
constexpr value_t choose_char_prefix(char, wchar_t) noexcept;

// char: keep the narrow character, the wide one is ignored
template<>
constexpr char choose_char_prefix<char>(char c, wchar_t) noexcept
{
    return c;
}

// wchar_t: keep the wide character, the narrow one is ignored
template<>
constexpr wchar_t choose_char_prefix<wchar_t>(char, wchar_t w) noexcept
{
    return w;
}

} // namespace details
221 
222 } // namespace qx
223 
224 //==============================================================================
225 
// Pastes the wide prefix L directly onto the token x (x must be a character or string literal).
226 #define _QX_TO_WCHAR(x) L##x
227 
// Implementation of QX_TO_WSTRING.
// NOTE(review): __LPREFIX is not defined in this file - presumably the MSVC CRT helper macro; confirm.
228 #if QX_MSVC
229  #define _QX_TO_WSTRING(x) __LPREFIX(x)
230 #else
231  #define _QX_TO_WSTRING(x) _QX_TO_WCHAR(x)
232 #endif
233 
234 /**
235  @def QX_TO_WSTRING
236  @brief Convert a narrow string literal to a wide string literal (adds the L prefix)
237  @param str - narrow string literal to convert
238 **/
239 #define QX_TO_WSTRING(str) _QX_TO_WSTRING(str)
240 
241 /**
242  @def QX_STR_PREFIX
243  @brief Choose which prefix to add to a string literal: L or none
244  @param value_t - char type
245  @param str - narrow string literal to apply the prefix to
246 **/
247 #define QX_STR_PREFIX(value_t, str) qx::details::choose_str_prefix<value_t>(str, QX_TO_WSTRING(str))
248 
249 /**
250  @def QX_CHAR_PREFIX
251  @brief Choose which prefix to add to a character literal: L or none
252  @param value_t - char type
253  @param ch - narrow character literal to apply the prefix to
254 **/
255 #define QX_CHAR_PREFIX(value_t, ch) qx::details::choose_char_prefix<value_t>(ch, _QX_TO_WCHAR(ch))
256 
257 //==============================================================================
258 
259 /*
260  Static string comparisons: each macro is a static_assert on the result of qx::strcmp(a, b),
     so both arguments must be zero terminated strings usable in a constant expression
261 */
262 #define QX_STATIC_ASSERT_STR_EQ(a, b) static_assert(qx::strcmp((a), (b)) == 0)
263 #define QX_STATIC_ASSERT_STR_NE(a, b) static_assert(qx::strcmp((a), (b)) != 0)
264 #define QX_STATIC_ASSERT_STR_LT(a, b) static_assert(qx::strcmp((a), (b)) < 0)
265 #define QX_STATIC_ASSERT_STR_LE(a, b) static_assert(qx::strcmp((a), (b)) <= 0)
266 #define QX_STATIC_ASSERT_STR_GT(a, b) static_assert(qx::strcmp((a), (b)) > 0)
267 #define QX_STATIC_ASSERT_STR_GE(a, b) static_assert(qx::strcmp((a), (b)) >= 0)
268 
269 //==============================================================================
270 
271 namespace qx
272 {
273 
274 /**
275  @brief Get format specifier for type
276  @tparam value_t - char type
277  @tparam T - target type
278  @retval - format specifier or nullptr
279 **/
280 template<class value_t, class T>
281 constexpr auto get_format_specifier() noexcept
282 {
283  const value_t* pszFormat = nullptr;
284 
285  using test_type = std::remove_cvref_t<T>;
286 
287  if constexpr (std::is_same_v<test_type, char>)
288  {
289  pszFormat = QX_STR_PREFIX(value_t, "%hhd");
290  }
291  else if constexpr (std::is_same_v<test_type, unsigned char>)
292  {
293  pszFormat = QX_STR_PREFIX(value_t, "%hhu");
294  }
295  else if constexpr (std::is_same_v<test_type, short>)
296  {
297  pszFormat = QX_STR_PREFIX(value_t, "%hd");
298  }
299  else if constexpr (std::is_same_v<test_type, unsigned short>)
300  {
301  pszFormat = QX_STR_PREFIX(value_t, "%hu");
302  }
303  else if constexpr (std::is_same_v<test_type, int>)
304  {
305  pszFormat = QX_STR_PREFIX(value_t, "%d");
306  }
307  else if constexpr (std::is_same_v<test_type, unsigned int>)
308  {
309  pszFormat = QX_STR_PREFIX(value_t, "%u");
310  }
311  else if constexpr (std::is_same_v<test_type, long>)
312  {
313  pszFormat = QX_STR_PREFIX(value_t, "%ld");
314  }
315  else if constexpr (std::is_same_v<test_type, unsigned long>)
316  {
317  pszFormat = QX_STR_PREFIX(value_t, "%lu");
318  }
319  else if constexpr (std::is_same_v<test_type, long long>)
320  {
321  pszFormat = QX_STR_PREFIX(value_t, "%lld");
322  }
323  else if constexpr (std::is_same_v<test_type, unsigned long long>)
324  {
325  pszFormat = QX_STR_PREFIX(value_t, "%llu");
326  }
327  else if constexpr (std::is_same_v<test_type, float>)
328  {
329  pszFormat = QX_STR_PREFIX(value_t, "%f");
330  }
331  else if constexpr (std::is_same_v<test_type, double>)
332  {
333  pszFormat = QX_STR_PREFIX(value_t, "%lf");
334  }
335  else if constexpr (std::is_same_v<test_type, long double>)
336  {
337  pszFormat = QX_STR_PREFIX(value_t, "%Lf");
338  }
339  else if constexpr (std::is_same_v<std::remove_cv_t<std::remove_pointer_t<test_type>>, char>)
340  {
341  pszFormat = QX_STR_PREFIX(value_t, "%s");
342  }
343  else if constexpr (std::is_same_v<std::remove_cv_t<std::remove_pointer_t<test_type>>, wchar_t>)
344  {
345  pszFormat = QX_STR_PREFIX(value_t, "%ls");
346  }
347  else if constexpr (std::is_pointer_v<test_type>)
348  {
349 #if QX_MSVC
350  pszFormat = QX_STR_PREFIX(value_t, "0x%p");
351 #else
352  pszFormat = QX_STR_PREFIX(value_t, "%p");
353 #endif
354  }
355 
356  return pszFormat;
357 }
358 
/**
    @brief  Naive but constexpr string length algorithm,
            for runtime prefer std::strlen as there may be a lot of optimizations
    @details The terminator is detected by comparing with a value-initialized
             value_t, so every character type is supported
             (char, wchar_t, char8_t, char16_t, char32_t, ...).
    @tparam value_t - char type
    @param  psz     - pointer to zero terminated string, may be nullptr
    @retval         - string length in characters, 0 for nullptr
**/
template<class value_t>
constexpr std::size_t strlen(const value_t* psz)
{
    if (!psz)
        return 0;

    std::size_t nLen = 0;
    while (psz[nLen] != value_t {})
        ++nLen;

    return nLen;
}
381 
382 } // namespace qx
constexpr size_t djb2a_hash(const value_t *pszStr, size_t nSeed, size_t nLen)
djb2a hash
Definition: string_utils.h:26
#define QX_CHAR_PREFIX(value_t, ch)
Chose witch of prefixes add to char : L or none.
Definition: string_utils.h:255
constexpr int iter_strcmp(fwd_it_1_t itBegin1, fwd_it_1_t itEnd1, fwd_it_2_t itBegin2, fwd_it_2_t itEnd2) noexcept
Compares string 1 with string 2.
Definition: string_utils.h:150
constexpr auto get_format_specifier() noexcept
Get format specifier for type.
Definition: string_utils.h:281
constexpr size_t murmur_32_hash(const value_t *pStr, size_t nSeed, size_t nLen) noexcept
Murmur nHash.
Definition: string_utils.h:65
constexpr int strcmp(const value_t *pszLeft, const value_t *pszRight)
Constexpr compare two strings.
Definition: string_utils.h:183
constexpr std::size_t strlen(const value_t *psz)
Naive but constexpr string length algorithm, for runtime prefer std::strlen as there are may be a lot...
Definition: string_utils.h:367
#define QX_STR_PREFIX(value_t, str)
Chose witch of prefixes add to string : L or none.
Definition: string_utils.h:247
std::uint8_t u8
0 .. 255
Definition: typedefs.h:19
std::uint32_t u32
0 .. 4 294 967 295
Definition: typedefs.h:23
std::uint16_t u16
0 .. 65 535
Definition: typedefs.h:21