You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

353 lines
14 KiB

  1. /* /////////////////////////////////////////////////////////////////////////
  2. * File: stlsoft/string/charset_tokeniser.hpp
  3. *
  4. * Purpose: String token parsing class using char-sets.
  5. *
  6. * Created: 17th October 2005
  7. * Updated: 10th August 2009
  8. *
  9. * Home: http://stlsoft.org/
  10. *
  11. * Copyright (c) 2005-2009, Matthew Wilson and Synesis Software
  12. * All rights reserved.
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright notice, this
  18. * list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright notice,
  20. * this list of conditions and the following disclaimer in the documentation
  21. * and/or other materials provided with the distribution.
  22. * - Neither the name(s) of Matthew Wilson and Synesis Software nor the names of
  23. * any contributors may be used to endorse or promote products derived from
  24. * this software without specific prior written permission.
  25. *
  26. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36. * POSSIBILITY OF SUCH DAMAGE.
  37. *
  38. * ////////////////////////////////////////////////////////////////////// */
  39. /** \file stlsoft/string/charset_tokeniser.hpp
  40. *
  41. * \brief [C++ only] Definition of the stlsoft::charset_tokeniser class
  42. * template
  43. * (\ref group__library__string "String" Library).
  44. */
  45. #ifndef STLSOFT_INCL_STLSOFT_STRING_HPP_CHARSET_TOKENISER
  46. #define STLSOFT_INCL_STLSOFT_STRING_HPP_CHARSET_TOKENISER
  47. #ifndef STLSOFT_DOCUMENTATION_SKIP_SECTION
  48. # define STLSOFT_VER_STLSOFT_STRING_HPP_CHARSET_TOKENISER_MAJOR 2
  49. # define STLSOFT_VER_STLSOFT_STRING_HPP_CHARSET_TOKENISER_MINOR 0
  50. # define STLSOFT_VER_STLSOFT_STRING_HPP_CHARSET_TOKENISER_REVISION 4
  51. # define STLSOFT_VER_STLSOFT_STRING_HPP_CHARSET_TOKENISER_EDIT 25
  52. #endif /* !STLSOFT_DOCUMENTATION_SKIP_SECTION */
  53. /* /////////////////////////////////////////////////////////////////////////
  54. * Compatibility
  55. */
  56. /*
  57. [Incompatibilies-start]
  58. STLSOFT_COMPILER_IS_DMC: __DMC__<0x0839
  59. STLSOFT_COMPILER_IS_MSVC: _MSC_VER<1200
  60. STLSOFT_COMPILER_IS_WATCOM:
  61. [Incompatibilies-end]
  62. */
  63. /* /////////////////////////////////////////////////////////////////////////
  64. * Includes
  65. */
  66. #ifndef STLSOFT_INCL_STLSOFT_H_STLSOFT
  67. # include <stlsoft/stlsoft.h>
  68. #endif /* !STLSOFT_INCL_STLSOFT_H_STLSOFT */
  69. #if defined(STLSOFT_COMPILER_IS_MSVC) && \
  70. _MSC_VER < 1200
  71. # error stlsoft/string/charset_tokeniser.hpp is not compatible with Visual C++ 5.0 or earlier
  72. #endif /* compiler */
  73. #ifndef STLSOFT_INCL_STLSOFT_STRING_HPP_STRING_TOKENISER
  74. # include <stlsoft/string/string_tokeniser.hpp>
  75. #endif /* !STLSOFT_INCL_STLSOFT_STRING_HPP_STRING_TOKENISER */
  76. #ifndef STLSOFT_INCL_ALGORITHM
  77. # define STLSOFT_INCL_ALGORITHM
  78. # include <algorithm>
  79. #endif /* !STLSOFT_INCL_ALGORITHM */
  80. #ifdef STLSOFT_UNITTEST
  81. # ifndef STLSOFT_INCL_STLSOFT_STRING_HPP_SIMPLE_STRING
  82. # include <stlsoft/string/simple_string.hpp>
  83. # endif /* !STLSOFT_INCL_STLSOFT_STRING_HPP_SIMPLE_STRING */
  84. # ifndef STLSOFT_INCL_STLSOFT_STRING_HPP_STRING_VIEW
  85. # include <stlsoft/string/string_view.hpp>
  86. # endif /* !STLSOFT_INCL_STLSOFT_STRING_HPP_STRING_VIEW */
  87. # include <algorithm>
  88. # include <string>
  89. #endif /* STLSOFT_UNITTEST */
  90. /* /////////////////////////////////////////////////////////////////////////
  91. * Namespace
  92. */
  93. #ifndef _STLSOFT_NO_NAMESPACE
  94. namespace stlsoft
  95. {
  96. #endif /* _STLSOFT_NO_NAMESPACE */
  97. /* /////////////////////////////////////////////////////////////////////////
  98. * Classes
  99. */
  100. /** \brief Comparator for the stlsoft::charset_tokeniser class template.
  101. *
  102. * \ingroup group__library__string
  103. */
  104. template <ss_typename_param_k S>
  105. struct charset_comparator
  106. {
  107. public:
  108. typedef S delimiter_type;
  109. private:
  110. template <ss_typename_param_k const_iterator>
  111. static bool equal_(delimiter_type const& delimiter, const_iterator &it)
  112. {
  113. return delimiter.end() != stlsoft_ns_qual_std(find)(delimiter.begin(), delimiter.end(), *it);
  114. }
  115. template <ss_typename_param_k const_iterator>
  116. static const_iterator advance_(const_iterator it, delimiter_type const& delimiter)
  117. {
  118. return it + 1;
  119. }
  120. public:
  121. ///
  122. ///
  123. /// \note This is only for compatibility with earlier versions of the string tokeniser
  124. template <ss_typename_param_k const_iterator>
  125. static bool not_equal(delimiter_type const& delimiter, const_iterator &it)
  126. {
  127. return !equal_(delimiter, it);
  128. }
  129. static ss_size_t length(delimiter_type const& /* delimiter */)
  130. {
  131. return 1;
  132. }
  133. template <ss_typename_param_k const_iterator>
  134. static bool test_start_token_advance(const_iterator &it, const_iterator end, delimiter_type const& delimiter)
  135. {
  136. return equal_(delimiter, it) ? (it = advance_(it, delimiter), true) : false;
  137. }
  138. template <ss_typename_param_k const_iterator>
  139. static bool test_end_token_advance(const_iterator &it, const_iterator end, delimiter_type const& delimiter)
  140. {
  141. return equal_(delimiter, it) ? (it = advance_(it, delimiter), true) : false;
  142. }
  143. template <ss_typename_param_k const_iterator>
  144. static const_iterator nonskip_move_to_start(const_iterator it, const_iterator end, delimiter_type const& delimiter)
  145. {
  146. return it;
  147. }
  148. template <ss_typename_param_k const_iterator>
  149. static bool test_end_token(const_iterator it, const_iterator end, delimiter_type const& delimiter)
  150. {
  151. return equal_(delimiter, it);
  152. }
  153. template <ss_typename_param_k const_iterator>
  154. static const_iterator find_next_start(const_iterator it, const_iterator end, delimiter_type const& delimiter)
  155. {
  156. return advance_(it, delimiter);
  157. }
  158. };
  159. /** \brief A class template that provides string tokenising behaviour, where the delimiter is a character set, a la <code>strtok()</code>
  160. *
  161. * \ingroup group__library__string
  162. *
  163. * This class takes a string, and a character-set delimiter, and fashions
  164. * a sequence from the given string, with each element determined with
  165. * respect to the delimiter. It is derived from stlsoft::string_tokeniser,
  166. * and effectively defines a specialisation of it, in order to make it
  167. * simpler to specialise. All that's usually required is to specialise
  168. * the string type and, optionally, the blanks policy.
  169. *
  170. * \param S The string type
  171. * \param B The blank skipping policy type. Defaults to skip_blank_tokens&lt;true&gt;
  172. * \param V The value type (the string type that will be used for the values). Defaults to \c S
  173. * \param T The string type traits type. Defaults to string_tokeniser_type_traits&lt;S, V&gt;
  174. * \param D The delimiter type (can be a string type or a character type). Defaults to \c S
  175. * \param P The tokeniser comparator type. Defaults to string_tokeniser_comparator&lt;D, S, T&gt;
  176. *
  177. */
  178. template< ss_typename_param_k S
  179. #ifdef STLSOFT_CF_TEMPLATE_CLASS_DEFAULT_CLASS_ARGUMENT_SUPPORT
  180. , ss_typename_param_k B = skip_blank_tokens<true>
  181. , ss_typename_param_k V = S
  182. , ss_typename_param_k T = string_tokeniser_type_traits<S, V>
  183. , ss_typename_param_k D = S
  184. , ss_typename_param_k P = charset_comparator<S>
  185. #else /* ? STLSOFT_CF_TEMPLATE_CLASS_DEFAULT_CLASS_ARGUMENT_SUPPORT */
  186. , ss_typename_param_k B
  187. , ss_typename_param_k V
  188. , ss_typename_param_k T
  189. , ss_typename_param_k D
  190. , ss_typename_param_k P
  191. #endif /* STLSOFT_CF_TEMPLATE_CLASS_DEFAULT_CLASS_ARGUMENT_SUPPORT */
  192. >
  193. class charset_tokeniser
  194. : public string_tokeniser<S, D, B, V, T, P>
  195. {
  196. /// \name Member Types
  197. /// @{
  198. private:
  199. typedef string_tokeniser<S, D, B, V, T, P> parent_class_type;
  200. public:
  201. /// The current parameterisation of the type
  202. typedef charset_tokeniser<S, B, V, T, D, P> class_type;
  203. /// The sequence string type
  204. typedef ss_typename_type_k parent_class_type::string_type string_type;
  205. /// The delimiter type
  206. typedef ss_typename_type_k parent_class_type::delimiter_type delimiter_type;
  207. /// The blanks policy type
  208. typedef ss_typename_type_k parent_class_type::blanks_policy_type blanks_policy_type;
  209. /// The value type
  210. typedef ss_typename_type_k parent_class_type::value_type value_type;
  211. /// The traits type
  212. typedef ss_typename_type_k parent_class_type::traits_type traits_type;
  213. /// The tokeniser comparator type
  214. typedef ss_typename_type_k parent_class_type::comparator_type comparator_type;
  215. /// The character type
  216. typedef ss_typename_type_k parent_class_type::char_type char_type;
  217. /// The size type
  218. typedef ss_typename_type_k parent_class_type::size_type size_type;
  219. #if 0
  220. /// The difference type
  221. typedef ss_typename_type_k parent_class_type::difference_type difference_type;
  222. #endif /* 0 */
  223. /// The non-mutating (const) reference type
  224. typedef ss_typename_type_k parent_class_type::const_reference const_reference;
  225. /// The non-mutating (const) iterator type
  226. typedef ss_typename_type_k parent_class_type::const_iterator const_iterator;
  227. /// @}
  228. public:
  229. /// \name Construction
  230. /// @{
  231. public:
  232. /// Tokenise the given C-string with the given delimiter
  233. ///
  234. /// \param psz Pointer to C-string whose contents will be tokenised
  235. /// \param charSet The delimiter to perform the tokenisation
  236. ///
  237. /// \note The tokeniser class takes a copy of \c psz. It does not alter the contents of \c psz
  238. charset_tokeniser(char_type const* psz, delimiter_type const& charSet)
  239. : parent_class_type(psz, charSet)
  240. {}
  241. // Define the string_type overload if there member template ctors are not supported, or
  242. // they are correctly discriminated
  243. #if !defined(STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT) || \
  244. defined(STLSOFT_CF_MEMBER_TEMPLATE_CTOR_OVERLOAD_DISCRIMINATED)
  245. /// Tokenise the given string with the given delimiter
  246. ///
  247. /// \param str The string whose contents will be tokenised
  248. /// \param charSet The delimiter to perform the tokenisation
  249. ///
  250. /// \note The tokeniser class takes a copy of \c str. It does not alter the contents of \c str
  251. charset_tokeniser(string_type const& str, delimiter_type const& charSet)
  252. : parent_class_type(str, charSet)
  253. {}
  254. #endif /* !STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT || STLSOFT_CF_MEMBER_TEMPLATE_CTOR_OVERLOAD_DISCRIMINATED */
  255. // Define the template overload if member template ctors are supported
  256. #if defined(STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT)
  257. /// Tokenise the given string with the given delimiter
  258. ///
  259. /// \param str The string whose contents will be tokenised
  260. /// \param charSet The delimiter to perform the tokenisation
  261. ///
  262. /// \note The tokeniser class takes a copy of \c str. It does not alter the contents of \c str
  263. template <ss_typename_param_k S1>
  264. charset_tokeniser(S1 const& str, delimiter_type const& charSet)
  265. : parent_class_type(str, charSet)
  266. {}
  267. #endif /* STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT */
  268. /// Tokenise the specified length of the given string with the given delimiter
  269. ///
  270. /// \param psz Pointer to C-string whose contents will be tokenised
  271. /// \param cch The number of characters in \c psz to use
  272. /// \param charSet The delimiter to perform the tokenisation
  273. ///
  274. /// \note The tokeniser class takes a copy of \c psz. It does not alter the contents of \c psz
  275. charset_tokeniser(char_type const* psz, size_type cch, delimiter_type const& charSet)
  276. : parent_class_type(psz, cch, charSet)
  277. {}
  278. #if !defined(STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT) || \
  279. defined(STLSOFT_CF_MEMBER_TEMPLATE_CTOR_OVERLOAD_DISCRIMINATED)
  280. /// \brief Tokenise the given range with the given delimiter
  281. ///
  282. /// \param from The start of the asymmetric range to tokenise
  283. /// \param to The start of the asymmetric range to tokenise
  284. /// \param charSet The delimiter to use
  285. charset_tokeniser(char_type const* from, char_type const* to, delimiter_type const& charSet)
  286. : parent_class_type(from, to, charSet)
  287. {}
  288. #endif /* !STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT || STLSOFT_CF_MEMBER_TEMPLATE_CTOR_OVERLOAD_DISCRIMINATED */
  289. #if defined(STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT)
  290. /// Tokenise the given range with the given delimiter
  291. ///
  292. /// \param from The start of the asymmetric range to tokenise
  293. /// \param to The start of the asymmetric range to tokenise
  294. /// \param charSet The delimiter to use
  295. template <ss_typename_param_k I>
  296. charset_tokeniser(I from, I to, delimiter_type const& charSet)
  297. : parent_class_type(from, to, charSet)
  298. {}
  299. #endif /* STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT */
  300. /// @}
  301. };
  302. /* /////////////////////////////////////////////////////////////////////////
  303. * Unit-testing
  304. */
  305. #ifdef STLSOFT_UNITTEST
  306. # include "./unittest/charset_tokeniser_unittest_.h"
  307. #endif /* STLSOFT_UNITTEST */
  308. /* ////////////////////////////////////////////////////////////////////// */
  309. #ifndef _STLSOFT_NO_NAMESPACE
  310. } // namespace stlsoft
  311. #endif /* _STLSOFT_NO_NAMESPACE */
  312. /* ////////////////////////////////////////////////////////////////////// */
  313. #endif /* !STLSOFT_INCL_STLSOFT_STRING_HPP_CHARSET_TOKENISER */
  314. /* ///////////////////////////// end of file //////////////////////////// */