You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1073 lines
37 KiB

  1. /* /////////////////////////////////////////////////////////////////////////
  2. * File: stlsoft/string/string_tokeniser.hpp
  3. *
  4. * Purpose: String token parsing class.
  5. *
  6. * Created: 6th January 2001
  7. * Updated: 31st July 2010
  8. *
  9. * Home: http://stlsoft.org/
  10. *
  11. * Copyright (c) 2001-2010, Matthew Wilson and Synesis Software
  12. * All rights reserved.
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright notice, this
  18. * list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright notice,
  20. * this list of conditions and the following disclaimer in the documentation
  21. * and/or other materials provided with the distribution.
  22. * - Neither the name(s) of Matthew Wilson and Synesis Software nor the names of
  23. * any contributors may be used to endorse or promote products derived from
  24. * this software without specific prior written permission.
  25. *
  26. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36. * POSSIBILITY OF SUCH DAMAGE.
  37. *
  38. * ////////////////////////////////////////////////////////////////////// */
  39. /** \file stlsoft/string/string_tokeniser.hpp
  40. *
  41. * \brief [C++ only] Definition of the stlsoft::string_tokeniser class
  42. * template
  43. * (\ref group__library__string "String" Library).
  44. */
  45. #ifndef STLSOFT_INCL_STLSOFT_STRING_HPP_STRING_TOKENISER
  46. #define STLSOFT_INCL_STLSOFT_STRING_HPP_STRING_TOKENISER
  47. #ifndef STLSOFT_DOCUMENTATION_SKIP_SECTION
  48. # define STLSOFT_VER_STLSOFT_STRING_HPP_STRING_TOKENISER_MAJOR 5
  49. # define STLSOFT_VER_STLSOFT_STRING_HPP_STRING_TOKENISER_MINOR 1
  50. # define STLSOFT_VER_STLSOFT_STRING_HPP_STRING_TOKENISER_REVISION 8
  51. # define STLSOFT_VER_STLSOFT_STRING_HPP_STRING_TOKENISER_EDIT 222
  52. #endif /* !STLSOFT_DOCUMENTATION_SKIP_SECTION */
  53. /* /////////////////////////////////////////////////////////////////////////
  54. * Compatibility
  55. */
  56. /*
  57. [Incompatibilies-start]
  58. STLSOFT_COMPILER_IS_DMC: __DMC__<0x0839
  59. STLSOFT_COMPILER_IS_MSVC: _MSC_VER<1200
  60. STLSOFT_COMPILER_IS_WATCOM:
  61. [Incompatibilies-end]
  62. */
  63. /* /////////////////////////////////////////////////////////////////////////
  64. * Includes
  65. */
  66. #ifndef STLSOFT_INCL_STLSOFT_H_STLSOFT
  67. # include <stlsoft/stlsoft.h>
  68. #endif /* !STLSOFT_INCL_STLSOFT_H_STLSOFT */
  69. #if defined(STLSOFT_COMPILER_IS_DMC) && \
  70. __DMC__ < 0x0839
  71. # error stlsoft/string/string_tokeniser.hpp is not compatible with Digital Mars C/C++ 3.38 or earlier
  72. #endif /* compiler */
  73. #if defined(STLSOFT_COMPILER_IS_MSVC) && \
  74. _MSC_VER < 1100
  75. # error stlsoft/string/string_tokeniser.hpp is not compatible with Visual C++ 5.0 or earlier
  76. #endif /* compiler */
  77. #ifndef STLSOFT_INCL_STLSOFT_UTIL_STD_HPP_ITERATOR_HELPER
  78. # include <stlsoft/util/std/iterator_helper.hpp>
  79. #endif /* !STLSOFT_INCL_STLSOFT_UTIL_STD_HPP_ITERATOR_HELPER */
  80. #ifndef STLSOFT_INCL_STLSOFT_SHIMS_ACCESS_HPP_STRING
  81. # include <stlsoft/shims/access/string.hpp>
  82. #endif /* !STLSOFT_INCL_STLSOFT_SHIMS_ACCESS_HPP_STRING */
  83. #ifndef STLSOFT_INCL_STLSOFT_COLLECTIONS_UTIL_HPP_COLLECTIONS
  84. # include <stlsoft/collections/util/collections.hpp>
  85. #endif /* !STLSOFT_INCL_STLSOFT_COLLECTIONS_UTIL_HPP_COLLECTIONS */
  86. #ifndef STLSOFT_INCL_ITERATOR
  87. # define STLSOFT_INCL_ITERATOR
  88. # include <iterator> // for std::distance()
  89. #endif /* !STLSOFT_INCL_ITERATOR */
  90. #ifdef STLSOFT_UNITTEST
  91. # ifndef STLSOFT_INCL_STLSOFT_STRING_HPP_SIMPLE_STRING
  92. # include <stlsoft/string/simple_string.hpp>
  93. # endif /* !STLSOFT_INCL_STLSOFT_STRING_HPP_SIMPLE_STRING */
  94. # ifndef STLSOFT_INCL_STLSOFT_STRING_HPP_STRING_VIEW
  95. # include <stlsoft/string/string_view.hpp>
  96. # endif /* STLSOFT_INCL_STLSOFT_STRING_HPP_STRING_VIEW */
  97. # include <string>
  98. #endif /* STLSOFT_UNITTEST */
  99. /* /////////////////////////////////////////////////////////////////////////
  100. * Compatibility
  101. */
  /* Selects how string_tokeniser::const_iterator holds its delimiter.
   *
   * STLSOFT_STRING_TOKENISER_CF_REQUIRE_DELIMITER_INDIRECTION is defined when
   * the user defines STLSOFT_STRING_TOKENISER_USE_DELIMITER_INDIRECTION, or
   * when _MSC_VER is less than 1300 under either the MSVC compiler or the
   * Intel compiler with WIN32 defined. When it is defined, the iterator's
   * delimiter_ref_type is a pointer to the delimiter (delimiter_type const*);
   * otherwise it is a copy of the delimiter (delimiter_type).
   */
  102. #if defined(STLSOFT_STRING_TOKENISER_USE_DELIMITER_INDIRECTION) || \
  103. ( ( defined(STLSOFT_COMPILER_IS_MSVC) && \
  104. _MSC_VER < 1300) || \
  105. ( defined(STLSOFT_COMPILER_IS_INTEL) && \
  106. defined(WIN32) && \
  107. _MSC_VER < 1300))
  108. # define STLSOFT_STRING_TOKENISER_CF_REQUIRE_DELIMITER_INDIRECTION
  109. #endif /* compiler */
  110. /* /////////////////////////////////////////////////////////////////////////
  111. * Namespace
  112. */
  113. #ifndef _STLSOFT_NO_NAMESPACE
  114. namespace stlsoft
  115. {
  116. #endif /* _STLSOFT_NO_NAMESPACE */
  117. /* /////////////////////////////////////////////////////////////////////////
  118. * Classes
  119. */
  120. // string_tokeniser_ignore_blanks
  121. /** \brief A tokenising policy for specifying whether blanks will be included or ignored (now deprecated; replaced by skip_blank_tokens)
  122. *
  123. * \ingroup group__library__string
  124. *
  125. * This policy determines whether a tokenisation will ignore blanks, or will present them as
  126. * (empty) entries in the sequence to the caller.
  127. *
  128. * \param B A boolean as to whether blanks should be ignored (\c true) or preserved (\c false)
  129. *
  130. * \deprecated Use skip_blank_tokens instead
  131. */
  132. template <ss_bool_t B>
  133. struct string_tokeniser_ignore_blanks
  134. {
  135. enum { value = B };
  136. };
  137. /** \brief A tokenising policy for specifying whether blanks will be included or ignored
  138. *
  139. * \ingroup group__library__string
  140. *
  141. * This policy determines whether a tokenisation will ignore blanks, or will present them as
  142. * (empty) entries in the sequence to the caller.
  143. *
  144. * \param B A boolean as to whether blanks should be ignored (\c true) or preserved (\c false)
  145. *
  146. * \note This supersedes string_tokeniser_ignore_blanks
  147. */
  148. template <ss_bool_t B>
  149. struct skip_blank_tokens
  150. {
  151. enum { value = B };
  152. };
  153. // string_tokeniser_type_traits
  154. /** \brief A traits class for detecting features of the string type and the value type used to specialise string_tokeniser
  155. *
  156. * \ingroup group__library__string
  157. *
  158. * This traits class has three responsibilities. First, it defines a number of
  159. * member types that are used by the string_tokeniser and
  160. * string_tokeniser::iterator classes: value_type, const_iterator_type.
  161. *
  162. * Second, it provides a means by which the iterators of the string_tokeniser
  163. * specialisation's string type can be elicited, by defining the (static)
  164. * methods begin() and end().
  165. *
  166. * Third, it provides a means by which an instance of the string_tokeniser
  167. * specialisation's value type can be constructed from a pair of iterators of
  168. * the string type, by defining the (static) create() method.
  169. *
  170. * \param S The string tokeniser string type
  171. * \param V The string tokeniser value type
  172. */
  173. template< ss_typename_param_k S
  174. , ss_typename_param_k V
  175. >
  176. struct string_tokeniser_type_traits
  177. {
  178. /// \name Member Types
  179. /// @{
  180. private:
  181. /// The string type
  182. typedef S string_type;
  183. /// The tokeniser value type
  184. typedef V tokeniser_value_type;
  185. public:
  186. /// The value type
  187. typedef ss_typename_type_k S::value_type value_type;
  188. /// The non-mutable (const) iterator type
  189. typedef ss_typename_type_k S::const_iterator const_iterator_type;
  190. /// @}
  191. /// \name Operations
  192. /// @{
  193. public:
  194. /// Returns the start of the contained sequence of the given string
  195. static const_iterator_type begin(string_type const& s)
  196. {
  197. return s.begin();
  198. }
  199. /// Returns the end of the contained sequence of the given string
  200. static const_iterator_type end(string_type const& s)
  201. {
  202. return s.end();
  203. }
  204. /// Creates an instance of the string from the given range [f:t)
  205. static tokeniser_value_type create(const_iterator_type f, const_iterator_type t)
  206. {
  207. /* There's a bug in the Metrowerks 3.0 standard library string
  208. * implementation, such that constructing from a range leaves
  209. * a corrupted sequence due to an optimistic allocation
  210. * requirement calculation result being stored as the length
  211. */
  212. #if defined(STLSOFT_COMPILER_IS_MWERKS) || \
  213. ( ( defined(STLSOFT_COMPILER_IS_INTEL) || \
  214. defined(STLSOFT_COMPILER_IS_MSVC) && \
  215. _MSC_VER == 1300))
  216. /// The size type
  217. typedef ss_typename_type_k S::size_type size_type;
  218. return tokeniser_value_type(&*f, static_cast<size_type>(t - f));
  219. #else /* ? compiler */
  220. return tokeniser_value_type(f, t);
  221. #endif /* compiler */
  222. }
  223. /// @}
  224. };
  225. // string_tokeniser_comparator
  226. /** \brief A generic comparator, used to specialise string_tokeniser, that covers most string and delimiter types
  227. *
  228. * \ingroup group__library__string
  229. *
  230. * This is the default tokeniser comparator, providing functionality for both
  231. * single-character and fixed string delimiters.
  232. *
  233. * \param D The delimiter type
  234. * \param S The string type
  235. * \param T The traits type
  236. */
  237. template< ss_typename_param_k D
  238. , ss_typename_param_k S
  239. , ss_typename_param_k T
  240. >
  241. struct string_tokeniser_comparator
  242. {
  243. /// \name Member Types
  244. /// @{
  245. public:
  246. /// The delimiter type
  247. typedef D delimiter_type;
  248. /// The string type
  249. typedef S string_type;
  250. /// The traits type
  251. typedef T traits_type;
  252. /// The non-mutating (const) iterator type
  253. typedef ss_typename_type_k traits_type::const_iterator_type const_iterator;
  254. private:
  255. typedef string_tokeniser_comparator<D, S, T> class_type;
  256. /// @}
  257. /// \name Implementation
  258. /// @{
  259. private:
  260. #if defined(STLSOFT_CF_MEMBER_TEMPLATE_FUNCTION_SUPPORT) && \
  261. ( !defined(STLSOFT_COMPILER_IS_MSVC) || \
  262. _MSC_VER >= 1200)
  263. /// Evaluates whether the contents of the two sequences are equivalent to the given extent
  264. template< ss_typename_param_k I1
  265. , ss_typename_param_k I2
  266. >
  267. static ss_bool_t is_equal_(I1 p1, I2 p2, ss_size_t n)
  268. {
  269. for(; n-- > 0; ++p1, ++p2)
  270. {
  271. if(*p1 != *p2)
  272. {
  273. return false;
  274. }
  275. }
  276. return true;
  277. }
  278. /// Evaluates whether the delimiter and the sequence are equivalent to the extent of the delimiter
  279. template< ss_typename_param_k D1
  280. , ss_typename_param_k I
  281. >
  282. static ss_bool_t is_equal_(D1 const& delim, I &p2)
  283. {
  284. return class_type::is_equal_(delim.begin(), p2, delim.length());
  285. }
  286. /// Returns the length of the delimiter
  287. template <ss_typename_param_k D1>
  288. static ss_size_t get_length_(D1 const& delim)
  289. {
  290. return delim.length();
  291. }
  292. #else /* ? STLSOFT_CF_MEMBER_TEMPLATE_FUNCTION_SUPPORT */
  293. /// Evaluates whether the contents of the two sequences are equivalent to the given extent
  294. static ss_bool_t is_equal_(string_type const& lhs, ss_typename_type_k string_type::value_type const* rhs)
  295. {
  296. { for(ss_size_t i = 0, n = lhs.length(); i < n; ++i)
  297. {
  298. if(lhs[i] != rhs[i])
  299. {
  300. return false;
  301. }
  302. }}
  303. return true;
  304. }
  305. /// Returns the length of the delimiter
  306. static ss_size_t get_length_(string_type const& s)
  307. {
  308. return s.length();
  309. }
  310. #endif // STLSOFT_CF_MEMBER_TEMPLATE_FUNCTION_SUPPORT
  311. /// Evaluates whether the delimiter and the sequence are equivalent to the extent of the delimiter
  312. static ss_bool_t is_equal_(ss_char_a_t const delim, const_iterator &it)
  313. {
  314. return delim == *it;
  315. }
  316. /// Evaluates whether the delimiter and the sequence are equivalent to the extent of the delimiter
  317. static ss_bool_t is_equal_(ss_char_w_t const delim, const_iterator &it)
  318. {
  319. return delim == *it;
  320. }
  321. /// Returns the length of the delimiter
  322. static ss_size_t get_length_(ss_char_a_t const /* delim */)
  323. {
  324. return 1;
  325. }
  326. /// Returns the length of the delimiter
  327. static ss_size_t get_length_(ss_char_w_t const /* delim */)
  328. {
  329. return 1;
  330. }
  331. #ifndef STLSOFT_DOCUMENTATION_SKIP_SECTION
  332. static const_iterator advance_(const_iterator it, delimiter_type const& delim)
  333. {
  334. return it + get_length_(delim);
  335. }
  336. #endif /* !STLSOFT_DOCUMENTATION_SKIP_SECTION */
  337. /// @}
  338. /// \name Operations
  339. /// @{
  340. public:
  341. /// Evaluates whether the delimiter and the sequence are not equivalent to the extent of the delimiter
  342. static ss_bool_t not_equal(delimiter_type const& delim, const_iterator &it)
  343. {
  344. return !is_equal_(delim, it);
  345. }
  346. /// Returns the length of the delimiter
  347. static ss_size_t length(delimiter_type const& delim)
  348. {
  349. return get_length_(delim);
  350. }
  351. #ifndef STLSOFT_DOCUMENTATION_SKIP_SECTION
  352. static ss_bool_t test_start_token_advance(const_iterator &it, const_iterator end, delimiter_type const& delim)
  353. {
  354. return is_equal_(delim, it) ? (it = advance_(it, delim), true) : false;
  355. }
  356. static ss_bool_t test_end_token_advance(const_iterator &it, const_iterator end, delimiter_type const& delim)
  357. {
  358. return is_equal_(delim, it) ? (it = advance_(it, delim), true) : false;
  359. }
  360. static const_iterator nonskip_move_to_start(const_iterator it, const_iterator end, delimiter_type const& delim)
  361. {
  362. return it;
  363. }
  364. static ss_bool_t test_end_token(const_iterator it, const_iterator end, delimiter_type const& delim)
  365. {
  366. return is_equal_(delim, it);
  367. }
  368. static const_iterator find_next_start(const_iterator it, const_iterator end, delimiter_type const& delim)
  369. {
  370. return advance_(it, delim);
  371. }
  372. #endif /* !STLSOFT_DOCUMENTATION_SKIP_SECTION */
  373. /// @}
  374. };
  375. /** \brief A class template that provides string tokenising behaviour
  376. *
  377. * \ingroup group__library__string
  378. *
  379. * This class takes a string, and a delimiter, and fashions a sequence from
  380. * the given string, with each element determined with respect to the
  381. * delimiter
  382. *
  383. * \param S The string type
  384. * \param D The delimiter type (can be a string type or a character type)
  385. * \param B The ignore-blanks type. Defaults to skip_blank_tokens&lt;true&gt;
  386. * \param V The value type (the string type that will be used for the values). Defaults to \c S
  387. * \param T The string type traits type. Defaults to string_tokeniser_type_traits&lt;S, V&gt;
  388. * \param P The tokeniser comparator type. Defaults to string_tokeniser_comparator&lt;D, S, T&gt;
  389. *
  390. * This class template provides tokenising services of a string (of type \c S)
  391. * with a delimiter (of type \c D). The four other template parameters, which are
  392. * defaulted, are used for tailoring the tokenising behaviour for special uses.
  393. *
  394. * The two typical supported tokenising scenarios are:
  395. *
  396. * - tokenising a string with a character (e.g. '\\n')
  397. * - tokenising a string with a string (e.g. "\\r\\n")
  398. *
  399. * More exotic scenarios are supported by customising the comparator and type-traits
  400. * parameters. (See stlsoft::charset_tokeniser.)
  401. *
  402. * <b>1. Tokenising a string with a character.</b>
  403. *
  404. * This uses a specialisation whereby the first template parameter is a string type,
  405. * and the second parameter is a corresponding character type.
  406. *
  407. * The following code shows a specialisation using std::string and char, and
  408. * will output: <b>abc,def,ghi,jkl,</b>
  409. *
  410. \code
  411. stlsoft::string_tokeniser<std::string, char> tokens(":abc::def:ghi:jkl::::::::::", ':');
  412. std::copy(tokens.begin(), tokens.end(), std::ostream_iterator<std::string>(std::cout, ","));
  413. \endcode
  414. *
  415. * The following code shows a specialisation using
  416. * stlsoft::basic_simple_string&lt;wchar_t&gt; and wchar_t, and
  417. * will output: <b>abc-def-ghi-jkl-</b>
  418. *
  419. \code
  420. typedef stlsoft::basic_simple_string<wchar_t> string_t;
  421. string_t s(L"|abc||def|ghi|jkl||||||||||");
  422. stlsoft::string_tokeniser<string_t, wchar_t> tokens(s, L'|');
  423. std::copy(tokens.begin(), tokens.end(), std::ostream_iterator<string_t, wchar_t>(std::wcout, L"-"));
  424. \endcode
  425. *
  426. * Optionally, you can stipulate that the blanks be retained by specifying the third
  427. * template parameter as skip_blank_tokens&lt;false&gt;, as in the following, which will
  428. * output: <b>,abc,,def,ghi,jkl,,,,,,,,,,</b>
  429. *
  430. \code
  431. stlsoft::string_tokeniser< std::string
  432. , char
  433. , stlsoft::skip_blank_tokens<false>
  434. > tokens(":abc::def:ghi:jkl::::::::::", ':');
  435. std::copy(tokens.begin(), tokens.end(), std::ostream_iterator<std::string>(std::cout, ","));
  436. \endcode
  437. *
  438. * \note The tokeniser uses \ref group__concept__shim__string_access "String Access Shims" to elicit the
  439. * string from the given type, so any type for which shims are defined can be passed to the
  440. * constructor, as in the following, which will output: <b>abc;def;ghi;jkl;</b>
  441. *
  442. \code
  443. #include <stlsoft/string/string_tokeniser.hpp>
  444. #include <winstl/shims/access/string.hpp>
  445. #include <iostream>
  446. #include <iterator>
  447. int main()
  448. {
  449. HWND hwndButton = ::CreateWindowEx(0, "BUTTON", "+abc++def+ghi+jkl++++++++++", 0, 0, 0, 0, 0, NULL, (HMENU)0, NULL, NULL);
  450. stlsoft::string_tokeniser< std::string
  451. , char
  452. , stlsoft::skip_blank_tokens<true>
  453. > tokens(hwndButton, '+');
  454. std::copy(tokens.begin(), tokens.end(), std::ostream_iterator<std::string>(std::cout, ";"));
  455. return 0;
  456. }
  457. \endcode
  458. *
  459. * <b>2. Tokenising a string with a string.</b>
  460. *
  461. * This uses a specialisation whereby the first template parameter is a string type,
  462. * and the second parameter is a corresponding string type.
  463. *
  464. * The following code shows a specialisation using std::string and std::string, and
  465. * will output: <b>abc,def,ghi,jkl,</b>
  466. *
  467. \code
  468. stlsoft::string_tokeniser<std::string, std::string> tokens("\r\nabc\r\n\r\ndef\r\nghi\r\njkl\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n", "\r\n");
  469. std::copy(tokens.begin(), tokens.end(), std::ostream_iterator<std::string>(std::cout, ","));
  470. \endcode
  471. */
  472. template< ss_typename_param_k S
  473. , ss_typename_param_k D
  474. #ifdef STLSOFT_CF_TEMPLATE_CLASS_DEFAULT_CLASS_ARGUMENT_SUPPORT
  475. , ss_typename_param_k B = skip_blank_tokens<true>
  476. , ss_typename_param_k V = S
  477. , ss_typename_param_k T = string_tokeniser_type_traits<S, V>
  478. , ss_typename_param_k P = string_tokeniser_comparator<D, S, T>
  479. #else /* ? STLSOFT_CF_TEMPLATE_CLASS_DEFAULT_CLASS_ARGUMENT_SUPPORT */
  480. , ss_typename_param_k B
  481. , ss_typename_param_k V
  482. , ss_typename_param_k T
  483. , ss_typename_param_k P
  484. #endif /* STLSOFT_CF_TEMPLATE_CLASS_DEFAULT_CLASS_ARGUMENT_SUPPORT */
  485. >
  486. class string_tokeniser
  487. : public stl_collection_tag
  488. {
  489. /// \name Member Types
  490. /// @{
  491. public:
  492. /// The current parameterisation of the type
  493. typedef string_tokeniser<S, D, B, V, T, P> class_type;
  494. /// This tokeniser parameterisation
  495. typedef string_tokeniser<S, D, B, V, T, P> tokeniser_type;
  496. /// The sequence string type
  497. typedef S string_type;
  498. /// The delimiter type
  499. typedef D delimiter_type;
  500. /// The blanks policy type
  501. typedef B blanks_policy_type;
  502. #ifndef STLSOFT_DOCUMENTATION_SKIP_SECTION
  503. typedef B ignore_blanks_type;
  504. #endif /* !STLSOFT_DOCUMENTATION_SKIP_SECTION */
  505. /// The value type
  506. typedef V value_type;
  507. /// The traits type
  508. typedef T traits_type;
  509. /// The tokeniser comparator type
  510. typedef P comparator_type;
  511. /// The character type
  512. typedef ss_typename_type_k traits_type::value_type char_type;
  513. /// The size type
  514. ///
  515. /// \note This no longer relies on a size_type member type of the traits type (T). It is defined
  516. /// as size_t
  517. typedef ss_size_t size_type;
  518. /// The difference type
  519. ///
  520. /// \note This no longer relies on a difference_type member type of the traits type (T). It is defined
  521. /// as ptrdiff_t
  522. typedef ss_ptrdiff_t difference_type;
  523. /// The non-mutating (const) reference type
  524. typedef const value_type const_reference;
  525. /// The non-mutating (const) iterator type
  526. class const_iterator;
  527. /// @}
  528. /// \name Construction
  529. /// @{
  530. public:
  531. /// Tokenise the given C-string with the given delimiter
  532. ///
  533. /// \param psz Pointer to C-string whose contents will be tokenised
  534. /// \param delim The delimiter to perform the tokenisation
  535. ///
  536. /// \note The tokeniser class takes a copy of \c psz. It does not alter the contents of \c psz
  537. string_tokeniser(char_type const* psz, delimiter_type const& delim)
  538. : m_str(psz)
  539. , m_delimiter(delim)
  540. {
  541. STLSOFT_MESSAGE_ASSERT("Delimiter of zero-length", 0 != comparator_type::length(m_delimiter));
  542. STLSOFT_ASSERT(is_valid());
  543. }
  544. // Define the string_type overload if member template ctors are not supported, or
  545. // they are correctly discriminated
  546. #if !defined(STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT) || \
  547. defined(STLSOFT_CF_MEMBER_TEMPLATE_CTOR_OVERLOAD_DISCRIMINATED)
  548. /// Tokenise the given string with the given delimiter
  549. ///
  550. /// \param str The string whose contents will be tokenised
  551. /// \param delim The delimiter to perform the tokenisation
  552. ///
  553. /// \note The tokeniser class takes a copy of \c str. It does not alter the contents of \c str
  554. string_tokeniser(string_type const& str, delimiter_type const& delim)
  555. : m_str(str)
  556. , m_delimiter(delim)
  557. {
  558. STLSOFT_MESSAGE_ASSERT("Delimiter of zero-length", 0 != comparator_type::length(m_delimiter));
  559. STLSOFT_ASSERT(is_valid());
  560. }
  561. #endif /* !STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT || STLSOFT_CF_MEMBER_TEMPLATE_CTOR_OVERLOAD_DISCRIMINATED */
  562. // Define the template overload if member template ctors are supported
  563. #if defined(STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT)
  564. /// Tokenise the given string with the given delimiter
  565. ///
  566. /// \param str The string whose contents will be tokenised
  567. /// \param delim The delimiter to perform the tokenisation
  568. ///
  569. /// \note The tokeniser class takes a copy of \c str. It does not alter the contents of \c str
  570. template <ss_typename_param_k S1>
  571. string_tokeniser(S1 const& str, delimiter_type const& delim)
  572. : m_str(c_str_data(str), c_str_len(str))
  573. , m_delimiter(delim)
  574. {
  575. STLSOFT_MESSAGE_ASSERT("Delimiter of zero-length", 0 != comparator_type::length(m_delimiter));
  576. STLSOFT_ASSERT(is_valid());
  577. }
  578. #endif /* STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT */
  579. /// Tokenise the specified length of the given string with the given delimiter
  580. ///
  581. /// \param psz Pointer to C-string whose contents will be tokenised
  582. /// \param cch The number of characters in \c psz to use
  583. /// \param delim The delimiter to perform the tokenisation
  584. ///
  585. /// \note The tokeniser class takes a copy of \c psz. It does not alter the contents of \c psz
  586. string_tokeniser(char_type const* psz, size_type cch, delimiter_type const& delim)
  587. : m_str(psz, cch)
  588. , m_delimiter(delim)
  589. {
  590. STLSOFT_MESSAGE_ASSERT("Delimiter of zero-length", 0 != comparator_type::length(m_delimiter));
  591. STLSOFT_ASSERT(is_valid());
  592. }
  593. #if !defined(STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT) || \
  594. defined(STLSOFT_CF_MEMBER_TEMPLATE_CTOR_OVERLOAD_DISCRIMINATED)
  595. /// \brief Tokenise the given range with the given delimiter
  596. ///
  597. /// \param from The start of the asymmetric range to tokenise
  598. /// \param to The end (one past the last element) of the asymmetric range to tokenise
  599. /// \param delim The delimiter to use
  600. string_tokeniser(char_type const* from, char_type const* to, delimiter_type const& delim)
  601. : m_str(from, to)
  602. , m_delimiter(delim)
  603. {
  604. STLSOFT_MESSAGE_ASSERT("Delimiter of zero-length", 0 != comparator_type::length(m_delimiter));
  605. STLSOFT_ASSERT(is_valid());
  606. }
  607. #endif /* !STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT || STLSOFT_CF_MEMBER_TEMPLATE_CTOR_OVERLOAD_DISCRIMINATED */
  608. #if defined(STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT)
  609. /// Tokenise the given range with the given delimiter
  610. ///
  611. /// \param from The start of the asymmetric range to tokenise
  612. /// \param to The end (one past the last element) of the asymmetric range to tokenise
  613. /// \param delim The delimiter to use
  614. template <ss_typename_param_k I>
  615. string_tokeniser(I from, I to, delimiter_type const& delim)
  616. : m_str(from, to)
  617. , m_delimiter(delim)
  618. {
  619. STLSOFT_MESSAGE_ASSERT("Delimiter of zero-length", 0 != comparator_type::length(m_delimiter));
  620. STLSOFT_ASSERT(is_valid());
  621. }
  622. #endif /* STLSOFT_CF_MEMBER_TEMPLATE_CTOR_SUPPORT */
  623. /// @}
  624. /// \name Iteration
  625. /// @{
  626. public:
  627. /// Iterator for string_tokeniser, supporting the Forward Iterator concept
  628. class const_iterator
  629. : public iterator_base< stlsoft_ns_qual_std(forward_iterator_tag)
  630. , value_type
  631. , ss_ptrdiff_t
  632. , void
  633. , value_type
  634. >
  635. {
  636. /// \name Member Types
  637. /// @{
  638. public:
  639. /// The type
  640. typedef const_iterator class_type;
  641. #if ( defined(STLSOFT_COMPILER_IS_DMC) && \
  642. __DMC__ <= 0x0843) || \
  643. defined(STLSOFT_COMPILER_IS_MSVC)
  644. /// The delimiter type
  645. typedef delimiter_type delimiter_type;
  646. /// The value type
  647. typedef value_type value_type;
  648. /// The traits type
  649. typedef traits_type traits_type;
  650. #else /* ? compiler */
  651. /// The delimiter type
  652. typedef ss_typename_type_k tokeniser_type::delimiter_type delimiter_type;
  653. /// The value type
  654. typedef ss_typename_type_k tokeniser_type::value_type value_type;
  655. /// The traits type
  656. typedef ss_typename_type_k tokeniser_type::traits_type traits_type;
  657. #endif /* compiler */
  658. typedef value_type effective_const_reference;
  659. private:
  660. typedef ss_typename_type_k traits_type::const_iterator_type underlying_iterator_type;
  661. # if defined(STLSOFT_STRING_TOKENISER_CF_REQUIRE_DELIMITER_INDIRECTION)
  662. typedef delimiter_type const* delimiter_ref_type;
  663. # else /* ? STLSOFT_STRING_TOKENISER_CF_REQUIRE_DELIMITER_INDIRECTION */
  664. typedef delimiter_type delimiter_ref_type;
  665. # endif /* STLSOFT_STRING_TOKENISER_CF_REQUIRE_DELIMITER_INDIRECTION */
  666. /// @}
  667. /// \name Construction
  668. /// @{
  669. private:
  670. friend class string_tokeniser<S, D, B, V, T, P>;
  671. /// Conversion constructor
  672. const_iterator(underlying_iterator_type first, underlying_iterator_type last, delimiter_type const& delimiter)
  673. : m_find0(first)
  674. , m_find1(first)
  675. , m_next(first)
  676. , m_end(last)
  677. # if defined(STLSOFT_STRING_TOKENISER_CF_REQUIRE_DELIMITER_INDIRECTION)
  678. , m_delimiter(&delimiter)
  679. # else /* ? STLSOFT_STRING_TOKENISER_CF_REQUIRE_DELIMITER_INDIRECTION */
  680. , m_delimiter(delimiter)
  681. # endif /* STLSOFT_STRING_TOKENISER_CF_REQUIRE_DELIMITER_INDIRECTION */
  682. , m_cchDelimiter(comparator_type::length(delimiter))
  683. {
  684. if(m_end != m_find0)
  685. {
  686. increment_();
  687. }
  688. }
  689. public:
  690. /// Default constructor
  691. const_iterator()
  692. : m_find0(NULL)
  693. , m_find1(NULL)
  694. , m_next(NULL)
  695. , m_end(NULL)
  696. , m_delimiter(delimiter_ref_type())
  697. , m_cchDelimiter(0)
  698. {}
  699. /// Copy constructor
  700. ///
  701. /// \param rhs The iterator whose current search position will be copied
  702. const_iterator(class_type const& rhs)
  703. : m_find0(rhs.m_find0)
  704. , m_find1(rhs.m_find1)
  705. , m_next(rhs.m_next)
  706. , m_end(rhs.m_end)
  707. , m_delimiter(rhs.m_delimiter)
  708. , m_cchDelimiter(comparator_type::length(get_delim_ref_(rhs.m_delimiter)))
  709. {}
  710. /// Copy-assignment operator
  711. ///
  712. /// \param rhs The iterator whose current search position will be copied
  713. class_type const& operator =(class_type const& rhs)
  714. {
  715. m_find0 = rhs.m_find0;
  716. m_find1 = rhs.m_find1;
  717. m_next = rhs.m_next;
  718. m_end = rhs.m_end;
  719. m_delimiter = rhs.m_delimiter;
  720. m_cchDelimiter = rhs.m_cchDelimiter;
  721. return *this;
  722. }
  723. /// @}
  724. /// \name Forward Iterator Methods
  725. /// @{
  726. public:
  727. /// Dereference operator
  728. //
  729. // This has to be V, rather than value_type, because Visual C++ thinks that S is the value_type!!
  730. const V operator *() const
  731. {
  732. return traits_type::create(m_find0, m_find1);
  733. }
  734. /// Pre-increment operator
  735. class_type& operator ++()
  736. {
  737. increment_();
  738. return *this;
  739. }
  740. /// Post-increment operator
  741. const class_type operator ++(int)
  742. {
  743. class_type ret(*this);
  744. operator ++();
  745. return ret;
  746. }
  747. /// Evaluates whether \c this and \c rhs are equivalent
  748. ss_bool_t equal(class_type const& rhs) const
  749. {
  750. STLSOFT_MESSAGE_ASSERT("Comparing iterators from different tokenisers", m_end == rhs.m_end);
  751. return m_find0 == rhs.m_find0;
  752. }
  753. /// Evaluates whether \c this and \c rhs are equivalent
  754. ss_bool_t operator == (class_type const& rhs) const
  755. {
  756. return equal(rhs);
  757. }
  758. /// Evaluates whether \c this and \c rhs are not equivalent
  759. ss_bool_t operator != (class_type const& rhs) const
  760. {
  761. return !equal(rhs);
  762. }
  763. /// @}
  764. /// \name Implementation
  765. /// @{
  766. private:
  767. static delimiter_type const& get_delim_ref_(delimiter_ref_type const& delim)
  768. {
  769. # if defined(STLSOFT_STRING_TOKENISER_CF_REQUIRE_DELIMITER_INDIRECTION)
  770. return *delim;
  771. # else /* ? STLSOFT_STRING_TOKENISER_CF_REQUIRE_DELIMITER_INDIRECTION */
  772. return delim;
  773. # endif /* STLSOFT_STRING_TOKENISER_CF_REQUIRE_DELIMITER_INDIRECTION */
  774. }
  775. void increment_()
  776. {
  777. STLSOFT_MESSAGE_ASSERT("Attempting to increment an invalid iterator", m_find0 != m_end);
  778. // This is a two-phase algorithm:
  779. //
  780. // 1. If skipping blanks, then do that. Otherwise, locate the the start-of-item
  781. // iterator (m_find0) to the previously identified start of the next item (m_next)
  782. // 2. Starting from m_find0, determine the end-of-item (m_find1)
  783. // TODO: Make this into a overload-selector, to avoid the "conditional expression is constant" warning
  784. if(blanks_policy_type::value)
  785. {
  786. // 1. Skip blanks until at start of next item
  787. for(m_find0 = m_next; m_find0 != m_end; )
  788. {
  789. if(comparator_type::not_equal(get_delim_ref_(m_delimiter), m_find0))
  790. {
  791. break;
  792. }
  793. else
  794. {
  795. m_find0 += static_cast<ss_ptrdiff_t>(m_cchDelimiter);
  796. }
  797. }
  798. }
  799. else
  800. {
  801. m_find0 = m_next;
  802. }
  803. // 2. Determine the end-of-item (m_find1), starting from m_find0
  804. for(m_find1 = m_find0; ; )
  805. {
  806. if(m_find1 == m_end)
  807. {
  808. // End of sequence. Item will be [m_find0, m_end (== m_find1))
  809. m_next = m_find1;
  810. break;
  811. }
  812. else if(comparator_type::not_equal(get_delim_ref_(m_delimiter), m_find1))
  813. {
  814. // current item does not hold a delimiter, so advance one position
  815. ++m_find1;
  816. }
  817. else
  818. {
  819. // Determine the start of the next potential element, ready
  820. // for the next call of increment_()
  821. m_next = m_find1 + static_cast<ss_ptrdiff_t>(m_cchDelimiter);
  822. break;
  823. }
  824. }
  825. }
  826. /// @}
  827. /// \name Members
  828. /// @{
  829. private:
  830. underlying_iterator_type m_find0; // start of the current item; also the only position compared by equal()
  831. underlying_iterator_type m_find1; // end of the current item, i.e. the item is the range [m_find0, m_find1)
  832. underlying_iterator_type m_next; // position from which the next call to increment_() resumes searching
  833. underlying_iterator_type m_end; // end point of the controlled sequence
  834. delimiter_ref_type m_delimiter; // the delimiter, as stored; always read through get_delim_ref_()
  835. ss_size_t m_cchDelimiter; // cached delimiter length; the distance stepped when moving past a delimiter
  836. /// @}
  837. };
  838. /// Begins the iteration
  839. ///
  840. /// \return An iterator representing the start of the sequence
  841. const_iterator begin() const
  842. {
  843. STLSOFT_ASSERT(is_valid());
  844. return const_iterator(traits_type::begin(m_str), traits_type::end(m_str), m_delimiter);
  845. }
  846. /// Ends the iteration
  847. ///
  848. /// \return An iterator representing the end of the sequence
  849. const_iterator end() const
  850. {
  851. STLSOFT_ASSERT(is_valid());
  852. return const_iterator(traits_type::end(m_str), traits_type::end(m_str), m_delimiter);
  853. }
  854. /// @}
  855. /// \name Attributes
  856. /// @{
  857. public:
  858. /// Indicates whether the search sequence is empty
  859. ss_bool_t empty() const
  860. {
  861. STLSOFT_ASSERT(is_valid());
  862. return begin() == end();
  863. }
  864. /// @}
  865. /// \name Invariant
  866. /// @{
  867. private:
  868. ss_bool_t is_valid() const
  869. {
  870. return true;
  871. }
  872. /// @}
  873. /// \name Members
  874. /// @{
  875. private:
  876. string_type const m_str; // the string to be tokenised; begin() and end() take their positions from it
  877. delimiter_type const m_delimiter; // the delimiter passed to every iterator created by begin() and end()
  878. /// @}
  879. /// \name Not to be implemented
  880. /// @{
  881. private:
  882. class_type const& operator =(class_type const&); // private declaration only (per the section heading, not to be implemented), so copy-assignment of tokenisers is proscribed
  883. /// @}
  884. };
  885. /* /////////////////////////////////////////////////////////////////////////
  886. * Operators
  887. */
  888. #if 0
  889. /** \brief Evaluates whether \c this and \c rhs are equivalent
  890. *
  891. * \ingroup group__library__string
  892. */
  893. template< ss_typename_param_k S
  894. , ss_typename_param_k D
  895. , ss_typename_param_k B
  896. , ss_typename_param_k V
  897. , ss_typename_param_k T
  898. , ss_typename_param_k P
  899. >
  900. inline ss_bool_t operator ==( ss_typename_type_k string_tokeniser<S, D, B, V, T, P>::const_iterator const& lhs
  901. , ss_typename_type_k string_tokeniser<S, D, B, V, T, P>::const_iterator const& rhs)
  902. {
  903. return lhs.equal(rhs);
  904. }
  905. /** \brief Evaluates whether \c this and \c rhs are not equivalent
  906. *
  907. * \ingroup group__library__string
  908. */
  909. template< ss_typename_param_k S
  910. , ss_typename_param_k D
  911. , ss_typename_param_k B
  912. , ss_typename_param_k V
  913. , ss_typename_param_k T
  914. , ss_typename_param_k P
  915. >
  916. inline ss_bool_t operator !=( ss_typename_type_k string_tokeniser<S, D, B, V, T, P>::const_iterator const& lhs
  917. , ss_typename_type_k string_tokeniser<S, D, B, V, T, P>::const_iterator const& rhs)
  918. {
  919. return !lhs.equal(rhs);
  920. }
  921. #endif /* 0 */
  922. /* /////////////////////////////////////////////////////////////////////////
  923. * Unit-testing
  924. */
  925. #ifdef STLSOFT_UNITTEST
  926. # include "./unittest/string_tokeniser_unittest_.h"
  927. #endif /* STLSOFT_UNITTEST */
  928. /* ////////////////////////////////////////////////////////////////////// */
  929. #if defined(STLSOFT_COMPILER_IS_DMC) && \
  930. !defined(_STLPORT_VERSION)
  931. template< ss_typename_param_k S
  932. , ss_typename_param_k D
  933. , ss_typename_param_k B
  934. , ss_typename_param_k V
  935. , ss_typename_param_k T
  936. , ss_typename_param_k P
  937. >
  938. inline forward_iterator_tag iterator_category(string_tokeniser<S, D, B, V, T, P>::const_iterator const&)
  939. {
  940. return forward_iterator_tag();
  941. }
  942. template< ss_typename_param_k S
  943. , ss_typename_param_k D
  944. , ss_typename_param_k B
  945. , ss_typename_param_k V
  946. , ss_typename_param_k T
  947. , ss_typename_param_k P
  948. >
  949. inline ss_ptrdiff_t* distance_type(string_tokeniser<S, D, B, V, T, P>::const_iterator const&)
  950. {
  951. return static_cast<ss_ptrdiff_t*>(0);
  952. }
  953. #endif /* compiler */
  954. /* ////////////////////////////////////////////////////////////////////// */
  955. #ifndef _STLSOFT_NO_NAMESPACE
  956. } // namespace stlsoft
  957. #endif /* _STLSOFT_NO_NAMESPACE */
  958. /* ////////////////////////////////////////////////////////////////////// */
  959. #endif /* !STLSOFT_INCL_STLSOFT_STRING_HPP_STRING_TOKENISER */
  960. /* ///////////////////////////// end of file //////////////////////////// */