You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

254 lines
8.9 KiB

  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /*
  18. * $Id: XMLRangeFactory.cpp 678879 2008-07-22 20:05:05Z amassari $
  19. */
  20. // ---------------------------------------------------------------------------
  21. // Includes
  22. // ---------------------------------------------------------------------------
  23. #include <xercesc/util/regx/XMLRangeFactory.hpp>
  24. #include <xercesc/internal/CharTypeTables.hpp>
  25. #include <xercesc/util/regx/RegxDefs.hpp>
  26. #include <xercesc/util/regx/TokenFactory.hpp>
  27. #include <xercesc/util/regx/RangeToken.hpp>
  28. #include <xercesc/util/regx/RangeTokenMap.hpp>
  29. #include <xercesc/util/regx/UnicodeRangeFactory.hpp>
  30. #include <xercesc/util/Janitor.hpp>
  31. #include <string.h>
  32. XERCES_CPP_NAMESPACE_BEGIN
  33. // ---------------------------------------------------------------------------
  34. // Local static functions
  35. // ---------------------------------------------------------------------------
  36. static void setupRange(XMLInt32* const rangeMap,
  37. const XMLCh* const theTable,
  38. unsigned int startingIndex) {
  39. const XMLCh* pchCur = theTable;
  40. // Do the ranges first
  41. while (*pchCur)
  42. {
  43. rangeMap[startingIndex++] = *pchCur++;
  44. }
  45. // Skip the range terminator
  46. pchCur++;
  47. // And then the singles until we hit its terminator
  48. while (*pchCur) {
  49. const XMLCh chSingle = *pchCur++;
  50. rangeMap[startingIndex++] = chSingle;
  51. rangeMap[startingIndex++] = chSingle;
  52. }
  53. }
  54. static unsigned int getTableLen(const XMLCh* const theTable) {
  55. XMLSize_t rangeLen = XMLString::stringLen(theTable);
  56. return (unsigned int)(rangeLen + 2*XMLString::stringLen(theTable + rangeLen + 1));
  57. }
  58. // ---------------------------------------------------------------------------
  59. // XMLRangeFactory: Constructors and Destructor
  60. // ---------------------------------------------------------------------------
  61. XMLRangeFactory::XMLRangeFactory()
  62. {
  63. }
  64. XMLRangeFactory::~XMLRangeFactory() {
  65. }
  66. // ---------------------------------------------------------------------------
  67. // XMLRangeFactory: Range creation methods
  68. // ---------------------------------------------------------------------------
  69. void XMLRangeFactory::buildRanges(RangeTokenMap *rangeTokMap) {
  70. if (fRangesCreated)
  71. return;
  72. if (!fKeywordsInitialized) {
  73. initializeKeywordMap(rangeTokMap);
  74. }
  75. TokenFactory* tokFactory = rangeTokMap->getTokenFactory();
  76. // Create space ranges
  77. unsigned int wsTblLen = getTableLen(gWhitespaceChars);
  78. RangeToken* tok = tokFactory->createRange();
  79. XMLInt32* wsRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
  80. (
  81. wsTblLen * sizeof(XMLInt32)
  82. );//new XMLInt32[wsTblLen];
  83. tok->setRangeValues(wsRange, wsTblLen);
  84. setupRange(wsRange, gWhitespaceChars, 0);
  85. // Build the internal map.
  86. tok->createMap();
  87. rangeTokMap->setRangeToken(fgXMLSpace, tok);
  88. tok = RangeToken::complementRanges(tok, tokFactory);
  89. // Build the internal map.
  90. tok->createMap();
  91. rangeTokMap->setRangeToken(fgXMLSpace, tok , true);
  92. // Create digits ranges
  93. tok = tokFactory->createRange();
  94. unsigned int digitTblLen = getTableLen(gDigitChars);
  95. XMLInt32* digitRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
  96. (
  97. digitTblLen * sizeof(XMLInt32)
  98. );//new XMLInt32[digitTblLen];
  99. tok->setRangeValues(digitRange, digitTblLen);
  100. setupRange(digitRange, gDigitChars, 0);
  101. // Build the internal map.
  102. tok->createMap();
  103. rangeTokMap->setRangeToken(fgXMLDigit, tok);
  104. tok = RangeToken::complementRanges(tok, tokFactory);
  105. // Build the internal map.
  106. tok->createMap();
  107. rangeTokMap->setRangeToken(fgXMLDigit, tok , true);
  108. // Build word ranges
  109. unsigned int baseTblLen = getTableLen(gBaseChars);
  110. unsigned int ideoTblLen = getTableLen(gIdeographicChars);
  111. unsigned int wordRangeLen = baseTblLen + ideoTblLen + digitTblLen;
  112. XMLInt32* wordRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
  113. (
  114. wordRangeLen * sizeof(XMLInt32)
  115. );//new XMLInt32[wordRangeLen];
  116. ArrayJanitor<XMLInt32> janWordRange(wordRange, XMLPlatformUtils::fgMemoryManager);
  117. setupRange(wordRange, gBaseChars, 0);
  118. setupRange(wordRange, gIdeographicChars, baseTblLen);
  119. memcpy(wordRange + baseTblLen + ideoTblLen, digitRange, digitTblLen * sizeof(XMLInt32));
  120. // Create NameChar ranges
  121. tok = tokFactory->createRange();
  122. unsigned int combTblLen = getTableLen(gCombiningChars);
  123. unsigned int extTblLen = getTableLen(gExtenderChars);
  124. unsigned int nameTblLen = wordRangeLen + combTblLen + extTblLen;
  125. XMLInt32* nameRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
  126. (
  127. (nameTblLen + 8) * sizeof(XMLInt32)
  128. );//new XMLInt32[nameTblLen + 8];
  129. tok->setRangeValues(nameRange, nameTblLen + 8);
  130. memcpy(nameRange, wordRange, wordRangeLen * sizeof(XMLInt32));
  131. setupRange(nameRange, gCombiningChars, wordRangeLen);
  132. setupRange(nameRange, gExtenderChars, wordRangeLen + combTblLen);
  133. nameRange[nameTblLen++] = chDash;
  134. nameRange[nameTblLen++] = chDash;
  135. nameRange[nameTblLen++] = chColon;
  136. nameRange[nameTblLen++] = chColon;
  137. nameRange[nameTblLen++] = chPeriod;
  138. nameRange[nameTblLen++] = chPeriod;
  139. nameRange[nameTblLen++] = chUnderscore;
  140. nameRange[nameTblLen++] = chUnderscore;
  141. tok->sortRanges();
  142. tok->compactRanges();
  143. // Build the internal map.
  144. tok->createMap();
  145. rangeTokMap->setRangeToken(fgXMLNameChar, tok);
  146. tok = RangeToken::complementRanges(tok, tokFactory);
  147. // Build the internal map.
  148. tok->createMap();
  149. rangeTokMap->setRangeToken(fgXMLNameChar, tok , true);
  150. // Create initialNameChar ranges
  151. tok = tokFactory->createRange();
  152. unsigned int initialNameTblLen = baseTblLen + ideoTblLen;
  153. XMLInt32* initialNameRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
  154. (
  155. (initialNameTblLen + 4) * sizeof(XMLInt32)
  156. );//new XMLInt32[initialNameTblLen + 4];
  157. tok->setRangeValues(initialNameRange, initialNameTblLen + 4);
  158. memcpy(initialNameRange, wordRange, initialNameTblLen * sizeof(XMLInt32));
  159. initialNameRange[initialNameTblLen++] = chColon;
  160. initialNameRange[initialNameTblLen++] = chColon;
  161. initialNameRange[initialNameTblLen++] = chUnderscore;
  162. initialNameRange[initialNameTblLen++] = chUnderscore;
  163. tok->sortRanges();
  164. tok->compactRanges();
  165. // Build the internal map.
  166. tok->createMap();
  167. rangeTokMap->setRangeToken(fgXMLInitialNameChar, tok);
  168. tok = RangeToken::complementRanges(tok, tokFactory);
  169. // Build the internal map.
  170. tok->createMap();
  171. rangeTokMap->setRangeToken(fgXMLInitialNameChar, tok , true);
  172. // Create word range
  173. // \w = [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
  174. tok = tokFactory->createRange();
  175. for(int i=0; i<=0xFFFF; i++)
  176. {
  177. unsigned short chType=UnicodeRangeFactory::getUniCategory(XMLUniCharacter::getType(i));
  178. if(chType == UnicodeRangeFactory::CHAR_PUNCTUATION ||
  179. chType == UnicodeRangeFactory::CHAR_SEPARATOR ||
  180. chType == UnicodeRangeFactory::CHAR_OTHER)
  181. tok->addRange(i, i);
  182. }
  183. tok->sortRanges();
  184. tok->compactRanges();
  185. // Build the internal map.
  186. tok->createMap();
  187. rangeTokMap->setRangeToken(fgXMLWord, tok , true);
  188. tok = RangeToken::complementRanges(tok, tokFactory);
  189. // Build the internal map.
  190. tok->createMap();
  191. rangeTokMap->setRangeToken(fgXMLWord, tok);
  192. fRangesCreated = true;
  193. }
  194. // ---------------------------------------------------------------------------
  195. // XMLRangeFactory: Keyword map initialization
  196. // ---------------------------------------------------------------------------
  197. void XMLRangeFactory::initializeKeywordMap(RangeTokenMap *rangeTokMap) {
  198. if (fKeywordsInitialized)
  199. return;
  200. rangeTokMap->addKeywordMap(fgXMLSpace, fgXMLCategory);
  201. rangeTokMap->addKeywordMap(fgXMLDigit, fgXMLCategory);
  202. rangeTokMap->addKeywordMap(fgXMLWord, fgXMLCategory);
  203. rangeTokMap->addKeywordMap(fgXMLNameChar, fgXMLCategory);
  204. rangeTokMap->addKeywordMap(fgXMLInitialNameChar, fgXMLCategory);
  205. fKeywordsInitialized = true;
  206. }
  207. XERCES_CPP_NAMESPACE_END
  208. /**
  209. * End of file XMLRangeFactory.cpp
  210. */