Xerces-C++  3.1.2
XMLChar.hpp
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * $Id: XMLChar.hpp 932887 2010-04-11 13:04:59Z borisk $
20  */
21 
22 #if !defined(XERCESC_INCLUDE_GUARD_XMLCHAR_HPP)
23 #define XERCESC_INCLUDE_GUARD_XMLCHAR_HPP
24 
26 
28 
29 // ---------------------------------------------------------------------------
30 // This file defines character classification utilities that conform to XML 1.0 and XML 1.1
31 // ---------------------------------------------------------------------------
32 // Masks for the fgCharCharsTable1_0 array
35 const XMLByte gNameCharMask = 0x4;
39 const XMLByte gXMLCharMask = 0x40;
41 
42 // ---------------------------------------------------------------------------
43 // This class is for XML 1.0
44 // ---------------------------------------------------------------------------
46 {
47 public:
48  // -----------------------------------------------------------------------
49  // Public, static methods, check the string
50  // -----------------------------------------------------------------------
51  static bool isAllSpaces
52  (
53  const XMLCh* const toCheck
54  , const XMLSize_t count
55  );
56 
57  static bool containsWhiteSpace
58  (
59  const XMLCh* const toCheck
60  , const XMLSize_t count
61  );
62 
63  static bool isValidNmtoken
64  (
65  const XMLCh* const toCheck
66  , const XMLSize_t count
67  );
68 
69  static bool isValidName
70  (
71  const XMLCh* const toCheck
72  , const XMLSize_t count
73  );
74 
75  static bool isValidName
76  (
77  const XMLCh* const toCheck
78  );
79 
80  static bool isValidNCName
81  (
82  const XMLCh* const toCheck
83  , const XMLSize_t count
84  );
85 
86  static bool isValidQName
87  (
88  const XMLCh* const toCheck
89  , const XMLSize_t count
90  );
91 
92  // -----------------------------------------------------------------------
93  // Public, static methods, check the XMLCh
94  // surrogate pair is assumed if second parameter is not null
95  // -----------------------------------------------------------------------
96  static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
97  static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
98  static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
99  static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
100  static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
101  static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
102  static bool isWhitespace(const XMLCh toCheck);
103  static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2);
104  static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
105 
106  static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
107  static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
108  static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
109 
110  // -----------------------------------------------------------------------
111  // Special Non-conformant Public, static methods
112  // -----------------------------------------------------------------------
116  static bool isNELRecognized();
117 
121  static void enableNELWS();
122 
123 private:
124  // -----------------------------------------------------------------------
125  // Unimplemented constructors and operators
126  // -----------------------------------------------------------------------
127  XMLChar1_0();
128 
129  // -----------------------------------------------------------------------
130  // Static data members
131  //
132  // fgCharCharsTable1_0
133  // The character characteristics table. Bits in each byte, represent
134  // the characteristics of each character. It is generated via some
135  // code and then hard coded into the cpp file for speed.
136  //
137  // enableNEL
138  // Flag that represents whether NEL and LSEP newline recognition is enabled
139  // or disabled
140  // -----------------------------------------------------------------------
141  static XMLByte fgCharCharsTable1_0[0x10000];
142  static bool enableNEL;
143 
144  friend class XMLReader;
145 };
146 
147 
148 // ---------------------------------------------------------------------------
149 // XMLChar1_0: Public, static methods
150 // ---------------------------------------------------------------------------
151 inline bool XMLChar1_0::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
152 {
153  // An XML letter is a FirstNameChar minus ':' and '_'.
154  if (!toCheck2) {
155  return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0)
156  && (toCheck != chColon) && (toCheck != chUnderscore));
157  }
158  return false;
159 }
160 
161 inline bool XMLChar1_0::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
162 {
163  if (!toCheck2)
164  return ((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0);
165  return false;
166 }
167 
168 inline bool XMLChar1_0::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
169 {
170  if (!toCheck2) {
171  return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon));
172  }
173 
174  return false;
175 }
176 
177 inline bool XMLChar1_0::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
178 {
179  if (!toCheck2)
180  return ((fgCharCharsTable1_0[toCheck] & gNameCharMask) != 0);
181  return false;
182 }
183 
184 inline bool XMLChar1_0::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
185 {
186  if (!toCheck2)
187  return ((fgCharCharsTable1_0[toCheck] & gNCNameCharMask) != 0);
188  return false;
189 }
190 
191 inline bool XMLChar1_0::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
192 {
193  if (!toCheck2)
194  return ((fgCharCharsTable1_0[toCheck] & gPlainContentCharMask) != 0);
195  else {
196  if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
197  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
198  return true;
199  }
200  return false;
201 }
202 
203 
204 inline bool XMLChar1_0::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
205 {
206  if (!toCheck2)
207  return ((fgCharCharsTable1_0[toCheck] & gSpecialStartTagCharMask) != 0);
208  return false;
209 }
210 
211 inline bool XMLChar1_0::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
212 {
213  if (!toCheck2)
214  return ((fgCharCharsTable1_0[toCheck] & gXMLCharMask) != 0);
215  else {
216  if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
217  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
218  return true;
219  }
220  return false;
221 }
222 
223 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck)
224 {
225  return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
226 }
227 
228 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
229 {
230  if (!toCheck2)
231  return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
232  return false;
233 }
234 
235 inline bool XMLChar1_0::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
236 {
237  if (!toCheck2)
238  return ((fgCharCharsTable1_0[toCheck] & gControlCharMask) != 0);
239  return false;
240 }
241 
243 
244  return enableNEL;
245 }
246 
247 
248 // ---------------------------------------------------------------------------
249 // This class is for XML 1.1
250 // ---------------------------------------------------------------------------
252 {
253 public:
254  // -----------------------------------------------------------------------
255  // Public, static methods, check the string
256  // -----------------------------------------------------------------------
257  static bool isAllSpaces
258  (
259  const XMLCh* const toCheck
260  , const XMLSize_t count
261  );
262 
263  static bool containsWhiteSpace
264  (
265  const XMLCh* const toCheck
266  , const XMLSize_t count
267  );
268 
269  static bool isValidNmtoken
270  (
271  const XMLCh* const toCheck
272  , const XMLSize_t count
273  );
274 
275  static bool isValidName
276  (
277  const XMLCh* const toCheck
278  , const XMLSize_t count
279  );
280 
281  static bool isValidName
282  (
283  const XMLCh* const toCheck
284  );
285 
286  static bool isValidNCName
287  (
288  const XMLCh* const toCheck
289  , const XMLSize_t count
290  );
291 
292  static bool isValidQName
293  (
294  const XMLCh* const toCheck
295  , const XMLSize_t count
296  );
297 
298  // -----------------------------------------------------------------------
299  // Public, static methods, check the XMLCh
300  // -----------------------------------------------------------------------
301  static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
302  static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
303  static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
304  static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
305  static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
306  static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
307  static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0);
308  static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
309 
310  static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
311  static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
312  static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
313 
314 private:
315  // -----------------------------------------------------------------------
316  // Unimplemented constructors and operators
317  // -----------------------------------------------------------------------
318  XMLChar1_1();
319 
320  // -----------------------------------------------------------------------
321  // Static data members
322  //
323  // fgCharCharsTable1_1
324  // The character characteristics table. Bits in each byte, represent
325  // the characteristics of each character. It is generated via some
326  // code and then hard coded into the cpp file for speed.
327  //
328  // -----------------------------------------------------------------------
329  static XMLByte fgCharCharsTable1_1[0x10000];
330 
331  friend class XMLReader;
332 };
333 
334 
335 // ---------------------------------------------------------------------------
336 // XMLChar1_1: Public, static methods
337 // ---------------------------------------------------------------------------
338 inline bool XMLChar1_1::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
339 {
341  return XMLChar1_0::isXMLLetter(toCheck, toCheck2);
342 }
343 
344 inline bool XMLChar1_1::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
345 {
346  if (!toCheck2)
347  return ((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0);
348  else {
349  if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
350  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
351  return true;
352  }
353  return false;
354 }
355 
356 inline bool XMLChar1_1::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
357 {
358  if (!toCheck2) {
359  return (((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon));
360  }
361  else {
362  if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
363  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
364  return true;
365  }
366  return false;
367 }
368 
369 inline bool XMLChar1_1::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
370 {
371  if (!toCheck2)
372  return ((fgCharCharsTable1_1[toCheck] & gNameCharMask) != 0);
373  else {
374  if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
375  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
376  return true;
377  }
378  return false;
379 }
380 
381 inline bool XMLChar1_1::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
382 {
383  if (!toCheck2)
384  return ((fgCharCharsTable1_1[toCheck] & gNCNameCharMask) != 0);
385  else {
386  if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
387  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
388  return true;
389  }
390  return false;
391 }
392 
393 inline bool XMLChar1_1::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
394 {
395  if (!toCheck2)
396  return ((fgCharCharsTable1_1[toCheck] & gPlainContentCharMask) != 0);
397  else {
398  if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
399  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
400  return true;
401  }
402  return false;
403 }
404 
405 
406 inline bool XMLChar1_1::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
407 {
408  if (!toCheck2)
409  return ((fgCharCharsTable1_1[toCheck] & gSpecialStartTagCharMask) != 0);
410  return false;
411 }
412 
413 inline bool XMLChar1_1::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
414 {
415  if (!toCheck2)
416  return ((fgCharCharsTable1_1[toCheck] & gXMLCharMask) != 0);
417  else {
418  if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
419  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
420  return true;
421  }
422  return false;
423 }
424 
425 inline bool XMLChar1_1::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
426 {
427  if (!toCheck2)
428  return ((fgCharCharsTable1_1[toCheck] & gWhitespaceCharMask) != 0);
429  return false;
430 }
431 
432 inline bool XMLChar1_1::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
433 {
434  if (!toCheck2)
435  return ((fgCharCharsTable1_1[toCheck] & gControlCharMask) != 0);
436  return false;
437 }
438 
439 
441 
442 #endif
static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:369
static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:184
static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:381
static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:338
static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:356
static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:177
static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:151
Definition: XMLChar.hpp:251
static bool isNELRecognized()
Return true if NEL (0x85) and LSEP (0x2028) to be treated as white space char.
Definition: XMLChar.hpp:242
static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:168
const XMLByte gFirstNameCharMask
Definition: XMLChar.hpp:34
static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:211
static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:204
const XMLByte gSpecialStartTagCharMask
Definition: XMLChar.hpp:37
static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:161
Definition: XMLChar.hpp:45
static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:344
const XMLByte gXMLCharMask
Definition: XMLChar.hpp:39
const XMLCh chUnderscore
Definition: XMLUniDefs.hpp:76
static bool isWhitespace(const XMLCh toCheck)
Definition: XMLChar.hpp:223
const XMLByte gControlCharMask
Definition: XMLChar.hpp:38
static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:393
static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:235
SIZE_T XMLSize_t
Definition: Xerces_autoconf_config.borland.hpp:86
const XMLByte gNameCharMask
Definition: XMLChar.hpp:35
XERCES_CPP_NAMESPACE_BEGIN const XMLByte gNCNameCharMask
Definition: XMLChar.hpp:33
static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:406
#define XERCES_CPP_NAMESPACE_BEGIN
Definition: XercesDefs.hpp:112
wchar_t XMLCh
Definition: Xerces_autoconf_config.borland.hpp:92
unsigned char XMLByte
Definition: XercesDefs.hpp:65
#define XMLUTIL_EXPORT
Definition: XercesDefs.hpp:162
#define XERCES_CPP_NAMESPACE_END
Definition: XercesDefs.hpp:113
static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:432
const XMLCh chColon
Definition: XMLUniDefs.hpp:53
static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:425
static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:191
static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:413
const XMLByte gWhitespaceCharMask
Definition: XMLChar.hpp:40
const XMLByte gPlainContentCharMask
Definition: XMLChar.hpp:36