You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

383 lines
10 KiB

  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /*
  18. * $Id: IANACharset.cpp 470094 2006-11-01 20:41:30Z amassari $
  19. */
  20. // ---------------------------------------------------------------------------
  21. // This program is designed to parse an XML file containing the valid IANA
  22. // encodings. It will build a DOM tree from that source file and spit out
  23. // a C++ code fragment that represents the table required by the TransService
  24. // class to check for valid IANA encodings before creating the corresponding
  25. // transcoder
  26. //
  27. // The file format is pretty simple and this program is not intended to be
  28. // industrial strength by any means. Its use by anyone but the author is
  29. // at the user's own risk.
  30. //
  31. // ---------------------------------------------------------------------------
  32. // ---------------------------------------------------------------------------
  33. // Includes
  34. // ---------------------------------------------------------------------------
  35. #include <stdio.h>
  36. #include <xercesc/util/PlatformUtils.hpp>
  37. #include <xercesc/sax/SAXParseException.hpp>
  38. #include <xercesc/parsers/XercesDOMParser.hpp>
  39. #include <xercesc/dom/DOM.hpp>
  40. #include "IANACharset_ErrHandler.hpp"
  41. // ---------------------------------------------------------------------------
  42. // Const data
  43. // ---------------------------------------------------------------------------
  44. enum ErrReturns
  45. {
  46. ErrReturn_Success = 0
  47. , ErrReturn_BadParameters = 1
  48. , ErrReturn_OutFileOpenFailed = 4
  49. , ErrReturn_ParserInit = 5
  50. , ErrReturn_ParseErr = 6
  51. , ErrReturn_SrcFmtError = 7
  52. };
  53. // ---------------------------------------------------------------------------
  54. // Local data
  55. //
  56. // gOutPath
  57. // This is the output directory, which is given on the command
  58. // line as /OutPath=. It's just the path, not a file name.
  59. //
  60. // gSrcFile
  61. // This is the IANA encodings input file.
  62. //
  63. // ---------------------------------------------------------------------------
// Command line values. Both are set by parseParms and point into the argV
// strings, just past the '=' of the matching parameter.
const XMLCh* gOutPath = 0;
const XMLCh* gSrcFile = 0;
// Stream for the generated header. Opened by startOutput, closed by endOutput.
static FILE* gOutFile;
// Set to true by startOutput and cleared by nextEncoding once the first array
// entry is written; decides whether an entry gets a leading comma.
static bool gFirst = false;
  68. // ---------------------------------------------------------------------------
  69. // Local functions
  70. // ---------------------------------------------------------------------------
  71. //
  72. // This method is called to parse the parameters. They must be in this
  73. // order and format, for simplicity:
  74. //
  75. // /SrcFile=xxx /OutPath=xxx
  76. //
  77. static bool parseParms(const int argC, XMLCh** argV)
  78. {
  79. if (argC < 3)
  80. return false;
  81. unsigned int curParm = 1;
  82. if (XMLString::startsWith(argV[curParm], L"/SrcFile="))
  83. {
  84. gSrcFile = &argV[curParm][9];
  85. }
  86. else
  87. {
  88. return false;
  89. }
  90. curParm++;
  91. if (XMLString::startsWith(argV[curParm], L"/OutPath="))
  92. {
  93. gOutPath = &argV[curParm][9];
  94. }
  95. else
  96. {
  97. return false;
  98. }
  99. return true;
  100. }
  101. static void parseError(const XMLException& toCatch)
  102. {
  103. wprintf
  104. (
  105. L"Exception\n (Line.File):%d.%s\n ERROR: %s\n\n"
  106. , toCatch.getSrcLine()
  107. , toCatch.getSrcFile()
  108. , toCatch.getMessage()
  109. );
  110. throw ErrReturn_ParseErr;
  111. }
  112. static void parseError(const SAXParseException& toCatch)
  113. {
  114. wprintf
  115. (
  116. L"SAX Parse Error:\n (Line.Col.SysId): %d.%d.%s\n ERROR: %s\n\n"
  117. , toCatch.getLineNumber()
  118. , toCatch.getColumnNumber()
  119. , toCatch.getSystemId()
  120. , toCatch.getMessage()
  121. );
  122. throw ErrReturn_ParseErr;
  123. }
  124. static void startOutput(const XMLCh* const outPath)
  125. {
  126. //
  127. // Ok, lets try to open the the output file.
  128. //
  129. const unsigned int bufSize = 4095;
  130. XMLCh tmpBuf[bufSize + 1];
  131. swprintf(tmpBuf, L"%s/%s.hpp", outPath, L"IANAEncodings");
  132. gOutFile = _wfopen(tmpBuf, L"wt");
  133. if (!gOutFile)
  134. {
  135. wprintf(L"Could not open the output file: %s\n\n", tmpBuf);
  136. throw ErrReturn_OutFileOpenFailed;
  137. }
  138. //
  139. // Ok, lets output the grunt data at the start of the file. We put out a
  140. // comment that indicates its a generated file, and the title string.
  141. //
  142. fwprintf
  143. (
  144. gOutFile
  145. , L"// ----------------------------------------------------------------\n"
  146. L"// This file was generated from the IANA charset source.\n"
  147. L"// so do not edit this file directly!!\n"
  148. L"// ----------------------------------------------------------------\n\n"
  149. L"#if !defined(IANAENCODINGS_HPP)\n"
  150. L"#define IANAENCODINGS_HPP\n\n"
  151. L"#include <xercesc/util/XercesDefs.hpp>\n\n"
  152. L"XERCES_CPP_NAMESPACE_BEGIN\n\n"
  153. );
  154. //
  155. // Output the leading part of the array declaration. Its just an
  156. // array of pointers to Unicode chars.
  157. //
  158. fwprintf(gOutFile, L"const XMLCh gEncodingArray[][46] = \n{\n");
  159. //
  160. // Reset first element trigger
  161. gFirst = true;
  162. }
  163. static void nextEncoding(const XMLCh* const encodingName)
  164. {
  165. // Store the straight Unicode format as numeric character
  166. // values.
  167. if (gFirst)
  168. {
  169. fwprintf(gOutFile, L" { ");
  170. gFirst = false;
  171. }
  172. else
  173. {
  174. fwprintf(gOutFile, L" , { ");
  175. }
  176. const XMLCh* rawData = encodingName;
  177. while (*rawData)
  178. fwprintf(gOutFile, L"0x%04lX,", *rawData++);
  179. fwprintf(gOutFile, L"0x00 }\n");
  180. }
  181. static void endOutput(const unsigned int encCount)
  182. {
  183. // And close out the array declaration
  184. fwprintf(gOutFile, L"\n};\n");
  185. // Output the const size value
  186. fwprintf(gOutFile, L"const unsigned int gEncodingArraySize = %d;\n\n", encCount);
  187. fwprintf
  188. (
  189. gOutFile
  190. , L"XERCES_CPP_NAMESPACE_END\n\n"
  191. L"#endif\n\n"
  192. );
  193. // Close the output file
  194. fclose(gOutFile);
  195. }
  196. static void usage()
  197. {
  198. wprintf(L"Usage:\n IANACharset /SrcFile=xx /OutPath=xx\n\n");
  199. }
  200. // ---------------------------------------------------------------------------
  201. // Program entry point
  202. // ---------------------------------------------------------------------------
  203. extern "C" int wmain(int argC, XMLCh** argV)
  204. {
  205. try
  206. {
  207. XMLPlatformUtils::Initialize();
  208. }
  209. catch(const XMLException& toCatch)
  210. {
  211. wprintf(L"Parser init error.\n ERROR: %s\n\n", toCatch.getMessage());
  212. return ErrReturn_ParserInit;
  213. }
  214. //
  215. // Lets check the parameters and save them away in globals for use by
  216. // the processing code.
  217. //
  218. if (!parseParms(argC, argV))
  219. {
  220. usage();
  221. XMLPlatformUtils::Terminate();
  222. return ErrReturn_BadParameters;
  223. }
  224. DOMDocument* srcDoc = 0;
  225. const unsigned int bufSize = 4095;
  226. XMLCh tmpFileBuf[bufSize + 1];
  227. try
  228. {
  229. try
  230. {
  231. // Build the input file name
  232. swprintf
  233. (
  234. tmpFileBuf
  235. , L"%s"
  236. , gSrcFile
  237. );
  238. //
  239. // Ok, lets invoke the DOM parser on the input file and build
  240. // a DOM tree. Turn on validation when we do this.
  241. //
  242. XercesDOMParser parser;
  243. parser.setDoValidation(true);
  244. IANACharsetErrHandler errHandler;
  245. parser.setErrorHandler(&errHandler);
  246. parser.parse(tmpFileBuf);
  247. srcDoc = parser.adoptDocument();
  248. }
  249. catch(const XMLException& toCatch)
  250. {
  251. parseError(toCatch);
  252. }
  253. //
  254. // Get the root element.
  255. //
  256. DOMElement* rootElem = srcDoc->getDocumentElement();
  257. //
  258. // Ok, its good enough to get started. So lets call the start output
  259. // method.
  260. //
  261. startOutput(gOutPath);
  262. //
  263. // Loop through the children of this node, which should take us
  264. // through the optional Warning, Error, and Validity subsections.
  265. //
  266. DOMNode* encNode = rootElem->getFirstChild();
  267. unsigned int count = 0;
  268. while (encNode)
  269. {
  270. // Skip over text nodes or comment nodes ect...
  271. if (encNode->getNodeType() != DOMNode::ELEMENT_NODE)
  272. {
  273. encNode = encNode->getNextSibling();
  274. continue;
  275. }
  276. // Convert it to an element node
  277. const DOMElement* encElem = (const DOMElement*)encNode;
  278. // Now get its tag name
  279. const XMLCh* tagName = encElem->getTagName();
  280. if (XMLString::compareString(tagName, L"Encoding"))
  281. {
  282. wprintf(L"Expected an Encoding node\n\n");
  283. throw ErrReturn_SrcFmtError;
  284. }
  285. //
  286. // Ok, lets pull out the encoding name and output it to the file
  287. //
  288. const XMLCh* encName = encElem->getAttribute(L"name");
  289. nextEncoding(encName);
  290. count++;
  291. // Move to the next child of the source element
  292. encNode = encNode->getNextSibling();
  293. }
  294. endOutput(count);
  295. }
  296. catch(const ErrReturns retVal)
  297. {
  298. // And call the termination method
  299. delete srcDoc;
  300. XMLPlatformUtils::Terminate();
  301. return retVal;
  302. }
  303. delete srcDoc;
  304. // And call the termination method
  305. XMLPlatformUtils::Terminate();
  306. // Went ok, so return success
  307. return ErrReturn_Success;
  308. }
  309. // -----------------------------------------------------------------------
  310. // IANACharsetErrHandler: Implementation of the error handler interface
  311. // -----------------------------------------------------------------------
  312. void IANACharsetErrHandler::warning(const SAXParseException& toCatch)
  313. {
  314. parseError(toCatch);
  315. }
  316. void IANACharsetErrHandler::error(const SAXParseException& toCatch)
  317. {
  318. parseError(toCatch);
  319. }
  320. void IANACharsetErrHandler::fatalError(const SAXParseException& toCatch)
  321. {
  322. parseError(toCatch);
  323. }
  324. void IANACharsetErrHandler::resetErrors()
  325. {
  326. }