// (Repository web-page residue that preceded this source -- a topic-selection
// notice and the "383 lines / 10 KiB" file summary -- is not part of the program.)

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: IANACharset.cpp 470094 2006-11-01 20:41:30Z amassari $
*/
// ---------------------------------------------------------------------------
// This program is designed to parse an XML file containing the valid IANA
// encodings. It will build a DOM tree from that source file and spit out
// a C++ code fragment that represents the table required by the TransService
// class to check for valid IANA encodings before creating the corresponding
// transcoder.
//
// The file format is pretty simple and this program is not intended to be
// industrial strength by any means. Its use by anyone but the author is
// at the user's own risk.
//
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <stdio.h>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/sax/SAXParseException.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/dom/DOM.hpp>
#include "IANACharset_ErrHandler.hpp"
// ---------------------------------------------------------------------------
// Const data
// ---------------------------------------------------------------------------
//
//  Values returned from wmain as the process exit code. They are also the
//  type thrown internally to abandon processing with a given result.
//
enum ErrReturns
{
    ErrReturn_Success           = 0,    // Everything went ok
    ErrReturn_BadParameters     = 1,    // Command line could not be parsed
    ErrReturn_OutFileOpenFailed = 4,    // Output file could not be opened
    ErrReturn_ParserInit        = 5,    // XMLPlatformUtils::Initialize failed
    ErrReturn_ParseErr          = 6,    // Error reported while parsing the source
    ErrReturn_SrcFmtError       = 7     // Source contained an unexpected element
};
// ---------------------------------------------------------------------------
//  Local data
//
//  gFirst
//      Set by startOutput and cleared by nextEncoding once the first array
//      element has been written, so that only later elements are preceded
//      by a comma.
//
//  gOutFile
//      The stream the generated header is written to. It is opened by
//      startOutput and closed by endOutput.
//
//  gSrcFile
//      This is the IANA encodings input file, which is given on the command
//      line as /SrcFile=.
//
//  gOutPath
//      This is the path to the output directory, which is given on the
//      command line as /OutPath=. It's just the path, not a name.
//
// ---------------------------------------------------------------------------
static bool     gFirst   = false;
static FILE*    gOutFile = 0;
const XMLCh*    gSrcFile = 0;
const XMLCh*    gOutPath = 0;
// ---------------------------------------------------------------------------
// Local functions
// ---------------------------------------------------------------------------
//
// This method is called to parse the parameters. They must be in this
// order and format, for simplicity:
//
// /SrcFile=xxx /OutPath=xxx
//
static bool parseParms(const int argC, XMLCh** argV)
{
if (argC < 3)
return false;
unsigned int curParm = 1;
if (XMLString::startsWith(argV[curParm], L"/SrcFile="))
{
gSrcFile = &argV[curParm][9];
}
else
{
return false;
}
curParm++;
if (XMLString::startsWith(argV[curParm], L"/OutPath="))
{
gOutPath = &argV[curParm][9];
}
else
{
return false;
}
return true;
}
//
//  Reports an XMLException on the console and abandons processing.
//
//  Always throws ErrReturn_ParseErr; it never returns normally.
//
static void parseError(const XMLException& toCatch)
{
    //
    //  The source file name of the exception is a narrow string while the
    //  message is an XMLCh string. In the wide printf family of this
    //  runtime a plain %s means a wide string, so the narrow one must be
    //  formatted with %hs. The line number is cast so that the argument
    //  always matches the width of its conversion.
    //
    wprintf
    (
        L"Exception\n (Line.File):%lu.%hs\n ERROR: %s\n\n"
        , static_cast<unsigned long>(toCatch.getSrcLine())
        , toCatch.getSrcFile()
        , toCatch.getMessage()
    );
    throw ErrReturn_ParseErr;
}
static void parseError(const SAXParseException& toCatch)
{
wprintf
(
L"SAX Parse Error:\n (Line.Col.SysId): %d.%d.%s\n ERROR: %s\n\n"
, toCatch.getLineNumber()
, toCatch.getColumnNumber()
, toCatch.getSystemId()
, toCatch.getMessage()
);
throw ErrReturn_ParseErr;
}
static void startOutput(const XMLCh* const outPath)
{
//
// Ok, lets try to open the the output file.
//
const unsigned int bufSize = 4095;
XMLCh tmpBuf[bufSize + 1];
swprintf(tmpBuf, L"%s/%s.hpp", outPath, L"IANAEncodings");
gOutFile = _wfopen(tmpBuf, L"wt");
if (!gOutFile)
{
wprintf(L"Could not open the output file: %s\n\n", tmpBuf);
throw ErrReturn_OutFileOpenFailed;
}
//
// Ok, lets output the grunt data at the start of the file. We put out a
// comment that indicates its a generated file, and the title string.
//
fwprintf
(
gOutFile
, L"// ----------------------------------------------------------------\n"
L"// This file was generated from the IANA charset source.\n"
L"// so do not edit this file directly!!\n"
L"// ----------------------------------------------------------------\n\n"
L"#if !defined(IANAENCODINGS_HPP)\n"
L"#define IANAENCODINGS_HPP\n\n"
L"#include <xercesc/util/XercesDefs.hpp>\n\n"
L"XERCES_CPP_NAMESPACE_BEGIN\n\n"
);
//
// Output the leading part of the array declaration. Its just an
// array of pointers to Unicode chars.
//
fwprintf(gOutFile, L"const XMLCh gEncodingArray[][46] = \n{\n");
//
// Reset first element trigger
gFirst = true;
}
static void nextEncoding(const XMLCh* const encodingName)
{
// Store the straight Unicode format as numeric character
// values.
if (gFirst)
{
fwprintf(gOutFile, L" { ");
gFirst = false;
}
else
{
fwprintf(gOutFile, L" , { ");
}
const XMLCh* rawData = encodingName;
while (*rawData)
fwprintf(gOutFile, L"0x%04lX,", *rawData++);
fwprintf(gOutFile, L"0x00 }\n");
}
//
//  Finishes the generated header: closes the array declaration, writes the
//  element count constant, the namespace closing and the include guard
//  end, and then closes the output file.
//
//  encCount is the number of encodings that were written to the array.
//
static void endOutput(const unsigned int encCount)
{
    // And close out the array declaration
    fwprintf(gOutFile, L"\n};\n");

    // Output the const size value; the count is unsigned, so print it as such
    fwprintf(gOutFile, L"const unsigned int gEncodingArraySize = %u;\n\n", encCount);

    fwprintf
    (
        gOutFile
        , L"XERCES_CPP_NAMESPACE_END\n\n"
          L"#endif\n\n"
    );

    // Close the output file and forget the now invalid stream
    fclose(gOutFile);
    gOutFile = 0;
}
//
//  Prints the expected command line format on the console.
//
static void usage()
{
    static const wchar_t* const usageText =
        L"Usage:\n"
        L" IANACharset /SrcFile=xx /OutPath=xx\n\n";

    wprintf(usageText);
}
// ---------------------------------------------------------------------------
// Program entry point
// ---------------------------------------------------------------------------
//
//  Initializes the XML platform, parses the command line, builds a DOM
//  tree from the source file and writes one array row to the generated
//  header for each Encoding child of the root element.
//
//  Returns one of the ErrReturns values as the process exit code.
//
extern "C" int wmain(int argC, XMLCh** argV)
{
    try
    {
        XMLPlatformUtils::Initialize();
    }
    catch(const XMLException& toCatch)
    {
        wprintf(L"Parser init error.\n ERROR: %s\n\n", toCatch.getMessage());
        return ErrReturn_ParserInit;
    }

    //
    //  Lets check the parameters and save them away in globals for use by
    //  the processing code.
    //
    if (!parseParms(argC, argV))
    {
        usage();
        XMLPlatformUtils::Terminate();
        return ErrReturn_BadParameters;
    }

    DOMDocument* srcDoc = 0;
    try
    {
        try
        {
            //
            //  Ok, lets invoke the DOM parser on the input file and build
            //  a DOM tree. Turn on validation when we do this. The source
            //  file name is handed over as is; copying it into a fixed
            //  size buffer first would only risk overrunning that buffer.
            //
            XercesDOMParser parser;
            parser.setDoValidation(true);
            IANACharsetErrHandler errHandler;
            parser.setErrorHandler(&errHandler);
            parser.parse(gSrcFile);
            srcDoc = parser.adoptDocument();
        }
        catch(const XMLException& toCatch)
        {
            // Reports the error and throws ErrReturn_ParseErr
            parseError(toCatch);
        }

        //
        //  Get the root element. Without a document or a root there is
        //  nothing to generate from.
        //
        DOMElement* rootElem = srcDoc ? srcDoc->getDocumentElement() : 0;
        if (!rootElem)
        {
            wprintf(L"The source file has no root element\n\n");
            throw ErrReturn_SrcFmtError;
        }

        //
        //  Ok, its good enough to get started. So lets call the start
        //  output method.
        //
        startOutput(gOutPath);

        //
        //  Loop through the children of the root element. Every element
        //  child must be an Encoding element, whose name attribute is
        //  written to the output.
        //
        unsigned int count = 0;
        for (DOMNode* encNode = rootElem->getFirstChild();
             encNode;
             encNode = encNode->getNextSibling())
        {
            // Skip over text nodes or comment nodes etc...
            if (encNode->getNodeType() != DOMNode::ELEMENT_NODE)
                continue;

            // Convert it to an element node
            const DOMElement* encElem = static_cast<const DOMElement*>(encNode);

            // Now get its tag name; compareString returns zero on a match
            const XMLCh* tagName = encElem->getTagName();
            if (XMLString::compareString(tagName, L"Encoding"))
            {
                wprintf(L"Expected an Encoding node\n\n");
                throw ErrReturn_SrcFmtError;
            }

            //
            //  Ok, lets pull out the encoding name and output it to the
            //  file.
            //
            const XMLCh* encName = encElem->getAttribute(L"name");
            nextEncoding(encName);
            count++;
        }

        endOutput(count);
    }
    catch(const ErrReturns retVal)
    {
        //
        //  If we failed after the output file was opened, it is still
        //  open at this point, so close it. Nothing in the try block can
        //  throw once endOutput has closed the file.
        //
        if (gOutFile)
        {
            fclose(gOutFile);
            gOutFile = 0;
        }

        // And call the termination method
        delete srcDoc;
        XMLPlatformUtils::Terminate();
        return retVal;
    }

    delete srcDoc;

    // And call the termination method
    XMLPlatformUtils::Terminate();

    // Went ok, so return success
    return ErrReturn_Success;
}
// -----------------------------------------------------------------------
// IANACharsetErrHandler: Implementation of the error handler interface
// -----------------------------------------------------------------------
//
//  All three notification levels are handled the same way: the problem is
//  reported through parseError, which throws ErrReturn_ParseErr. So even a
//  warning abandons the parse.
//
void IANACharsetErrHandler::warning(const SAXParseException& toCatch)
{
    parseError(toCatch);
}

void IANACharsetErrHandler::error(const SAXParseException& toCatch)
{
    parseError(toCatch);
}

void IANACharsetErrHandler::fatalError(const SAXParseException& toCatch)
{
    parseError(toCatch);
}

void IANACharsetErrHandler::resetErrors()
{
    // Intentionally empty; the methods above keep no error state
}