/* ///////////////////////////////////////////////////////////////////////// * File: src/pattern.cpp * * Purpose: C string object for shwild implementation * * Created: 17th June 2005 * Updated: 20th December 2011 * * Home: http://shwild.org/ * * Copyright (c) 2005-2011, Sean Kelly and Matthew Wilson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - Neither the names of Matthew Wilson and Sean Kelly nor the names of * any contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ////////////////////////////////////////////////////////////////////// */ /* ///////////////////////////////////////////////////////////////////////// * Includes */ #include #include "shwild_stlsoft.h" #include "shwild_safestr.h" #if defined(STLSOFT_COMPILER_IS_MSVC) && \ defined(SHWILD_USING_SAFE_STR_FUNCTIONS) # pragma warning(disable : 4996) #endif /* SHWILD_USING_SAFE_STR_FUNCTIONS && compiler */ #include "pattern.hpp" #include "shwild_assert.h" #include #include #include #include #ifdef _MSC_VER # include #endif /* _MSC_VER */ /* ///////////////////////////////////////////////////////////////////////// * Compiler features */ /* ///////////////////////////////////////////////////////////////////////// * Checks */ #if CHAR_MAX >= INT_MAX || CHAR_MIN <= INT_MIN # error char must be smaller than int #endif /* ///////////////////////////////////////////////////////////////////////// * Helper functions */ namespace { char const* strnchr(char const* s, size_t len, char ch) { for(; 0 != len; ++s, --len) { if(ch == 0[s]) { return s; } } return NULL; } } // anonymous namespace /* ///////////////////////////////////////////////////////////////////////// * API functions */ static int get_token(char const* buf, size_t* len , unsigned flags) { int tok; SHWILD_ASSERT( len && buf ); *len = 1; if( ( tok = *buf ) != TOK_END ) { switch( tok ) { case '\\': if(0 != (flags & SHWILD_F_SUPPRESS_BACKSLASH_ESCAPE)) { return tok; } ++buf; ++*len; tok = *buf; switch( tok ) { case '\\': case '?': case '*': case '[': case ']': case '^': return tok; default: return TOK_INVALID; } case '?': return TOK_WILD_1; case '*': return TOK_WILD_N; case '[': if(0 != (flags & SHWILD_F_SUPPRESS_RANGE_SUPPORT)) { return tok; } return TOK_RANGE_BEG; case ']': if(0 != (flags & SHWILD_F_SUPPRESS_RANGE_SUPPORT)) { return tok; } return TOK_RANGE_END; default: return tok; } } return TOK_END; } static int get_literal( shwild_slice_t& content, node_buffer_t &scratch, char const* buf, size_t* len, unsigned flags, int bInRange ) { char const* pos = buf; int tok; size_t tok_len; SHWILD_ASSERT( NULL != buf && 0 != len ); if(!scratch.resize(1)) // This trims without discarding any hard-won heap mem. { return TOK_ENOMEM; } /* * NOTE: I'm cheating here a bit and checking the token by range rather than * against specific token values. */ for(;;) { tok = get_token( pos, &tok_len, flags ); if( bInRange && 0 == (SHWILD_F_SUPPRESS_RANGE_LITERAL_WILDCARD_SUPPORT & flags)) { if(tok == TOK_WILD_1) { tok = '?'; } if(tok == TOK_WILD_N) { tok = '*'; } } if( tok != TOK_END && tok >= CHAR_MIN && tok <= CHAR_MAX ) { const size_t sz = scratch.size(); if(!scratch.resize(1 + sz)) { return TOK_ENOMEM; } scratch[sz] = static_cast(tok); pos += tok_len; } else { break; } } content.len = scratch.size() - 1; content.ptr = &scratch[1]; *len = static_cast(pos - buf); return tok; } int get_node( node_t* node, node_buffer_t &scratch, char const* buf, size_t* len, unsigned flags ) { int tok; size_t tok_len; SHWILD_ASSERT( node && buf && len ); *len = 0; /* memset( node, 0, sizeof( node_t ) ); */ switch( tok = get_token( buf, &tok_len, flags ) ) { case TOK_INVALID: case TOK_RANGE_END: return SHWILD_RC_PARSE_ERROR; case TOK_END: node->type = NODE_END; node->data.len = 0; break; case TOK_WILD_1: node->type = NODE_WILD_1; node->data.len = 0; break; case TOK_WILD_N: node->type = NODE_WILD_N; node->data.len = 0; break; case TOK_RANGE_BEG: node->type = NODE_RANGE; node->data.len = 0; *len += tok_len; tok = get_literal( node->data, scratch, buf + tok_len, &tok_len, flags, 1 ); if(TOK_ENOMEM == tok) { return SHWILD_RC_ALLOC_ERROR; } else if(tok != TOK_RANGE_END) { *len = 0; return SHWILD_RC_PARSE_ERROR; } *len += tok_len; tok = get_token( buf + *len, &tok_len, flags ); STLSOFT_SUPPRESS_UNUSED(tok); SHWILD_ASSERT( tok == TOK_RANGE_END ); /* Account for !range */ if( 0 == (flags & SHWILD_F_SUPPRESS_RANGE_NOT_SUPPORT) && node->data.len > 0 && '^' == node->data.ptr[0] && '\\' != buf[tok_len]) { node->type = NODE_NOT_RANGE; ++node->data.ptr; --node->data.len; } /* Now must post-process if got any embedded '-'. */ if( 0 == (flags & SHWILD_F_SUPPRESS_RANGE_CONTINUUM_SUPPORT) && node->data.len > 0) { /* Search for the first '-', ignoring the first ... */ char const* first = node->data.ptr; char const* minus = strnchr(first + 1, node->data.len - 1, '-'); char const* const last = node->data.ptr + (node->data.len - 1); if(SHWILD_F_SUPPRESS_RANGE_LEADTRAIL_LITERAL_HYPHEN_SUPPORT == (flags & SHWILD_F_SUPPRESS_RANGE_LEADTRAIL_LITERAL_HYPHEN_SUPPORT)) { if( '-' == *first || '-' == *last) { return SHWILD_RC_PARSE_ERROR; } } /* ... and the last, since they're treated as literals. */ if( NULL != minus && minus != last) { node_buffer_t xstr(1); char const* begin = node->data.ptr; char const* const end = begin + node->data.len; for(; NULL != minus && last != minus; ) { char prev = minus[-1]; char post = minus[+1]; if( !isalnum(prev) || !isalnum(post) || isdigit(prev) != isdigit(post)) { return SHWILD_RC_PARSE_ERROR; /* "[%-&]", "[a-9]" not valid */ } const size_t sz = xstr.size(); if(!xstr.resize(sz + (static_cast(minus - begin) - 1))) { return SHWILD_RC_ALLOC_ERROR; } else { std::copy(begin, minus - 1, xstr.begin() + sz); } if(isupper(prev) == isupper(post)) { if(post < prev) { if(0 != (flags & SHWILD_F_SUPPRESS_RANGE_CONTINUUM_HIGHLOW_SUPPORT)) { return SHWILD_RC_PARSE_ERROR; } else { prev = minus[+1]; post = minus[-1]; } } { for(char ch = prev; ch <= post; ++ch) { const size_t sz = xstr.size(); if(!xstr.resize(1 + sz)) { return SHWILD_RC_ALLOC_ERROR; } else { xstr[sz] = ch; } }} } else { if(0 != (flags & SHWILD_F_SUPPRESS_RANGE_CONTINUUM_CROSSCASE_SUPPORT)) { return SHWILD_RC_PARSE_ERROR; } else { char prevLower = static_cast(tolower(prev)); char postLower = static_cast(tolower(post)); char prevUpper = static_cast(toupper(prev)); char postUpper = static_cast(toupper(post)); SHWILD_ASSERT((postLower < prevLower) == (postUpper < prevUpper)); if(postLower < prevLower) { if(0 != (flags & SHWILD_F_SUPPRESS_RANGE_CONTINUUM_HIGHLOW_SUPPORT)) { return SHWILD_RC_PARSE_ERROR; } else { // TODO: Use std::swap() char ch; ch = prevLower; prevLower = postLower; postLower = ch; ch = prevUpper; prevUpper = postUpper; postUpper = ch; } } { for(char ch = prevLower; ch <= postLower; ++ch) { const size_t sz = xstr.size(); if(!xstr.resize(1 + sz)) { return SHWILD_RC_ALLOC_ERROR; } else { xstr[sz] = ch; } }} { for(char ch = prevUpper; ch <= postUpper; ++ch) { const size_t sz = xstr.size(); if(!xstr.resize(1 + sz)) { return SHWILD_RC_ALLOC_ERROR; } else { xstr[sz] = ch; } }} } } begin = minus + 2; if(end == begin) { break; } else { if('-' == *begin) { return SHWILD_RC_PARSE_ERROR; /* "[0-1-234]" not valid */ } minus = strnchr(begin, static_cast(end - begin), '-'); } } const size_t sz = xstr.size(); if(!xstr.resize(sz + static_cast(end - begin))) { return SHWILD_RC_ALLOC_ERROR; } else { std::copy(begin, end, xstr.begin() + sz); } scratch.swap(xstr); node->data.len = scratch.size() - 1; node->data.ptr = &scratch[1]; } } break; default: node->type = NODE_LITERAL; node->data.len = 0; tok = get_literal( node->data, scratch, buf, &tok_len, flags, 0 ); if(TOK_ENOMEM == tok) { return SHWILD_RC_ALLOC_ERROR; } } *len += tok_len; return 0; } void node_init( node_t* node ) { node->type = NODE_NOTHING; node->data.len = 0; } void node_reset( node_t* node ) { node->type = NODE_NOTHING; node->data.len = 0; } /* ///////////////////////////// end of file //////////////////////////// */