You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
479 lines
15 KiB
479 lines
15 KiB
/* /////////////////////////////////////////////////////////////////////////
|
|
* File: src/pattern.cpp
|
|
*
|
|
* Purpose: Pattern tokenising and node parsing for shwild implementation
|
|
*
|
|
* Created: 17th June 2005
|
|
* Updated: 20th December 2011
|
|
*
|
|
* Home: http://shwild.org/
|
|
*
|
|
* Copyright (c) 2005-2011, Sean Kelly and Matthew Wilson
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met:
|
|
*
|
|
* - Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* - Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* - Neither the names of Matthew Wilson and Sean Kelly nor the names of
|
|
* any contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
|
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* ////////////////////////////////////////////////////////////////////// */
|
|
|
|
|
|
/* /////////////////////////////////////////////////////////////////////////
|
|
* Includes
|
|
*/
|
|
|
|
#include <shwild/shwild.h>
|
|
#include "shwild_stlsoft.h"
|
|
|
|
#include "shwild_safestr.h"
|
|
#if defined(STLSOFT_COMPILER_IS_MSVC) && \
|
|
defined(SHWILD_USING_SAFE_STR_FUNCTIONS)
|
|
# pragma warning(disable : 4996)
|
|
#endif /* SHWILD_USING_SAFE_STR_FUNCTIONS && compiler */
|
|
|
|
|
|
#include "pattern.hpp"
|
|
#include "shwild_assert.h"
|
|
|
|
#include <ctype.h>
|
|
#include <limits.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#ifdef _MSC_VER
|
|
# include <crtdbg.h>
|
|
#endif /* _MSC_VER */
|
|
|
|
/* /////////////////////////////////////////////////////////////////////////
|
|
* Compiler features
|
|
*/
|
|
|
|
/* /////////////////////////////////////////////////////////////////////////
|
|
* Checks
|
|
*/
|
|
|
|
#if CHAR_MAX >= INT_MAX || CHAR_MIN <= INT_MIN
|
|
# error char must be smaller than int
|
|
#endif
|
|
|
|
/* /////////////////////////////////////////////////////////////////////////
|
|
* Helper functions
|
|
*/
|
|
|
|
namespace
{

    /** Length-bounded character search.
     *
     * Examines the first \c len characters starting at \c s and reports the
     * first one that equals \c ch. The search is limited purely by \c len; a
     * NUL character inside the range is compared like any other character
     * and does not stop the scan.
     *
     * \param s Start of the character range to search
     * \param len Number of characters to examine; may be 0
     * \param ch Character to look for
     *
     * \return Pointer to the first character equal to \c ch, or NULL when
     *   none of the \c len characters matches
     */
    char const* strnchr(char const* s, size_t len, char ch)
    {
        for(size_t index = 0; index != len; ++index)
        {
            if(s[index] == ch)
            {
                return s + index;
            }
        }

        return NULL;
    }

} // anonymous namespace
|
|
|
|
/* /////////////////////////////////////////////////////////////////////////
|
|
* API functions
|
|
*/
|
|
|
|
static int get_token(char const* buf, size_t* len , unsigned flags)
|
|
{
|
|
int tok;
|
|
|
|
SHWILD_ASSERT( len && buf );
|
|
|
|
*len = 1;
|
|
if( ( tok = *buf ) != TOK_END )
|
|
{
|
|
switch( tok )
|
|
{
|
|
case '\\':
|
|
if(0 != (flags & SHWILD_F_SUPPRESS_BACKSLASH_ESCAPE))
|
|
{
|
|
return tok;
|
|
}
|
|
++buf;
|
|
++*len;
|
|
tok = *buf;
|
|
switch( tok )
|
|
{
|
|
case '\\':
|
|
case '?':
|
|
case '*':
|
|
case '[':
|
|
case ']':
|
|
case '^':
|
|
return tok;
|
|
default:
|
|
return TOK_INVALID;
|
|
}
|
|
case '?':
|
|
return TOK_WILD_1;
|
|
case '*':
|
|
return TOK_WILD_N;
|
|
case '[':
|
|
if(0 != (flags & SHWILD_F_SUPPRESS_RANGE_SUPPORT))
|
|
{
|
|
return tok;
|
|
}
|
|
return TOK_RANGE_BEG;
|
|
case ']':
|
|
if(0 != (flags & SHWILD_F_SUPPRESS_RANGE_SUPPORT))
|
|
{
|
|
return tok;
|
|
}
|
|
return TOK_RANGE_END;
|
|
default:
|
|
return tok;
|
|
}
|
|
}
|
|
return TOK_END;
|
|
}
|
|
|
|
/** Collects a run of literal characters starting at \c buf into \c scratch.
 *
 * Tokens are read one after another with get_token(); every token whose
 * value lies in the range of \c char (and is not TOK_END) is appended to
 * \c scratch. The first token outside that range ends the run and is
 * returned without being consumed.
 *
 * \param content Receives the resulting slice: \c ptr refers to element 1 of
 *   \c scratch and \c len is the number of characters collected. It refers
 *   to storage owned by \c scratch.
 * \param scratch Working buffer. It is first resized to one element; the
 *   collected characters are stored from index 1 onwards.
 * \param buf Pattern text to read from; must not be NULL
 * \param len Receives the number of pattern characters consumed (escape
 *   sequences count with their full length); must not be NULL
 * \param flags Combination of SHWILD_F_* flags, forwarded to get_token()
 * \param bInRange Non-zero when collecting the contents of a range. In that
 *   case, unless SHWILD_F_SUPPRESS_RANGE_LITERAL_WILDCARD_SUPPORT is set,
 *   the wildcard tokens are turned back into the literal '?' and '*'.
 *
 * \return The token that terminated the run, or TOK_ENOMEM if \c scratch
 *   could not be resized (in which case \c content and \c *len are not
 *   written).
 */
static int get_literal( shwild_slice_t& content, node_buffer_t &scratch, char const* buf, size_t* len, unsigned flags, int bInRange )
{
    SHWILD_ASSERT( NULL != buf && 0 != len );

    // Cut the buffer back to its single leading element; per the original
    // author's note this trims without giving up heap memory already acquired.
    if(!scratch.resize(1))
    {
        return TOK_ENOMEM;
    }

    bool const wildcardsAreLiteral =    bInRange
                                    &&  0 == (SHWILD_F_SUPPRESS_RANGE_LITERAL_WILDCARD_SUPPORT & flags);

    char const* cursor = buf;
    int         tok;

    /*
     * NOTE: tokens are classified by value range (anything that fits in a
     * char is literal text) rather than by comparing against each specific
     * token value.
     */
    for(;;)
    {
        size_t consumed;

        tok = get_token( cursor, &consumed, flags );

        if(wildcardsAreLiteral)
        {
            if(TOK_WILD_1 == tok)
            {
                tok = '?';
            }
            if(TOK_WILD_N == tok)
            {
                tok = '*';
            }
        }

        bool const isLiteralChar =  TOK_END != tok
                                &&  CHAR_MIN <= tok
                                &&  tok <= CHAR_MAX;

        if(!isLiteralChar)
        {
            break;
        }

        size_t const used = scratch.size();

        if(!scratch.resize(used + 1))
        {
            return TOK_ENOMEM;
        }

        scratch[used] = static_cast<char>(tok);

        cursor += consumed;
    }

    // Element 0 is skipped: the collected text starts at index 1
    content.ptr = &scratch[1];
    content.len = scratch.size() - 1;

    *len = static_cast<size_t>(cursor - buf);

    return tok;
}
|
|
|
|
int get_node( node_t* node, node_buffer_t &scratch, char const* buf, size_t* len, unsigned flags )
|
|
{
|
|
int tok;
|
|
size_t tok_len;
|
|
|
|
SHWILD_ASSERT( node && buf && len );
|
|
|
|
*len = 0;
|
|
/* memset( node, 0, sizeof( node_t ) ); */
|
|
switch( tok = get_token( buf, &tok_len, flags ) )
|
|
{
|
|
case TOK_INVALID:
|
|
case TOK_RANGE_END:
|
|
return SHWILD_RC_PARSE_ERROR;
|
|
case TOK_END:
|
|
node->type = NODE_END;
|
|
node->data.len = 0;
|
|
break;
|
|
case TOK_WILD_1:
|
|
node->type = NODE_WILD_1;
|
|
node->data.len = 0;
|
|
break;
|
|
case TOK_WILD_N:
|
|
node->type = NODE_WILD_N;
|
|
node->data.len = 0;
|
|
break;
|
|
case TOK_RANGE_BEG:
|
|
node->type = NODE_RANGE;
|
|
node->data.len = 0;
|
|
*len += tok_len;
|
|
tok = get_literal( node->data, scratch, buf + tok_len, &tok_len, flags, 1 );
|
|
if(TOK_ENOMEM == tok)
|
|
{
|
|
return SHWILD_RC_ALLOC_ERROR;
|
|
}
|
|
else if(tok != TOK_RANGE_END)
|
|
{
|
|
*len = 0;
|
|
return SHWILD_RC_PARSE_ERROR;
|
|
}
|
|
*len += tok_len;
|
|
tok = get_token( buf + *len, &tok_len, flags );
|
|
STLSOFT_SUPPRESS_UNUSED(tok);
|
|
SHWILD_ASSERT( tok == TOK_RANGE_END );
|
|
/* Account for !range */
|
|
if( 0 == (flags & SHWILD_F_SUPPRESS_RANGE_NOT_SUPPORT) &&
|
|
node->data.len > 0 &&
|
|
'^' == node->data.ptr[0] &&
|
|
'\\' != buf[tok_len])
|
|
{
|
|
node->type = NODE_NOT_RANGE;
|
|
++node->data.ptr;
|
|
--node->data.len;
|
|
}
|
|
/* Now must post-process if got any embedded '-'. */
|
|
if( 0 == (flags & SHWILD_F_SUPPRESS_RANGE_CONTINUUM_SUPPORT) &&
|
|
node->data.len > 0)
|
|
{
|
|
/* Search for the first '-', ignoring the first ... */
|
|
char const* first = node->data.ptr;
|
|
char const* minus = strnchr(first + 1, node->data.len - 1, '-');
|
|
char const* const last = node->data.ptr + (node->data.len - 1);
|
|
|
|
if(SHWILD_F_SUPPRESS_RANGE_LEADTRAIL_LITERAL_HYPHEN_SUPPORT == (flags & SHWILD_F_SUPPRESS_RANGE_LEADTRAIL_LITERAL_HYPHEN_SUPPORT))
|
|
{
|
|
if( '-' == *first ||
|
|
'-' == *last)
|
|
{
|
|
return SHWILD_RC_PARSE_ERROR;
|
|
}
|
|
}
|
|
|
|
/* ... and the last, since they're treated as literals. */
|
|
if( NULL != minus &&
|
|
minus != last)
|
|
{
|
|
node_buffer_t xstr(1);
|
|
|
|
char const* begin = node->data.ptr;
|
|
char const* const end = begin + node->data.len;
|
|
|
|
for(; NULL != minus && last != minus; )
|
|
{
|
|
char prev = minus[-1];
|
|
char post = minus[+1];
|
|
|
|
if( !isalnum(prev) ||
|
|
!isalnum(post) ||
|
|
isdigit(prev) != isdigit(post))
|
|
{
|
|
return SHWILD_RC_PARSE_ERROR; /* "[%-&]", "[a-9]" not valid */
|
|
}
|
|
|
|
const size_t sz = xstr.size();
|
|
|
|
if(!xstr.resize(sz + (static_cast<size_t>(minus - begin) - 1)))
|
|
{
|
|
return SHWILD_RC_ALLOC_ERROR;
|
|
}
|
|
else
|
|
{
|
|
std::copy(begin, minus - 1, xstr.begin() + sz);
|
|
}
|
|
if(isupper(prev) == isupper(post))
|
|
{
|
|
if(post < prev)
|
|
{
|
|
if(0 != (flags & SHWILD_F_SUPPRESS_RANGE_CONTINUUM_HIGHLOW_SUPPORT))
|
|
{
|
|
return SHWILD_RC_PARSE_ERROR;
|
|
}
|
|
else
|
|
{
|
|
prev = minus[+1];
|
|
post = minus[-1];
|
|
}
|
|
}
|
|
|
|
{ for(char ch = prev; ch <= post; ++ch)
|
|
{
|
|
const size_t sz = xstr.size();
|
|
|
|
if(!xstr.resize(1 + sz))
|
|
{
|
|
return SHWILD_RC_ALLOC_ERROR;
|
|
}
|
|
else
|
|
{
|
|
xstr[sz] = ch;
|
|
}
|
|
}}
|
|
}
|
|
else
|
|
{
|
|
if(0 != (flags & SHWILD_F_SUPPRESS_RANGE_CONTINUUM_CROSSCASE_SUPPORT))
|
|
{
|
|
return SHWILD_RC_PARSE_ERROR;
|
|
}
|
|
else
|
|
{
|
|
char prevLower = static_cast<char>(tolower(prev));
|
|
char postLower = static_cast<char>(tolower(post));
|
|
char prevUpper = static_cast<char>(toupper(prev));
|
|
char postUpper = static_cast<char>(toupper(post));
|
|
|
|
SHWILD_ASSERT((postLower < prevLower) == (postUpper < prevUpper));
|
|
|
|
if(postLower < prevLower)
|
|
{
|
|
if(0 != (flags & SHWILD_F_SUPPRESS_RANGE_CONTINUUM_HIGHLOW_SUPPORT))
|
|
{
|
|
return SHWILD_RC_PARSE_ERROR;
|
|
}
|
|
else
|
|
{
|
|
// TODO: Use std::swap()
|
|
char ch;
|
|
|
|
ch = prevLower;
|
|
prevLower = postLower;
|
|
postLower = ch;
|
|
|
|
ch = prevUpper;
|
|
prevUpper = postUpper;
|
|
postUpper = ch;
|
|
}
|
|
}
|
|
|
|
{ for(char ch = prevLower; ch <= postLower; ++ch)
|
|
{
|
|
const size_t sz = xstr.size();
|
|
|
|
if(!xstr.resize(1 + sz))
|
|
{
|
|
return SHWILD_RC_ALLOC_ERROR;
|
|
}
|
|
else
|
|
{
|
|
xstr[sz] = ch;
|
|
}
|
|
}}
|
|
|
|
{ for(char ch = prevUpper; ch <= postUpper; ++ch)
|
|
{
|
|
const size_t sz = xstr.size();
|
|
|
|
if(!xstr.resize(1 + sz))
|
|
{
|
|
return SHWILD_RC_ALLOC_ERROR;
|
|
}
|
|
else
|
|
{
|
|
xstr[sz] = ch;
|
|
}
|
|
}}
|
|
}
|
|
}
|
|
|
|
begin = minus + 2;
|
|
if(end == begin)
|
|
{
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
if('-' == *begin)
|
|
{
|
|
return SHWILD_RC_PARSE_ERROR; /* "[0-1-234]" not valid */
|
|
}
|
|
minus = strnchr(begin, static_cast<size_t>(end - begin), '-');
|
|
}
|
|
}
|
|
|
|
const size_t sz = xstr.size();
|
|
|
|
if(!xstr.resize(sz + static_cast<size_t>(end - begin)))
|
|
{
|
|
return SHWILD_RC_ALLOC_ERROR;
|
|
}
|
|
else
|
|
{
|
|
std::copy(begin, end, xstr.begin() + sz);
|
|
}
|
|
|
|
scratch.swap(xstr);
|
|
|
|
node->data.len = scratch.size() - 1;
|
|
node->data.ptr = &scratch[1];
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
node->type = NODE_LITERAL;
|
|
node->data.len = 0;
|
|
tok = get_literal( node->data, scratch, buf, &tok_len, flags, 0 );
|
|
if(TOK_ENOMEM == tok)
|
|
{
|
|
return SHWILD_RC_ALLOC_ERROR;
|
|
}
|
|
}
|
|
*len += tok_len;
|
|
return 0;
|
|
}
|
|
|
|
/** Puts \c node into its empty state.
 *
 * The node type becomes NODE_NOTHING and the data slice length becomes 0.
 * The data slice pointer is not written.
 *
 * \param node Node to initialise; dereferenced unconditionally
 */
void node_init( node_t* node )
{
    node->data.len  =   0;
    node->type      =   NODE_NOTHING;
}
|
|
|
|
/** Returns \c node to its empty state.
 *
 * The node type becomes NODE_NOTHING and the data slice length becomes 0.
 * The data slice pointer is not written.
 *
 * \param node Node to reset; dereferenced unconditionally
 */
void node_reset( node_t* node )
{
    // Resetting performs exactly the assignments made on initialisation
    node_init(node);
}
|
|
|
|
/* ///////////////////////////// end of file //////////////////////////// */
|