diff options
Diffstat (limited to 'libopts/tokenize.c')
-rw-r--r-- | libopts/tokenize.c | 321 |
1 files changed, 0 insertions, 321 deletions
diff --git a/libopts/tokenize.c b/libopts/tokenize.c deleted file mode 100644 index 0e576ce..0000000 --- a/libopts/tokenize.c +++ /dev/null @@ -1,321 +0,0 @@ -/* - * This file defines the string_tokenize interface - * Time-stamp: "2006-06-24 15:27:49 bkorb" - * - * string_tokenize copyright 2005 Bruce Korb - * - * string_tokenize is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * string_tokenize is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with string_tokenize; if not, write to: - * The Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ -#include <ctype.h> -#include <errno.h> -#include <stdlib.h> - -#define cc_t const unsigned char -#define ch_t unsigned char - -/* = = = START-STATIC-FORWARD = = = */ -/* static forward declarations maintained by :mkfwd */ -static void -copy_cooked( ch_t** ppDest, char const ** ppSrc ); - -static void -copy_raw( ch_t** ppDest, char const ** ppSrc ); -/* = = = END-STATIC-FORWARD = = = */ - -static void -copy_cooked( ch_t** ppDest, char const ** ppSrc ) -{ - ch_t* pDest = (ch_t*)*ppDest; - const ch_t* pSrc = (const ch_t*)(*ppSrc + 1); - - for (;;) { - ch_t ch = *(pSrc++); - switch (ch) { - case NUL: *ppSrc = NULL; return; - case '"': goto done; - case '\\': - pSrc += ao_string_cook_escape_char( (char*)pSrc, (char*)&ch, 0x7F ); - if (ch == 0x7F) - break; - /* FALLTHROUGH */ - - default: - *(pDest++) = ch; - } - } - - done: - *ppDest = (ch_t*)pDest; /* next spot for storing character */ - *ppSrc = (char const *)pSrc; /* char following closing quote */ -} - - -static void -copy_raw( ch_t** ppDest, char const ** ppSrc ) -{ - ch_t* pDest = *ppDest; - cc_t* pSrc = (cc_t*) (*ppSrc + 1); - - for (;;) { - ch_t ch = *(pSrc++); - switch (ch) { - case NUL: *ppSrc = NULL; return; - case '\'': goto done; - case '\\': - /* - * *Four* escapes are handled: newline removal, escape char - * quoting and apostrophe quoting - */ - switch (*pSrc) { - case NUL: *ppSrc = NULL; return; - case '\r': - if (*(++pSrc) == '\n') - ++pSrc; - continue; - - case '\n': - ++pSrc; - continue; - - case '\'': - ch = '\''; - /* FALLTHROUGH */ - - case '\\': - ++pSrc; - break; - } - /* FALLTHROUGH */ - - default: - *(pDest++) = ch; - } - } - - done: - *ppDest = pDest; /* next spot for storing character */ - *ppSrc = (char const *) pSrc; /* char following closing quote */ -} - - -/*=export_func ao_string_tokenize - * - * what: tokenize an input string - * - * arg: + char const* + string + string to be tokenized + - * - * ret_type: token_list_t* - * ret_desc: pointer to a structure that lists each token - * - * doc: - * - * This function will convert one input string into a list of strings. - * The list of strings is derived by separating the input based on - * white space separation. However, if the input contains either single - * or double quote characters, then the text after that character up to - * a matching quote will become the string in the list. - * - * The returned pointer should be deallocated with @code{free(3C)} when - * are done using the data. The data are placed in a single block of - * allocated memory. Do not deallocate individual token/strings. - * - * The structure pointed to will contain at least these two fields: - * @table @samp - * @item tkn_ct - * The number of tokens found in the input string. - * @item tok_list - * An array of @code{tkn_ct + 1} pointers to substring tokens, with - * the last pointer set to NULL. - * @end table - * - * There are two types of quoted strings: single quoted (@code{'}) and - * double quoted (@code{"}). Singly quoted strings are fairly raw in that - * escape characters (@code{\\}) are simply another character, except when - * preceding the following characters: - * @example - * @code{\\} double backslashes reduce to one - * @code{'} incorporates the single quote into the string - * @code{\n} suppresses both the backslash and newline character - * @end example - * - * Double quote strings are formed according to the rules of string - * constants in ANSI-C programs. - * - * example: - * @example - * #include <stdlib.h> - * int ix; - * token_list_t* ptl = ao_string_tokenize( some_string ) - * for (ix = 0; ix < ptl->tkn_ct; ix++) - * do_something_with_tkn( ptl->tkn_list[ix] ); - * free( ptl ); - * @end example - * Note that everything is freed with the one call to @code{free(3C)}. - * - * err: - * NULL is returned and @code{errno} will be set to indicate the problem: - * @itemize @bullet - * @item - * @code{EINVAL} - There was an unterminated quoted string. - * @item - * @code{ENOENT} - The input string was empty. - * @item - * @code{ENOMEM} - There is not enough memory. - * @end itemize -=*/ -token_list_t* -ao_string_tokenize( char const* str ) -{ - int max_token_ct = 1; /* allow for trailing NUL on string */ - token_list_t* res; - - if (str == NULL) goto bogus_str; - - /* - * Trim leading white space. Use "ENOENT" and a NULL return to indicate - * an empty string was passed. - */ - while (isspace( (ch_t)*str )) str++; - if (*str == NUL) { - bogus_str: - errno = ENOENT; - return NULL; - } - - /* - * Take an approximate count of tokens. If no quoted strings are used, - * it will be accurate. If quoted strings are used, it will be a little - * high and we'll squander the space for a few extra pointers. - */ - { - cc_t* pz = (cc_t*)str; - - do { - max_token_ct++; - while (! isspace( *++pz )) - if (*pz == NUL) goto found_nul; - while (isspace( *pz )) pz++; - } while (*pz != NUL); - - found_nul: - ; - } - - res = malloc( sizeof(*res) + strlen(str) + (max_token_ct * sizeof(ch_t*)) ); - if (res == NULL) { - errno = ENOMEM; - return res; - } - - /* - * Now copy each token into the output buffer. - */ - { - ch_t* pzDest = (ch_t*)(res->tkn_list + (max_token_ct + 1)); - res->tkn_ct = 0; - - do { - res->tkn_list[ res->tkn_ct++ ] = pzDest; - for (;;) { - int ch = (ch_t)*str; - if (isspace( ch )) { - found_white_space: - while (isspace( (ch_t)*++str )) ; - break; - } - - switch (ch) { - case '"': - copy_cooked( &pzDest, &str ); - if (str == NULL) { - free(res); - errno = EINVAL; - return NULL; - } - if (isspace( (ch_t)*str )) - goto found_white_space; - break; - - case '\'': - copy_raw( &pzDest, &str ); - if (str == NULL) { - free(res); - errno = EINVAL; - return NULL; - } - if (isspace( (ch_t)*str )) - goto found_white_space; - break; - - case NUL: - goto copy_done; - - default: - str++; - *(pzDest++) = ch; - } - } copy_done:; - - /* - * NUL terminate the last token and see if we have any more tokens. - */ - *(pzDest++) = NUL; - } while (*str != NUL); - - res->tkn_list[ res->tkn_ct ] = NULL; - } - - return res; -} - -#ifdef TEST -#include <stdio.h> -#include <string.h> - -int -main( int argc, char** argv ) -{ - if (argc == 1) { - printf("USAGE: %s arg [ ... ]\n", *argv); - return 1; - } - while (--argc > 0) { - char* arg = *(++argv); - token_list_t* p = ao_string_tokenize( arg ); - if (p == NULL) { - printf( "Parsing string ``%s'' failed:\n\terrno %d (%s)\n", - arg, errno, strerror( errno )); - } else { - int ix = 0; - printf( "Parsed string ``%s''\ninto %d tokens:\n", arg, p->tkn_ct ); - do { - printf( " %3d: ``%s''\n", ix+1, p->tkn_list[ix] ); - } while (++ix < p->tkn_ct); - free(p); - } - } - return 0; -} -#endif - -/* - * Local Variables: - * mode: C - * c-file-style: "stroustrup" - * indent-tabs-mode: nil - * End: - * end of autoopts/tokenize.c */ |