Home | History | Annotate | Download | only in MagickCore
      1 /*
      2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      3 %                                                                             %
      4 %                                                                             %
      5 %                                                                             %
      6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
      7 %                      T    O   O  K  K   E      NN  N                        %
      8 %                      T    O   O  KKK    EEE    N N N                        %
      9 %                      T    O   O  K  K   E      N  NN                        %
     10 %                      T     OOO   K   K  EEEEE  N   N                        %
     11 %                                                                             %
     12 %                                                                             %
     13 %                         MagickCore Token Methods                            %
     14 %                                                                             %
     15 %                             Software Design                                 %
     16 %                                  Cristy                                     %
     17 %                              January 1993                                   %
     18 %                                                                             %
     19 %                                                                             %
     20 %  Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization      %
     21 %  dedicated to making software imaging solutions freely available.           %
     22 %                                                                             %
     23 %  You may not use this file except in compliance with the License.  You may  %
     24 %  obtain a copy of the License at                                            %
     25 %                                                                             %
     26 %    http://www.imagemagick.org/script/license.php                            %
     27 %                                                                             %
     28 %  Unless required by applicable law or agreed to in writing, software        %
     29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
     30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
     31 %  See the License for the specific language governing permissions and        %
     32 %  limitations under the License.                                             %
     33 %                                                                             %
     34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     35 %
     36 %
     37 %
     38 */
     39 
     40 /*
     42   Include declarations.
     43 */
     44 #include "MagickCore/studio.h"
     45 #include "MagickCore/exception.h"
     46 #include "MagickCore/exception-private.h"
     47 #include "MagickCore/image.h"
     48 #include "MagickCore/memory_.h"
     49 #include "MagickCore/string_.h"
     50 #include "MagickCore/string-private.h"
     51 #include "MagickCore/token.h"
     52 #include "MagickCore/token-private.h"
     53 #include "MagickCore/utility.h"
     54 #include "MagickCore/utility-private.h"
     55 
     56 /*
     58   Typedef declarations.
     59 */
     60 struct _TokenInfo
     61 {
     62   int
     63     state;
     64 
     65   MagickStatusType
     66     flag;
     67 
     68   ssize_t
     69     offset;
     70 
     71   char
     72     quote;
     73 
     74   size_t
     75     signature;
     76 };
     77 
     78 /*
     80 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     81 %                                                                             %
     82 %                                                                             %
     83 %                                                                             %
     84 %   A c q u i r e T o k e n I n f o                                           %
     85 %                                                                             %
     86 %                                                                             %
     87 %                                                                             %
     88 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     89 %
     90 %  AcquireTokenInfo() allocates the TokenInfo structure.
     91 %
     92 %  The format of the AcquireTokenInfo method is:
     93 %
     94 %      TokenInfo *AcquireTokenInfo()
     95 %
     96 */
     97 MagickExport TokenInfo *AcquireTokenInfo(void)
     98 {
     99   TokenInfo
    100     *token_info;
    101 
    102   token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
    103   if (token_info == (TokenInfo *) NULL)
    104     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
    105   token_info->signature=MagickCoreSignature;
    106   return(token_info);
    107 }
    108 
    109 /*
    111 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    112 %                                                                             %
    113 %                                                                             %
    114 %                                                                             %
    115 %   D e s t r o y T o k e n I n f o                                           %
    116 %                                                                             %
    117 %                                                                             %
    118 %                                                                             %
    119 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    120 %
    121 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
    122 %  structure.
    123 %
    124 %  The format of the DestroyTokenInfo method is:
    125 %
    126 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
    127 %
    128 %  A description of each parameter follows:
    129 %
    130 %    o token_info: Specifies a pointer to a TokenInfo structure.
    131 %
    132 */
    133 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
    134 {
    135   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
    136   assert(token_info != (TokenInfo *) NULL);
    137   assert(token_info->signature == MagickCoreSignature);
    138   token_info->signature=(~MagickCoreSignature);
    139   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
    140   return(token_info);
    141 }
    142 
    143 /*
    145 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    146 %                                                                             %
    147 %                                                                             %
    148 %                                                                             %
    149 +   G e t N e x t T o k e n                                                   %
    150 %                                                                             %
    151 %                                                                             %
    152 %                                                                             %
    153 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    154 %
    155 %  GetNextToken() gets a token from the token stream.  A token is defined as
    156 %  a sequence of characters delimited by whitespace (e.g. clip-path), a
    157 %  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
    158 %  parenthesis (e.g. rgb(0,0,0)).  GetNextToken() also recognizes these
    159 %  separator characters: ':', '=', ',', and ';'.
    160 %
    161 %  The format of the GetNextToken method is:
    162 %
    163 %      void GetNextToken(const char *start,const char **end,
    164 %        const size_t extent,char *token)
    165 %
    166 %  A description of each parameter follows:
    167 %
    168 %    o start: the start of the token sequence.
    169 %
    170 %    o end: point to the end of the token sequence.
    171 %
    172 %    o extent: maximum extent of the token.
    173 %
    174 %    o token: copy the token to this buffer.
    175 %
    176 */
    177 MagickExport void GetNextToken(const char *start,const char **end,
    178   const size_t extent,char *token)
    179 {
    180   double
    181     value;
    182 
    183   register const char
    184     *p;
    185 
    186   register ssize_t
    187     i;
    188 
    189   assert(start != (const char *) NULL);
    190   assert(token != (char *) NULL);
    191   i=0;
    192   p=start;
    193   while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
    194     p++;
    195   switch (*p)
    196   {
    197     case '\0':
    198       break;
    199     case '"':
    200     case '\'':
    201     case '`':
    202     case '{':
    203     {
    204       register char
    205         escape;
    206 
    207       switch (*p)
    208       {
    209         case '"': escape='"'; break;
    210         case '\'': escape='\''; break;
    211         case '`': escape='\''; break;
    212         case '{': escape='}'; break;
    213         default: escape=(*p); break;
    214       }
    215       for (p++; *p != '\0'; p++)
    216       {
    217         if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
    218           p++;
    219         else
    220           if (*p == escape)
    221             {
    222               p++;
    223               break;
    224             }
    225         if (i < (ssize_t) (extent-1))
    226           token[i++]=(*p);
    227       }
    228       break;
    229     }
    230     case '/':
    231     {
    232       if (i < (ssize_t) (extent-1))
    233         token[i++]=(*p++);
    234       if ((*p == '>') || (*p == '/'))
    235         if (i < (ssize_t) (extent-1))
    236           token[i++]=(*p++);
    237       break;
    238     }
    239     default:
    240     {
    241       char
    242         *q;
    243 
    244       value=StringToDouble(p,&q);
    245       (void) value;
    246       if ((p != q) && (*p != ','))
    247         {
    248           for ( ; (p < q) && (*p != ','); p++)
    249             if (i < (ssize_t) (extent-1))
    250               token[i++]=(*p);
    251           if (*p == '%')
    252             if (i < (ssize_t) (extent-1))
    253               token[i++]=(*p++);
    254           break;
    255         }
    256       if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
    257           (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
    258         {
    259           if (i < (ssize_t) (extent-1))
    260             token[i++]=(*p++);
    261           break;
    262         }
    263       for ( ; *p != '\0'; p++)
    264       {
    265         if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
    266             (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
    267           break;
    268         if ((i > 0) && (*p == '<'))
    269           break;
    270         if (i < (ssize_t) (extent-1))
    271           token[i++]=(*p);
    272         if (*p == '>')
    273           break;
    274         if (*p == '(')
    275           for (p++; *p != '\0'; p++)
    276           {
    277             if (i < (ssize_t) (extent-1))
    278               token[i++]=(*p);
    279             if ((*p == ')') && (*(p-1) != '\\'))
    280               break;
    281           }
    282       }
    283       break;
    284     }
    285   }
    286   token[i]='\0';
    287   if (LocaleNCompare(token,"url(",4) == 0)
    288     {
    289       ssize_t
    290         offset;
    291 
    292       offset=4;
    293       if (token[offset] == '#')
    294         offset++;
    295       i=(ssize_t) strlen(token);
    296       (void) CopyMagickString(token,token+offset,MagickPathExtent);
    297       token[i-offset-1]='\0';
    298     }
    299   while (isspace((int) ((unsigned char) *p)) != 0)
    300     p++;
    301   if (end != (const char **) NULL)
    302     *end=(const char *) p;
    303 }
    304 
    305 /*
    307 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    308 %                                                                             %
    309 %                                                                             %
    310 %                                                                             %
    311 %   G l o b E x p r e s s i o n                                               %
    312 %                                                                             %
    313 %                                                                             %
    314 %                                                                             %
    315 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    316 %
    317 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
    318 %
    319 %  The format of the GlobExpression function is:
    320 %
    321 %      MagickBooleanType GlobExpression(const char *expression,
    322 %        const char *pattern,const MagickBooleanType case_insensitive)
    323 %
    324 %  A description of each parameter follows:
    325 %
    326 %    o expression: Specifies a pointer to a text string containing a file name.
    327 %
    328 %    o pattern: Specifies a pointer to a text string containing a pattern.
    329 %
    330 %    o case_insensitive: set to MagickTrue to ignore the case when matching
    331 %      an expression.
    332 %
    333 */
    334 MagickExport MagickBooleanType GlobExpression(const char *expression,
    335   const char *pattern,const MagickBooleanType case_insensitive)
    336 {
    337   MagickBooleanType
    338     done,
    339     match;
    340 
    341   register const char
    342     *p;
    343 
    344   /*
    345     Return on empty pattern or '*'.
    346   */
    347   if (pattern == (char *) NULL)
    348     return(MagickTrue);
    349   if (GetUTFCode(pattern) == 0)
    350     return(MagickTrue);
    351   if (LocaleCompare(pattern,"*") == 0)
    352     return(MagickTrue);
    353   p=pattern+strlen(pattern)-1;
    354   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
    355     {
    356       ExceptionInfo
    357         *exception;
    358 
    359       ImageInfo
    360         *image_info;
    361 
    362       /*
    363         Determine if pattern is a scene, i.e. img0001.pcd[2].
    364       */
    365       image_info=AcquireImageInfo();
    366       (void) CopyMagickString(image_info->filename,pattern,MagickPathExtent);
    367       exception=AcquireExceptionInfo();
    368       (void) SetImageInfo(image_info,0,exception);
    369       exception=DestroyExceptionInfo(exception);
    370       if (LocaleCompare(image_info->filename,pattern) != 0)
    371         {
    372           image_info=DestroyImageInfo(image_info);
    373           return(MagickFalse);
    374         }
    375       image_info=DestroyImageInfo(image_info);
    376     }
    377   /*
    378     Evaluate glob expression.
    379   */
    380   done=MagickFalse;
    381   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
    382   {
    383     if (GetUTFCode(expression) == 0)
    384       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
    385         break;
    386     switch (GetUTFCode(pattern))
    387     {
    388       case '*':
    389       {
    390         MagickBooleanType
    391           status;
    392 
    393         status=MagickFalse;
    394         pattern+=GetUTFOctets(pattern);
    395         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
    396         {
    397           status=GlobExpression(expression,pattern,case_insensitive);
    398           expression+=GetUTFOctets(expression);
    399         }
    400         if (status != MagickFalse)
    401           {
    402             while (GetUTFCode(expression) != 0)
    403               expression+=GetUTFOctets(expression);
    404             while (GetUTFCode(pattern) != 0)
    405               pattern+=GetUTFOctets(pattern);
    406           }
    407         break;
    408       }
    409       case '[':
    410       {
    411         int
    412           c;
    413 
    414         pattern+=GetUTFOctets(pattern);
    415         for ( ; ; )
    416         {
    417           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
    418             {
    419               done=MagickTrue;
    420               break;
    421             }
    422           if (GetUTFCode(pattern) == '\\')
    423             {
    424               pattern+=GetUTFOctets(pattern);
    425               if (GetUTFCode(pattern) == 0)
    426                 {
    427                   done=MagickTrue;
    428                   break;
    429                 }
    430              }
    431           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
    432             {
    433               c=GetUTFCode(pattern);
    434               pattern+=GetUTFOctets(pattern);
    435               pattern+=GetUTFOctets(pattern);
    436               if (GetUTFCode(pattern) == ']')
    437                 {
    438                   done=MagickTrue;
    439                   break;
    440                 }
    441               if (GetUTFCode(pattern) == '\\')
    442                 {
    443                   pattern+=GetUTFOctets(pattern);
    444                   if (GetUTFCode(pattern) == 0)
    445                     {
    446                       done=MagickTrue;
    447                       break;
    448                     }
    449                 }
    450               if ((GetUTFCode(expression) < c) ||
    451                   (GetUTFCode(expression) > GetUTFCode(pattern)))
    452                 {
    453                   pattern+=GetUTFOctets(pattern);
    454                   continue;
    455                 }
    456             }
    457           else
    458             if (GetUTFCode(pattern) != GetUTFCode(expression))
    459               {
    460                 pattern+=GetUTFOctets(pattern);
    461                 continue;
    462               }
    463           pattern+=GetUTFOctets(pattern);
    464           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
    465           {
    466             if ((GetUTFCode(pattern) == '\\') &&
    467                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
    468               pattern+=GetUTFOctets(pattern);
    469             pattern+=GetUTFOctets(pattern);
    470           }
    471           if (GetUTFCode(pattern) != 0)
    472             {
    473               pattern+=GetUTFOctets(pattern);
    474               expression+=GetUTFOctets(expression);
    475             }
    476           break;
    477         }
    478         break;
    479       }
    480       case '?':
    481       {
    482         pattern+=GetUTFOctets(pattern);
    483         expression+=GetUTFOctets(expression);
    484         break;
    485       }
    486       case '{':
    487       {
    488         pattern+=GetUTFOctets(pattern);
    489         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
    490         {
    491           p=expression;
    492           match=MagickTrue;
    493           while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
    494                  (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
    495                  (match != MagickFalse))
    496           {
    497             if (GetUTFCode(pattern) == '\\')
    498               pattern+=GetUTFOctets(pattern);
    499             match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
    500               MagickFalse;
    501             p+=GetUTFOctets(p);
    502             pattern+=GetUTFOctets(pattern);
    503           }
    504           if (GetUTFCode(pattern) == 0)
    505             {
    506               match=MagickFalse;
    507               done=MagickTrue;
    508               break;
    509             }
    510           else
    511             if (match != MagickFalse)
    512               {
    513                 expression=p;
    514                 while ((GetUTFCode(pattern) != '}') &&
    515                        (GetUTFCode(pattern) != 0))
    516                 {
    517                   pattern+=GetUTFOctets(pattern);
    518                   if (GetUTFCode(pattern) == '\\')
    519                     {
    520                       pattern+=GetUTFOctets(pattern);
    521                       if (GetUTFCode(pattern) == '}')
    522                         pattern+=GetUTFOctets(pattern);
    523                     }
    524                 }
    525               }
    526             else
    527               {
    528                 while ((GetUTFCode(pattern) != '}') &&
    529                        (GetUTFCode(pattern) != ',') &&
    530                        (GetUTFCode(pattern) != 0))
    531                 {
    532                   pattern+=GetUTFOctets(pattern);
    533                   if (GetUTFCode(pattern) == '\\')
    534                     {
    535                       pattern+=GetUTFOctets(pattern);
    536                       if ((GetUTFCode(pattern) == '}') ||
    537                           (GetUTFCode(pattern) == ','))
    538                         pattern+=GetUTFOctets(pattern);
    539                     }
    540                 }
    541               }
    542             if (GetUTFCode(pattern) != 0)
    543               pattern+=GetUTFOctets(pattern);
    544           }
    545         break;
    546       }
    547       case '\\':
    548       {
    549         pattern+=GetUTFOctets(pattern);
    550         if (GetUTFCode(pattern) == 0)
    551           break;
    552       }
    553       default:
    554       {
    555         if (case_insensitive != MagickFalse)
    556           {
    557             if (tolower((int) GetUTFCode(expression)) !=
    558                 tolower((int) GetUTFCode(pattern)))
    559               {
    560                 done=MagickTrue;
    561                 break;
    562               }
    563           }
    564         else
    565           if (GetUTFCode(expression) != GetUTFCode(pattern))
    566             {
    567               done=MagickTrue;
    568               break;
    569             }
    570         expression+=GetUTFOctets(expression);
    571         pattern+=GetUTFOctets(pattern);
    572       }
    573     }
    574   }
    575   while (GetUTFCode(pattern) == '*')
    576     pattern+=GetUTFOctets(pattern);
    577   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
    578     MagickTrue : MagickFalse;
    579   return(match);
    580 }
    581 
    582 /*
    584 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    585 %                                                                             %
    586 %                                                                             %
    587 %                                                                             %
    588 +     I s G l o b                                                             %
    589 %                                                                             %
    590 %                                                                             %
    591 %                                                                             %
    592 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    593 %
    594 %  IsGlob() returns MagickTrue if the path specification contains a globbing
    595 %  pattern.
    596 %
    597 %  The format of the IsGlob method is:
    598 %
    599 %      MagickBooleanType IsGlob(const char *path)
    600 %
    601 %  A description of each parameter follows:
    602 %
    603 %    o path: the path.
    604 %
    605 */
    606 MagickPrivate MagickBooleanType IsGlob(const char *path)
    607 {
    608   MagickBooleanType
    609     status = MagickFalse;
    610 
    611   register const char
    612     *p;
    613 
    614   if (IsPathAccessible(path) != MagickFalse)
    615     return(MagickFalse);
    616   for (p=path; *p != '\0'; p++)
    617   {
    618     switch (*p)
    619     {
    620       case '*':
    621       case '?':
    622       case '{':
    623       case '}':
    624       case '[':
    625       case ']':
    626       {
    627         status=MagickTrue;
    628         break;
    629       }
    630       default:
    631         break;
    632     }
    633   }
    634   return(status);
    635 }
    636 
    637 /*
    639 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    640 %                                                                             %
    641 %                                                                             %
    642 %                                                                             %
    643 %   T o k e n i z e r                                                         %
    644 %                                                                             %
    645 %                                                                             %
    646 %                                                                             %
    647 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    648 %
    649 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
    650 %  one at a time from a string of characters.  The characters used for white
    651 %  space, for break characters, and for quotes can be specified.  Also,
    652 %  characters in the string can be preceded by a specifiable escape character
    653 %  which removes any special meaning the character may have.
    654 %
    655 %  Here is some terminology:
    656 %
    657 %    o token: A single unit of information in the form of a group of
    658 %      characters.
    659 %
    660 %    o white space: Space that gets ignored (except within quotes or when
    661 %      escaped), like blanks and tabs. In addition, white space terminates a
    662 %      non-quoted token.
    663 %
    664 %    o break set: One or more characters that separates non-quoted tokens.
    665 %      Commas are a common break character. The usage of break characters to
    666 %      signal the end of a token is the same as that of white space, except
    667 %      multiple break characters with nothing or only white space between
    668 %      generate a null token for each two break characters together.
    669 %
    670 %      For example, if blank is set to be the white space and comma is set to
    671 %      be the break character, the line
    672 %
    673 %        A, B, C ,  , DEF
    674 %
    675 %        ... consists of 5 tokens:
    676 %
    677 %        1)  "A"
    678 %        2)  "B"
    679 %        3)  "C"
    680 %        4)  "" (the null string)
    681 %        5)  "DEF"
    682 %
    683 %    o Quote character: A character that, when surrounding a group of other
    684 %      characters, causes the group of characters to be treated as a single
    685 %      token, no matter how many white spaces or break characters exist in
    686 %      the group. Also, a token always terminates after the closing quote.
    687 %      For example, if ' is the quote character, blank is white space, and
    688 %      comma is the break character, the following string
    689 %
    690 %        A, ' B, CD'EF GHI
    691 %
    692 %        ... consists of 4 tokens:
    693 %
    694 %        1)  "A"
    695 %        2)  " B, CD" (note the blanks & comma)
    696 %        3)  "EF"
    697 %        4)  "GHI"
    698 %
    699 %      The quote characters themselves do not appear in the resultant
    700 %      tokens.  The double quotes are delimiters i use here for
    701 %      documentation purposes only.
    702 %
    703 %    o Escape character: A character which itself is ignored but which
    704 %      causes the next character to be used as is.  ^ and \ are often used
    705 %      as escape characters. An escape in the last position of the string
    706 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
    707 %      and non-escape) character. For example, assume white space, break
    708 %      character, and quote are the same as in the above examples, and
    709 %      further, assume that ^ is the escape character. Then, in the string
    710 %
    711 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
    712 %
    713 %        ... there are 7 tokens:
    714 %
    715 %        1)  "ABC"
    716 %        2)  " DEF ' GH"
    717 %        3)  "I"
    718 %        4)  " "     (a lone blank)
    719 %        5)  "J"
    720 %        6)  "K L"
    721 %        7)  "^"     (passed as is at end of line)
    722 %
    723 %  The format of the Tokenizer method is:
    724 %
    725 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
    726 %        const size_t max_token_length,const char *line,const char *white,
    727 %        const char *break_set,const char *quote,const char escape,
    728 %        char *breaker,int *next,char *quoted)
    729 %
    730 %  A description of each parameter follows:
    731 %
    732 %    o flag: right now, only the low order 3 bits are used.
    733 %
    734 %        1 => convert non-quoted tokens to upper case
    735 %        2 => convert non-quoted tokens to lower case
    736 %        0 => do not convert non-quoted tokens
    737 %
    738 %    o token: a character string containing the returned next token
    739 %
    740 %    o max_token_length: the maximum size of "token".  Characters beyond
    741 %      "max_token_length" are truncated.
    742 %
    743 %    o line: the string to be parsed.
    744 %
    745 %    o white: a string of the valid white spaces.  example:
    746 %
    747 %        char whitesp[]={" \t"};
    748 %
    749 %      blank and tab will be valid white space.
    750 %
    751 %    o break: a string of the valid break characters. example:
    752 %
    753 %        char breakch[]={";,"};
    754 %
    755 %      semicolon and comma will be valid break characters.
    756 %
    757 %    o quote: a string of the valid quote characters. An example would be
    758 %
    759 %        char quote[]={"'\""};
    760 %
    761 %      (this causes single and double quotes to be valid) Note that a
    762 %      token starting with one of these characters needs the same quote
    763 %      character to terminate it.
    764 %
    765 %      for example:
    766 %
    767 %        "ABC '
    768 %
    769 %      is unterminated, but
    770 %
    771 %        "DEF" and 'GHI'
    772 %
    773 %      are properly terminated.  Note that different quote characters
    774 %      can appear on the same line; only for a given token do the quote
    775 %      characters have to be the same.
    776 %
    777 %    o escape: the escape character (NOT a string ... only one
    778 %      allowed). Use zero if none is desired.
    779 %
    780 %    o breaker: the break character used to terminate the current
    781 %      token.  If the token was quoted, this will be the quote used.  If
    782 %      the token is the last one on the line, this will be zero.
    783 %
    784 %    o next: this variable points to the first character of the
    785 %      next token.  it gets reset by "tokenizer" as it steps through the
    786 %      string.  Set it to 0 upon initialization, and leave it alone
    787 %      after that.  You can change it if you want to jump around in the
    788 %      string or re-parse from the beginning, but be careful.
    789 %
    790 %    o quoted: set to MagickTrue if the token was quoted and MagickFalse
    791 %      if not.  You may need this information (for example:  in C, a
    792 %      string with quotes around it is a character string, while one
    793 %      without is an identifier).
    794 %
    795 %    o result: 0 if we haven't reached EOS (end of string), and 1
    796 %      if we have.
    797 %
    798 */
    799 
/*
  Parser states stored in TokenInfo.state while Tokenizer() scans a line.
*/
#define IN_WHITE 0  /* initial state: no token character has been seen yet */
#define IN_TOKEN 1  /* collecting the characters of an unquoted token */
#define IN_QUOTE 2  /* inside a quoted token, waiting for the matching quote */
#define IN_OZONE 3  /* token is complete; skipping white space that follows */
    804 
    805 static ssize_t sindex(int c,const char *string)
    806 {
    807   register const char
    808     *p;
    809 
    810   for (p=string; *p != '\0'; p++)
    811     if (c == (int) (*p))
    812       return((ssize_t) (p-string));
    813   return(-1);
    814 }
    815 
    816 static void StoreToken(TokenInfo *token_info,char *string,
    817   size_t max_token_length,int c)
    818 {
    819   register ssize_t
    820     i;
    821 
    822   if ((token_info->offset < 0) ||
    823       ((size_t) token_info->offset >= (max_token_length-1)))
    824     return;
    825   i=token_info->offset++;
    826   string[i]=(char) c;
    827   if (token_info->state == IN_QUOTE)
    828     return;
    829   switch (token_info->flag & 0x03)
    830   {
    831     case 1:
    832     {
    833       string[i]=(char) toupper(c);
    834       break;
    835     }
    836     case 2:
    837     {
    838       string[i]=(char) tolower(c);
    839       break;
    840     }
    841     default:
    842       break;
    843   }
    844 }
    845 
    846 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
    847   char *token,const size_t max_token_length,const char *line,const char *white,
    848   const char *break_set,const char *quote,const char escape,char *breaker,
    849   int *next,char *quoted)
    850 {
    851   int
    852     c;
    853 
    854   register ssize_t
    855     i;
    856 
    857   *breaker='\0';
    858   *quoted='\0';
    859   if (line[*next] == '\0')
    860     return(1);
    861   token_info->state=IN_WHITE;
    862   token_info->quote=(char) MagickFalse;
    863   token_info->flag=flag;
    864   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
    865   {
    866     c=(int) line[*next];
    867     i=sindex(c,break_set);
    868     if (i >= 0)
    869       {
    870         switch (token_info->state)
    871         {
    872           case IN_WHITE:
    873           case IN_TOKEN:
    874           case IN_OZONE:
    875           {
    876             (*next)++;
    877             *breaker=break_set[i];
    878             token[token_info->offset]='\0';
    879             return(0);
    880           }
    881           case IN_QUOTE:
    882           {
    883             StoreToken(token_info,token,max_token_length,c);
    884             break;
    885           }
    886         }
    887         continue;
    888       }
    889     i=sindex(c,quote);
    890     if (i >= 0)
    891       {
    892         switch (token_info->state)
    893         {
    894           case IN_WHITE:
    895           {
    896             token_info->state=IN_QUOTE;
    897             token_info->quote=quote[i];
    898             *quoted=(char) MagickTrue;
    899             break;
    900           }
    901           case IN_QUOTE:
    902           {
    903             if (quote[i] != token_info->quote)
    904               StoreToken(token_info,token,max_token_length,c);
    905             else
    906               {
    907                 token_info->state=IN_OZONE;
    908                 token_info->quote='\0';
    909               }
    910             break;
    911           }
    912           case IN_TOKEN:
    913           case IN_OZONE:
    914           {
    915             *breaker=(char) c;
    916             token[token_info->offset]='\0';
    917             return(0);
    918           }
    919         }
    920         continue;
    921       }
    922     i=sindex(c,white);
    923     if (i >= 0)
    924       {
    925         switch (token_info->state)
    926         {
    927           case IN_WHITE:
    928           case IN_OZONE:
    929             break;
    930           case IN_TOKEN:
    931           {
    932             token_info->state=IN_OZONE;
    933             break;
    934           }
    935           case IN_QUOTE:
    936           {
    937             StoreToken(token_info,token,max_token_length,c);
    938             break;
    939           }
    940         }
    941         continue;
    942       }
    943     if (c == (int) escape)
    944       {
    945         if (line[(*next)+1] == '\0')
    946           {
    947             *breaker='\0';
    948             StoreToken(token_info,token,max_token_length,c);
    949             (*next)++;
    950             token[token_info->offset]='\0';
    951             return(0);
    952           }
    953         switch (token_info->state)
    954         {
    955           case IN_WHITE:
    956           {
    957             (*next)--;
    958             token_info->state=IN_TOKEN;
    959             break;
    960           }
    961           case IN_TOKEN:
    962           case IN_QUOTE:
    963           {
    964             (*next)++;
    965             c=(int) line[*next];
    966             StoreToken(token_info,token,max_token_length,c);
    967             break;
    968           }
    969           case IN_OZONE:
    970           {
    971             token[token_info->offset]='\0';
    972             return(0);
    973           }
    974         }
    975         continue;
    976       }
    977     switch (token_info->state)
    978     {
    979       case IN_WHITE:
    980       {
    981         token_info->state=IN_TOKEN;
    982         StoreToken(token_info,token,max_token_length,c);
    983         break;
    984       }
    985       case IN_TOKEN:
    986       case IN_QUOTE:
    987       {
    988         StoreToken(token_info,token,max_token_length,c);
    989         break;
    990       }
    991       case IN_OZONE:
    992       {
    993         token[token_info->offset]='\0';
    994         return(0);
    995       }
    996     }
    997   }
    998   token[token_info->offset]='\0';
    999   return(0);
   1000 }
   1001