Home | History | Annotate | Download | only in MagickWand
      1 /*
      2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      3 %                                                                             %
      4 %                                                                             %
      5 %    SSS    CCC  RRRR   III  PPPP  TTTTT    TTTTT  OOO   K  K  EEEE  N   N    %
      6 %   S      C     R   R   I   P   P   T        T   O   O  K K   E     NN  N    %
      7 %    SSS   C     RRRR    I   PPPP    T        T   O   O  KK    EEE   N N N    %
      8 %       S  C     R R     I   P       T        T   O   O  K K   E     N  NN    %
      9 %   SSSS    CCC  R  RR  III  P       T        T    OOO   K  K  EEEE  N   N    %
     10 %                                                                             %
     11 %                    Tokenize Magick Script into Options                      %
     12 %                                                                             %
     13 %                             Dragon Computing                                %
     14 %                             Anthony Thyssen                                 %
     15 %                               January 2012                                  %
     16 %                                                                             %
     17 %                                                                             %
     18 %  Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization      %
     19 %  dedicated to making software imaging solutions freely available.           %
     20 %                                                                             %
     21 %  You may not use this file except in compliance with the License.  You may  %
     22 %  obtain a copy of the License at                                            %
     23 %                                                                             %
     24 %    http://www.imagemagick.org/script/license.php                            %
     25 %                                                                             %
     26 %  Unless required by applicable law or agreed to in writing, software        %
     27 %  distributed under the License is distributed on an "AS IS" BASIS,          %
     28 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
     29 %  See the License for the specific language governing permissions and        %
     30 %  limitations under the License.                                             %
     31 %                                                                             %
     32 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     33 %
     34 %  Read a stream of characters and return tokens one at a time.
     35 %
     36 %  The input stream is divided into individual 'tokens' (representing 'words' or
     37 %  'options'), in a way that is as close to a UNIX shell as is feasible.
     38 %  Only shell variable, and command substitutions will not be performed.
     39 %  Tokens can be any length.
     40 %
     41 %  The main function call is GetScriptToken() (see below) which returns one
     42 %  and only one token at a time.  The other functions provide support to this
     43 %  function, opening scripts, and setting up the required structures.
     44 %
     45 %  More specifically...
     46 %
     47 %  Tokens are white space separated, and may be quoted, or even partially
     48 %  quoted by either single or double quotes, or the use of backslashes,
     49 %  or any mix of the three.
     50 %
     51 %  For example:    This\ is' a 'single" token"
     52 %
     53 %  A token is returned immediately when the end of the token is found. That is,
     54 %  as soon as an unquoted white-space or EOF condition has been found.  That is
     55 %  to say the file stream is parsed purely character-by-character, regardless of
     56 %  any buffering constraints set by the system.  It is not parsed line-by-line.
     57 %
     58 %  The function will return 'MagickTrue' if a valid token was found, while
     59 %  the token status will be set accordingly to 'OK' or 'EOF', according to
     60 %  the cause of the end of token.  The token may be an empty string if the
     61 %  input was a quoted empty string.  Other error conditions return a value of
     62 %  MagickFalse, indicating that any token found was incomplete due to some
     63 %  error condition.
     64 %
     65 %  Single quotes will preserve all characters including backslashes. Double
     66 %  quotes will also preserve backslashes unless escaping a double quote,
     67 %  or another backslash.  Other shell meta-characters are not treated as
     68 %  special by this tokenizer.
     69 %
     70 %  For example Quoting the quote chars:
     71 %              \'  "'"       \"  '"'  "\""      \\  '\'  "\\"
     72 %
     73 %  Outside quotes, backslash characters will make spaces, tabs and quotes part
     74 %  of a token returned. However a backslash at the end of a line (and outside
     75 %  quotes) will cause the newline to be completely ignored (as per the shell
     76 %  line continuation).
     77 %
     78 %  Comments start with a '#' character at the start of a new token, and will be
     79 %  completely ignored up to the end of line, regardless of any backslash at the
     80 %  end of the line.  You can escape a comment '#', using quotes or backslashes
     81 %  just as you can in a shell.
     82 %
     83 %  The parser will accept both newlines, returns, or return-newlines to mark
     84 %  the EOL. Though this is technically breaking (or perhaps adding to) the
     85 %  'BASH' syntax that is being followed.
     86 %
     87 %
     88 %  UNIX script Launcher...
     89 %
     90 %  The use of '#' comments allow normal UNIX 'scripting' to be used to call on
     91 %  the "magick" command to parse the tokens from a file
     92 %
     93 %    #!/path/to/command/magick -script
     94 %
     95 %
     96 %  UNIX 'env' command launcher...
     97 %
     98 %  If "magick" is renamed "magick-script" you can use a 'env' UNIX launcher
     99 %
    100 %    #!/usr/bin/env magick-script
    101 %
    102 %
    103 %  Shell script launcher...
    104 %
    105 %  As a special case a ':' at the start of a line is also treated as a comment
    106 %  This allows a magick script to ignore a line that can be parsed by the shell
    107 %  and not by the magick script (tokenizer).  This allows for an alternative
    108 %  script 'launcher' to be used for magick scripts.
    109 %
    110 %    #!/bin/sh
    111 %    :; exec magick -script "$0" "$@"; exit 10
    112 %    #
    113 %    # The rest of the file is magick script
    114 %    -read label:"This is a Magick Script!"
    115 %    -write show: -exit
    116 %
    117 % Or with some shell pre/post processing...
    118 %
    119 %    #!/bin/sh
    120 %    :; echo "This part is run in the shell, but ignored by Magick"
    121 %    :; magick -script "$0" "$@"
    122 %    :; echo "This is run after the "magick" script is finished!"
    123 %    :; exit 10
    124 %    #
    125 %    # The rest of the file is magick script
    126 %    -read label:"This is a Magick Script!"
    127 %    -write show: -exit
    128 %
    129 %
    130 %  DOS script launcher...
    131 %
    132 %  Similarly any '@' at the start of the line (outside of quotes) will also be
    133 %  treated as comment. This allows you to create a DOS script launcher, to
    134 %  allow ".bat" DOS scripts to run as "magick" scripts instead.
    135 %
    136 %    @echo This line is DOS executed but ignored by Magick
    137 %    @magick -script %~dpnx0 %*
    138 %    @echo This line is processed after the Magick script is finished
    139 %    @GOTO :EOF
    140 %    #
    141 %    # The rest of the file is magick script
    142 %    -read label:"This is a Magick Script!"
    143 %    -write show: -exit
    144 %
    145 % But this can also be used as a shell script launcher as well!
    146 % Though is more restrictive and less free-form than using ':'.
    147 %
    148 %    #!/bin/sh
    149 %    @() { exec magick -script "$@"; }
    150 %    @ "$0" "$@"; exit
    151 %    #
    152 %    # The rest of the file is magick script
    153 %    -read label:"This is a Magick Script!"
    154 %    -write show: -exit
    155 %
    156 % Or even like this...
    157 %
    158 %    #!/bin/sh
    159 %    @() { }
    160 %    @; exec magick -script "$0" "$@"; exit
    161 %    #
    162 %    # The rest of the file is magick script
    163 %    -read label:"This is a Magick Script!"
    164 %    -write show: -exit
    165 %
    166 */
    167 
    168 /*
    170   Include declarations.
    171 
    172   NOTE: Do not include if being compiled into the "test/script-token-test.c"
    173   module, for low level token testing.
    174 */
    175 #ifndef SCRIPT_TOKEN_TESTING
    176 #  include "MagickWand/studio.h"
    177 #  include "MagickWand/MagickWand.h"
    178 #  include "MagickWand/script-token.h"
    179 #  include "MagickCore/string-private.h"
    180 #  include "MagickCore/utility-private.h"
    181 #endif
    182 
    183 /*
    185 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    186 %                                                                             %
    187 %                                                                             %
    188 %                                                                             %
    189 %   A c q u i r e S c r i p t T o k e n I n f o                               %
    190 %                                                                             %
    191 %                                                                             %
    192 %                                                                             %
    193 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    194 %
    195 %  AcquireScriptTokenInfo() allocates, initializes and opens the given
    196 %  file stream from which tokens are to be extracted.
    197 %
    198 %  The format of the AcquireScriptTokenInfo method is:
    199 %
    200 %     ScriptTokenInfo *AcquireScriptTokenInfo(const char *filename)
    201 %
    202 %  A description of each parameter follows:
    203 %
    204 %    o filename   the filename to open  ("-" means stdin)
    205 %
    206 */
    207 WandExport ScriptTokenInfo *AcquireScriptTokenInfo(const char *filename)
    208 {
    209   ScriptTokenInfo
    210     *token_info;
    211 
    212   token_info=(ScriptTokenInfo *) AcquireMagickMemory(sizeof(*token_info));
    213   if (token_info == (ScriptTokenInfo *) NULL)
    214     return token_info;
    215   (void) ResetMagickMemory(token_info,0,sizeof(*token_info));
    216 
    217   token_info->opened=MagickFalse;
    218   if ( LocaleCompare(filename,"-") == 0 ) {
    219     token_info->stream=stdin;
    220     token_info->opened=MagickFalse;
    221   }
    222   else if ( LocaleNCompare(filename,"fd:",3) == 0 ) {
    223     token_info->stream=fdopen(StringToLong(filename+3),"r");
    224     token_info->opened=MagickFalse;
    225   }
    226   else {
    227     token_info->stream=fopen_utf8(filename, "r");
    228   }
    229   if ( token_info->stream == (FILE *) NULL ) {
    230     token_info=(ScriptTokenInfo *) RelinquishMagickMemory(token_info);
    231     return(token_info);
    232   }
    233 
    234   token_info->curr_line=1;
    235   token_info->length=INITAL_TOKEN_LENGTH;
    236   token_info->token=(char *) AcquireMagickMemory(token_info->length);
    237 
    238   token_info->status=(token_info->token != (char *) NULL)
    239                       ? TokenStatusOK : TokenStatusMemoryFailed;
    240   token_info->signature=MagickWandSignature;
    241 
    242   return token_info;
    243 }
    244 
    245 /*
    247 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    248 %                                                                             %
    249 %                                                                             %
    250 %                                                                             %
    251 %   D e s t r o y S c r i p t T o k e n I n f o                               %
    252 %                                                                             %
    253 %                                                                             %
    254 %                                                                             %
    255 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    256 %
    257 %  DestroyScriptTokenInfo() closes the token stream if it is flagged as
    258 %  opened, then releases the token buffer and the structure itself.
    259 %
    260 %  The format of the DestroyScriptTokenInfo method is:
    261 %
    262 %     ScriptTokenInfo *DestroyScriptTokenInfo(ScriptTokenInfo *token_info)
    263 %
    264 %  A description of each parameter follows:
    265 %
    266 %    o token_info   The ScriptTokenInfo structure to be destroyed
    267 %
    268 */
    269 WandExport ScriptTokenInfo * DestroyScriptTokenInfo(ScriptTokenInfo *token_info)
    270 {
    271   assert(token_info != (ScriptTokenInfo *) NULL);
    272   assert(token_info->signature == MagickWandSignature);
    273 
    274   if ( token_info->opened != MagickFalse )
    275     fclose(token_info->stream);
    276 
    277   if (token_info->token != (char *) NULL )
    278     token_info->token=(char *) RelinquishMagickMemory(token_info->token);
    279   token_info=(ScriptTokenInfo *) RelinquishMagickMemory(token_info);
    280   return(token_info);
    281 }
    282 
    283 /*
    285 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    286 %                                                                             %
    287 %                                                                             %
    288 %                                                                             %
    289 %   G e t S c r i p t T o k e n                                               %
    290 %                                                                             %
    291 %                                                                             %
    292 %                                                                             %
    293 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    294 %
    295 %  GetScriptToken() is a fairly general, finite state token parser that returns
    296 %  tokens one at a time, as soon as possible.
    297 %
    298 %
    299 %  The format of the GetScriptToken method is:
    300 %
    301 %     MagickBooleanType GetScriptToken(ScriptTokenInfo *token_info)
    302 %
    303 %  A description of each parameter follows:
    304 %
    305 %    o token_info    pointer to a structure holding token details
    306 %
    307 */
    308 /* States of the parser */
    309 #define IN_WHITE 0
    310 #define IN_TOKEN 1
    311 #define IN_QUOTE 2
    312 #define IN_COMMENT 3
    313 
    314 /* Macro to read character from stream
    315 
    316    This also keeps track of the line and column counts.
    317    The EOL is defined as either '\r\n', or '\r', or '\n'.
    318    A '\r' on its own is converted into a '\n' to correctly handle
    319    raw input, typically due to 'copy-n-paste' of text files.
    320    But a '\r\n' sequence is left ASIS for string handling
    321 */
    322 #define GetChar(c) \
    323 { \
    324   c=fgetc(token_info->stream); \
    325   token_info->curr_column++; \
    326   if ( c == '\r' ) { \
    327     c=fgetc(token_info->stream); \
    328     ungetc(c,token_info->stream); \
    329     c = (c!='\n')?'\n':'\r'; \
    330   } \
    331   if ( c == '\n' ) \
    332     token_info->curr_line++, token_info->curr_column=0; \
    333   if (c == EOF ) \
    334     break; \
    335   if ( (c>='\0' && c<'\a') || (c>'\r' && c<' ' && c!='\033') ) { \
    336     token_info->status=TokenStatusBinary; \
    337     break; \
    338   } \
    339 }
    340 /* macro to collect the token characters */
    341 #define SaveChar(c) \
    342 { \
    343   if ((size_t) offset >= (token_info->length-1)) { \
    344     if ( token_info->length >= MagickPathExtent ) \
    345       token_info->length += MagickPathExtent; \
    346     else \
    347       token_info->length *= 4; \
    348     token_info->token = (char *) \
    349          ResizeMagickMemory(token_info->token, token_info->length); \
    350     if ( token_info->token == (char *) NULL ) { \
    351       token_info->status=TokenStatusMemoryFailed; \
    352       break; \
    353     } \
    354   } \
    355   token_info->token[offset++]=(char) (c); \
    356 }
    357 
    358 WandExport MagickBooleanType GetScriptToken(ScriptTokenInfo *token_info)
    359 {
    360   int
    361     quote,
    362     c;
    363 
    364   int
    365     state;
    366 
    367   ssize_t
    368     offset;
    369 
    370   /* EOF - no more tokens! */
    371   if (token_info == (ScriptTokenInfo *) NULL)
    372     return(MagickFalse);
    373   if (token_info->status != TokenStatusOK)
    374     {
    375       token_info->token[0]='\0';
    376       return(MagickFalse);
    377     }
    378   state=IN_WHITE;
    379   quote='\0';
    380   offset=0;
    381 DisableMSCWarning(4127)
    382   while(1)
    383 RestoreMSCWarning
    384   {
    385     /* get character */
    386     GetChar(c);
    387 
    388     /* hash comment handling */
    389     if ( state == IN_COMMENT ) {
    390       if ( c == '\n' )
    391         state=IN_WHITE;
    392       continue;
    393     }
    394     /* comment lines start with '#' anywhere, or ':' or '@' at start of line */
    395     if ( state == IN_WHITE )
    396       if ( ( c == '#' ) ||
    397            ( token_info->curr_column==1 && (c == ':' || c == '@' ) ) )
    398         state=IN_COMMENT;
    399     /* whitespace token separator character */
    400     if (strchr(" \n\r\t",c) != (char *) NULL) {
    401       switch (state) {
    402         case IN_TOKEN:
    403           token_info->token[offset]='\0';
    404           return(MagickTrue);
    405         case IN_QUOTE:
    406           SaveChar(c);
    407           break;
    408       }
    409       continue;
    410     }
    411     /* quote character */
    412     if ( c=='\'' || c =='"' ) {
    413       switch (state) {
    414         case IN_WHITE:
    415           token_info->token_line=token_info->curr_line;
    416           token_info->token_column=token_info->curr_column;
    417         case IN_TOKEN:
    418           state=IN_QUOTE;
    419           quote=c;
    420           break;
    421         case IN_QUOTE:
    422           if (c == quote)
    423             {
    424               state=IN_TOKEN;
    425               quote='\0';
    426             }
    427           else
    428             SaveChar(c);
    429           break;
    430       }
    431       continue;
    432     }
    433     /* escape char (preserve in quotes - unless escaping the same quote) */
    434     if (c == '\\')
    435       {
    436         if ( state==IN_QUOTE && quote == '\'' ) {
    437             SaveChar('\\');
    438             continue;
    439           }
    440         GetChar(c);
    441         if (c == '\n')
    442           switch (state) {
    443             case IN_COMMENT:
    444               state=IN_WHITE;  /* end comment */
    445             case IN_QUOTE:
    446               if (quote != '"')
    447                 break;         /* in double quotes only */
    448             case IN_WHITE:
    449             case IN_TOKEN:
    450               continue;        /* line continuation - remove line feed */
    451           }
    452         switch (state) {
    453           case IN_WHITE:
    454             token_info->token_line=token_info->curr_line;
    455             token_info->token_column=token_info->curr_column;
    456             state=IN_TOKEN;
    457             break;
    458           case IN_QUOTE:
    459             if (c != quote && c != '\\')
    460               SaveChar('\\');
    461             break;
    462         }
    463         SaveChar(c);
    464         continue;
    465       }
    466     /* ordinary character */
    467     switch (state) {
    468       case IN_WHITE:
    469         token_info->token_line=token_info->curr_line;
    470         token_info->token_column=token_info->curr_column;
    471         state=IN_TOKEN;
    472       case IN_TOKEN:
    473       case IN_QUOTE:
    474         SaveChar(c);
    475         break;
    476       case IN_COMMENT:
    477         break;
    478     }
    479   }
    480   /* input stream has EOF or produced a fatal error */
    481   token_info->token[offset]='\0';
    482   if ( token_info->status != TokenStatusOK )
    483     return(MagickFalse);  /* fatal condition - no valid token */
    484   token_info->status = TokenStatusEOF;
    485   if ( state == IN_QUOTE)
    486     token_info->status = TokenStatusBadQuotes;
    487   if ( state == IN_TOKEN)
    488     return(MagickTrue);   /* token with EOF at end - no problem */
    489   return(MagickFalse);    /* in white space or in quotes - invalid token */
    490 }
    491