Home | History | Annotate | Download | only in MagickWand
      1 /*
      2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      3 %                                                                             %
      4 %                                                                             %
      5 %    SSS    CCC  RRRR   III  PPPP  TTTTT    TTTTT  OOO   K  K  EEEE  N   N    %
      6 %   S      C     R   R   I   P   P   T        T   O   O  K K   E     NN  N    %
      7 %    SSS   C     RRRR    I   PPPP    T        T   O   O  KK    EEE   N N N    %
      8 %       S  C     R R     I   P       T        T   O   O  K K   E     N  NN    %
      9 %   SSSS    CCC  R  RR  III  P       T        T    OOO   K  K  EEEE  N   N    %
     10 %                                                                             %
     11 %                    Tokenize Magick Script into Options                      %
     12 %                                                                             %
     13 %                             Dragon Computing                                %
     14 %                             Anthony Thyssen                                 %
     15 %                               January 2012                                  %
     16 %                                                                             %
     17 %                                                                             %
     18 %  Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization      %
     19 %  dedicated to making software imaging solutions freely available.           %
     20 %                                                                             %
     21 %  You may not use this file except in compliance with the License.  You may  %
     22 %  obtain a copy of the License at                                            %
     23 %                                                                             %
     24 %    http://www.imagemagick.org/script/license.php                            %
     25 %                                                                             %
     26 %  Unless required by applicable law or agreed to in writing, software        %
     27 %  distributed under the License is distributed on an "AS IS" BASIS,          %
     28 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
     29 %  See the License for the specific language governing permissions and        %
     30 %  limitations under the License.                                             %
     31 %                                                                             %
     32 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     33 %
     34 %  Read a stream of characters and return tokens one at a time.
     35 %
     36 %  The input stream is divided into individual 'tokens' (representing 'words'
     37 %  or 'options'), in a way that is as close to a UNIX shell as is feasible.
     38 %  Only shell variable, and command substitutions will not be performed.
     39 %  Tokens can be any length.
     40 %
     41 %  The main function call is GetScriptToken() (see below) which returns one
     42 %  and only one token at a time.  The other functions provide support to this
     43 %  function, opening scripts, and setting up the required structures.
     44 %
     45 %  More specifically...
     46 %
     47 %  Tokens are white space separated, and may be quoted, or even partially
     48 %  quoted by either single or double quotes, or the use of backslashes,
     49 %  or any mix of the three.
     50 %
     51 %  For example:    This\ is' a 'single" token"
     52 %
     53 %  A token is returned immediately once the end of the token is found, that
     54 %  is, as soon as an unquoted white-space or EOF condition has been found.
     55 %  The file stream is parsed purely character-by-character, regardless of any
     56 %  buffering constraints set by the system.  It is not parsed line-by-line.
     57 %
     58 %  The function will return 'MagickTrue' if a valid token was found, while
     59 %  the token status will be set to 'OK' or 'EOF', according to the cause of
     60 %  the end of token.  The token may be an empty string if the input was a
     61 %  quoted empty string.  Other error conditions return a value of
     62 %  MagickFalse, indicating that any token found was incomplete due to some
     63 %  error condition.
     64 %
     65 %  Single quotes will preserve all characters including backslashes. Double
     66 %  quotes will also preserve backslashes unless escaping a double quote,
     67 %  or another backslash.  Other shell meta-characters are not treated as
     68 %  special by this tokenizer.
     69 %
     70 %  For example Quoting the quote chars:
     71 %              \'  "'"       \"  '"'  "\""      \\  '\'  "\\"
     72 %
     73 %  Outside quotes, backslash characters will make spaces, tabs and quotes part
     74 %  of a token returned. However a backslash at the end of a line (and outside
     75 %  quotes) will cause the newline to be completely ignored (as per the shell
     76 %  line continuation).
     77 %
     78 %  Comments start with a '#' character at the start of a new token, and will
     79 %  be completely ignored up to the end of line, regardless of any backslash at
     80 %  the end of the line.  You can escape a comment '#', using quotes or
     81 %  backslashes just as you can in a shell.
     82 %
     83 %  The parser will accept both newlines, returns, or return-newlines to mark
     84 %  the EOL. Though this is technically breaking (or perhaps adding to) the
     85 %  'BASH' syntax that is being followed.
     86 %
     87 %
     88 %  UNIX script Launcher...
     89 %
     90 %  The use of '#' comments allow normal UNIX 'scripting' to be used to call on
     91 %  the "magick" command to parse the tokens from a file
     92 %
     93 %    #!/path/to/command/magick -script
     94 %
     95 %
     96 %  UNIX 'env' command launcher...
     97 %
     98 %  If "magick" is renamed "magick-script" you can use a 'env' UNIX launcher
     99 %
    100 %    #!/usr/bin/env magick-script
    101 %
    102 %
    103 %  Shell script launcher...
    104 %
    105 %  As a special case a ':' at the start of a line is also treated as a comment
    106 %  This allows a magick script to ignore a line that can be parsed by the shell
    107 %  and not by the magick script (tokenizer).  This allows for an alternative
    108 %  script 'launcher' to be used for magick scripts.
    109 %
    110 %    #!/bin/sh
    111 %    :; exec magick -script "$0" "$@"; exit 10
    112 %    #
    113 %    # The rest of the file is magick script
    114 %    -read label:"This is a Magick Script!"
    115 %    -write show: -exit
    116 %
    117 % Or with some shell pre/post processing...
    118 %
    119 %    #!/bin/sh
    120 %    :; echo "This part is run in the shell, but ignored by Magick"
    121 %    :; magick -script "$0" "$@"
    122 %    :; echo "This is run after the "magick" script is finished!"
    123 %    :; exit 10
    124 %    #
    125 %    # The rest of the file is magick script
    126 %    -read label:"This is a Magick Script!"
    127 %    -write show: -exit
    128 %
    129 %
    130 %  DOS script launcher...
    131 %
    132 %  Similarly any '@' at the start of the line (outside of quotes) will also be
    133 %  treated as a comment. This allows you to create a DOS script launcher, so
    134 %  that ".bat" DOS scripts can be run as "magick" scripts instead.
    135 %
    136 %    @echo This line is DOS executed but ignored by Magick
    137 %    @magick -script %~dpnx0 %*
    138 %    @echo This line is processed after the Magick script is finished
    139 %    @GOTO :EOF
    140 %    #
    141 %    # The rest of the file is magick script
    142 %    -read label:"This is a Magick Script!"
    143 %    -write show: -exit
    144 %
    145 % But this can also be used as a shell script launcher as well!
    146 % Though is more restrictive and less free-form than using ':'.
    147 %
    148 %    #!/bin/sh
    149 %    @() { exec magick -script "$@"; }
    150 %    @ "$0" "$@"; exit
    151 %    #
    152 %    # The rest of the file is magick script
    153 %    -read label:"This is a Magick Script!"
    154 %    -write show: -exit
    155 %
    156 % Or even like this...
    157 %
    158 %    #!/bin/sh
    159 %    @() { }
    160 %    @; exec magick -script "$0" "$@"; exit
    161 %    #
    162 %    # The rest of the file is magick script
    163 %    -read label:"This is a Magick Script!"
    164 %    -write show: -exit
    165 %
    166 */
    168 /*
    170   Include declarations.
    172   NOTE: Do not include if being compiled into the "test/script-token-test.c"
    173   module, for low level token testing.
    174 */
    175 #ifndef SCRIPT_TOKEN_TESTING
    176 #  include "MagickWand/studio.h"
    177 #  include "MagickWand/MagickWand.h"
    178 #  include "MagickWand/script-token.h"
    179 #  include "MagickCore/string-private.h"
    180 #  include "MagickCore/utility-private.h"
    181 #endif
    183 /*
    185 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    186 %                                                                             %
    187 %                                                                             %
    188 %                                                                             %
    189 %   A c q u i r e S c r i p t T o k e n I n f o                               %
    190 %                                                                             %
    191 %                                                                             %
    192 %                                                                             %
    193 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    194 %
    195 %  AcquireScriptTokenInfo() allocates, initializes and opens the given
    196 %  file stream from which tokens are to be extracted.
    197 %
    198 %  The format of the AcquireScriptTokenInfo method is:
    199 %
    200 %     ScriptTokenInfo *AcquireScriptTokenInfo(const char *filename)
    201 %
    202 %  A description of each parameter follows:
    203 %
    204 %    o filename   the filename to open  ("-" means stdin, "fd:N" means the
    204 %                 already open file descriptor N)
    205 %
    206 */
    207 WandExport ScriptTokenInfo *AcquireScriptTokenInfo(const char *filename)
    208 {
    209   ScriptTokenInfo
    210     *token_info;
    212   token_info=(ScriptTokenInfo *) AcquireMagickMemory(sizeof(*token_info));
    213   if (token_info == (ScriptTokenInfo *) NULL)
    214     return token_info;
    215   (void) ResetMagickMemory(token_info,0,sizeof(*token_info));
    217   token_info->opened=MagickFalse;
    218   if ( LocaleCompare(filename,"-") == 0 ) {
    219     token_info->stream=stdin;
    220     token_info->opened=MagickFalse;
    221   }
    222   else if ( LocaleNCompare(filename,"fd:",3) == 0 ) {
    223     token_info->stream=fdopen(StringToLong(filename+3),"r");
    224     token_info->opened=MagickFalse;
    225   }
    226   else {
    227     token_info->stream=fopen_utf8(filename, "r");
    228   }
    229   if ( token_info->stream == (FILE *) NULL ) {
    230     token_info=(ScriptTokenInfo *) RelinquishMagickMemory(token_info);
    231     return(token_info);
    232   }
    234   token_info->curr_line=1;
    235   token_info->length=INITAL_TOKEN_LENGTH;
    236   token_info->token=(char *) AcquireMagickMemory(token_info->length);
    238   token_info->status=(token_info->token != (char *) NULL)
    239                       ? TokenStatusOK : TokenStatusMemoryFailed;
    240   token_info->signature=MagickWandSignature;
    242   return token_info;
    243 }
    245 /*
    247 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    248 %                                                                             %
    249 %                                                                             %
    250 %                                                                             %
    251 %   D e s t r o y S c r i p t T o k e n I n f o                               %
    252 %                                                                             %
    253 %                                                                             %
    254 %                                                                             %
    255 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    256 %
    257 %  DestroyScriptTokenInfo() closes the input stream when the structure's
    258 %  'opened' flag is set, then releases the token buffer and the structure.
    259 %
    260 %  The format of the DestroyScriptTokenInfo method is:
    261 %
    262 %     ScriptTokenInfo *DestroyScriptTokenInfo(ScriptTokenInfo *token_info)
    263 %
    264 %  A description of each parameter follows:
    265 %
    266 %    o token_info   The ScriptTokenInfo structure to be destroyed
    267 %
    268 */
    269 WandExport ScriptTokenInfo * DestroyScriptTokenInfo(ScriptTokenInfo *token_info)
    270 {
    271   assert(token_info != (ScriptTokenInfo *) NULL);
    272   assert(token_info->signature == MagickWandSignature);
    274   if ( token_info->opened != MagickFalse )
    275     fclose(token_info->stream);
    277   if (token_info->token != (char *) NULL )
    278     token_info->token=(char *) RelinquishMagickMemory(token_info->token);
    279   token_info=(ScriptTokenInfo *) RelinquishMagickMemory(token_info);
    280   return(token_info);
    281 }
    283 /*
    285 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    286 %                                                                             %
    287 %                                                                             %
    288 %                                                                             %
    289 %   G e t S c r i p t T o k e n                                               %
    290 %                                                                             %
    291 %                                                                             %
    292 %                                                                             %
    293 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    294 %
    295 %  GetScriptToken() is a fairly general, finite state token parser that
    296 %  returns tokens one at a time, as soon as possible.
    297 %
    298 %
    299 %  The format of the GetScriptToken method is:
    300 %
    301 %     MagickBooleanType GetScriptToken(ScriptTokenInfo *token_info)
    302 %
    303 %  A description of each parameter follows:
    304 %
    305 %    o token_info    pointer to a structure holding token details
    306 %
    307 */
    308 /* States of the parser */
    309 #define IN_WHITE 0
    310 #define IN_TOKEN 1
    311 #define IN_QUOTE 2
    312 #define IN_COMMENT 3
    314 /* Macro to read character from stream
    316    This also keeps track of the line and column counts.
    317    The EOL is defined as either '\r\n', or '\r', or '\n'.
    318    A '\r' on its own is converted into a '\n' to correctly handle
    319    raw input, typically due to 'copy-n-paste' of text files.
    320    But a '\r\n' sequence is left ASIS for string handling
    321 */
    322 #define GetChar(c) \
    323 { \
    324   c=fgetc(token_info->stream); \
    325   token_info->curr_column++; \
    326   if ( c == '\r' ) { \
    327     c=fgetc(token_info->stream); \
    328     ungetc(c,token_info->stream); \
    329     c = (c!='\n')?'\n':'\r'; \
    330   } \
    331   if ( c == '\n' ) \
    332     token_info->curr_line++, token_info->curr_column=0; \
    333   if (c == EOF ) \
    334     break; \
    335   if ( (c>='\0' && c<'\a') || (c>'\r' && c<' ' && c!='\033') ) { \
    336     token_info->status=TokenStatusBinary; \
    337     break; \
    338   } \
    339 }
    340 /* macro to collect the token characters */
    341 #define SaveChar(c) \
    342 { \
    343   if ((size_t) offset >= (token_info->length-1)) { \
    344     if ( token_info->length >= MagickPathExtent ) \
    345       token_info->length += MagickPathExtent; \
    346     else \
    347       token_info->length *= 4; \
    348     token_info->token = (char *) \
    349          ResizeMagickMemory(token_info->token, token_info->length); \
    350     if ( token_info->token == (char *) NULL ) { \
    351       token_info->status=TokenStatusMemoryFailed; \
    352       break; \
    353     } \
    354   } \
    355   token_info->token[offset++]=(char) (c); \
    356 }
    358 WandExport MagickBooleanType GetScriptToken(ScriptTokenInfo *token_info)
    359 {
    360   int
    361     quote,
    362     c;
    364   int
    365     state;
    367   ssize_t
    368     offset;
    370   /* EOF - no more tokens! */
    371   if (token_info == (ScriptTokenInfo *) NULL)
    372     return(MagickFalse);
    373   if (token_info->status != TokenStatusOK)
    374     {
    375       token_info->token[0]='\0';
    376       return(MagickFalse);
    377     }
    378   state=IN_WHITE;
    379   quote='\0';
    380   offset=0;
    381 DisableMSCWarning(4127)
    382   while(1)
    383 RestoreMSCWarning
    384   {
    385     /* get character */
    386     GetChar(c);
    388     /* hash comment handling */
    389     if ( state == IN_COMMENT ) {
    390       if ( c == '\n' )
    391         state=IN_WHITE;
    392       continue;
    393     }
    394     /* comment lines start with '#' anywhere, or ':' or '@' at start of line */
    395     if ( state == IN_WHITE )
    396       if ( ( c == '#' ) ||
    397            ( token_info->curr_column==1 && (c == ':' || c == '@' ) ) )
    398         state=IN_COMMENT;
    399     /* whitespace token separator character */
    400     if (strchr(" \n\r\t",c) != (char *) NULL) {
    401       switch (state) {
    402         case IN_TOKEN:
    403           token_info->token[offset]='\0';
    404           return(MagickTrue);
    405         case IN_QUOTE:
    406           SaveChar(c);
    407           break;
    408       }
    409       continue;
    410     }
    411     /* quote character */
    412     if ( c=='\'' || c =='"' ) {
    413       switch (state) {
    414         case IN_WHITE:
    415           token_info->token_line=token_info->curr_line;
    416           token_info->token_column=token_info->curr_column;
    417         case IN_TOKEN:
    418           state=IN_QUOTE;
    419           quote=c;
    420           break;
    421         case IN_QUOTE:
    422           if (c == quote)
    423             {
    424               state=IN_TOKEN;
    425               quote='\0';
    426             }
    427           else
    428             SaveChar(c);
    429           break;
    430       }
    431       continue;
    432     }
    433     /* escape char (preserve in quotes - unless escaping the same quote) */
    434     if (c == '\\')
    435       {
    436         if ( state==IN_QUOTE && quote == '\'' ) {
    437             SaveChar('\\');
    438             continue;
    439           }
    440         GetChar(c);
    441         if (c == '\n')
    442           switch (state) {
    443             case IN_COMMENT:
    444               state=IN_WHITE;  /* end comment */
    445             case IN_QUOTE:
    446               if (quote != '"')
    447                 break;         /* in double quotes only */
    448             case IN_WHITE:
    449             case IN_TOKEN:
    450               continue;        /* line continuation - remove line feed */
    451           }
    452         switch (state) {
    453           case IN_WHITE:
    454             token_info->token_line=token_info->curr_line;
    455             token_info->token_column=token_info->curr_column;
    456             state=IN_TOKEN;
    457             break;
    458           case IN_QUOTE:
    459             if (c != quote && c != '\\')
    460               SaveChar('\\');
    461             break;
    462         }
    463         SaveChar(c);
    464         continue;
    465       }
    466     /* ordinary character */
    467     switch (state) {
    468       case IN_WHITE:
    469         token_info->token_line=token_info->curr_line;
    470         token_info->token_column=token_info->curr_column;
    471         state=IN_TOKEN;
    472       case IN_TOKEN:
    473       case IN_QUOTE:
    474         SaveChar(c);
    475         break;
    476       case IN_COMMENT:
    477         break;
    478     }
    479   }
    480   /* input stream has EOF or produced a fatal error */
    481   token_info->token[offset]='\0';
    482   if ( token_info->status != TokenStatusOK )
    483     return(MagickFalse);  /* fatal condition - no valid token */
    484   token_info->status = TokenStatusEOF;
    485   if ( state == IN_QUOTE)
    486     token_info->status = TokenStatusBadQuotes;
    487   if ( state == IN_TOKEN)
    488     return(MagickTrue);   /* token with EOF at end - no problem */
    489   return(MagickFalse);    /* in white space or in quotes - invalid token */
    490 }