Home | History | Annotate | Download | only in cgi
      1 /*
      2  * Copyright 2001-2004 Brandon Long
      3  * All Rights Reserved.
      4  *
      5  * ClearSilver Templating System
      6  *
      7  * This code is made available under the terms of the ClearSilver License.
      8  * http://www.clearsilver.net/license.hdf
      9  *
     10  */
     11 
     12 #ifndef __CGI_H_
     13 #define __CGI_H_ 1
     14 
     15 #include <stdarg.h>
     16 #include "util/neo_err.h"
     17 #include "util/neo_hdf.h"
     18 #include "cs/cs.h"
     19 
     20 __BEGIN_DECLS
     21 
     22 extern NERR_TYPE CGIFinished;
     23 extern NERR_TYPE CGIUploadCancelled;
     24 extern NERR_TYPE CGIParseNotHandled;
     25 
     26 /* HACK: Set this value if you want to treat empty CGI Query variables as
     27  * non-existent.
     28  */
     29 extern int IgnoreEmptyFormVars;
     30 
     31 typedef struct _cgi CGI;
     32 /* UPLOAD_CB: type of CGI.upload_cb.  CGI_PARSE_CB: see cgi_register_parse_cb. */
     33 typedef int (*UPLOAD_CB)(CGI *, int nread, int expected);
     34 typedef NEOERR* (*CGI_PARSE_CB)(CGI *, char *method, char *ctype, void *rock);
     35 
     36 struct _cgi_parse_cb  /* one parse-callback entry; see cgi_register_parse_cb */
     37 {
     38   char *method;      /* HTTP method this entry is for */
     39   int any_method;    /* NOTE(review): presumably set when method is "*" -- confirm */
     40   char *ctype;       /* HTTP Content-Type this entry is for */
     41   int any_ctype;     /* NOTE(review): presumably set when ctype is "*" -- confirm */
     42   void *rock;        /* opaque caller data handed to parse_cb */
     43   CGI_PARSE_CB parse_cb;       /* the callback itself */
     44   struct _cgi_parse_cb *next;  /* next entry in the list */
     45 };
     46 
     47 struct _cgi  /* CGI state allocated by cgi_init; typedef'd as CGI */
     48 {
     49   /* Only public parts of this structure */
     50   void *data;  /* you can store your own information here */
     51   HDF *hdf;    /* the HDF dataset associated with this CGI */
     52   /* NOTE(review): looks like a per-CGI form of IgnoreEmptyFormVars -- confirm */
     53   BOOL ignore_empty_form_vars;
     54   /* upload callback; see the UPLOAD_CB typedef for its signature */
     55   UPLOAD_CB upload_cb;
     56   /* NOTE(review): presumably request-body bytes expected / read so far -- confirm */
     57   int data_expected;
     58   int data_read;
     59   struct _cgi_parse_cb *parse_callbacks;  /* list of entries linked through ->next */
     60 
     61   /* For line oriented reading of form-data input.  Used during cgi_init
     62    * only */
     63   char *buf;
     64   int buflen;
     65   int readlen;
     66   BOOL found_nl;
     67   BOOL unget;
     68   char *last_start;
     69   int last_length;
     70   int nl;
     71 
     72   /* this is a list of filepointers pointing at files that were uploaded */
     73   /* Use cgi_filehandle to access these */
     74   ULIST *files;
     75 
     76   /* By default, cgi_parse unlinks uploaded files as it opens them. */
     77   /* If Config.Upload.Unlink is set to 0, the files are not unlinked */
     78   /* and their names are stored in this list. */
     79   /* Use Query.*.FileName to access these */
     80   ULIST *filenames;
     81 
     82   /* keep track of the time between cgi_init and cgi_render */
     83   double time_start;
     84   double time_end;
     85 };
     86 
     87 
     88 /*
     89  * Function: cgi_init - Initialize ClearSilver CGI environment
     90  * Description: cgi_init initializes the ClearSilver CGI environment,
     91  *              including creating the HDF data set.  It will then import
     92  *              the standard CGI environment variables into that dataset,
     93  *              will parse the QUERY_STRING into the data set, and parse
     94  *              the HTTP_COOKIE into the data set.  Note that if the
     95  *              var xdisplay is in the form data, cgi_init will attempt
     96  *              to validate the value and launch the configured debugger
     97  *              on the CGI program.  These variables have to be
     98  *              specified in the HDF file pointed to by hdf_file.  The
     99  *              default settings do not allow debugger launching for
    100  *              security reasons.
    101  * Input: cgi - a pointer to a CGI pointer
    102  *        hdf_file - the path to an HDF data set file that will also be
    103  *                   loaded into the dataset.  This will likely have to be a
    104  *                   full path, as the HDF search paths are not yet set up.
    105  *                   NOTE(review): the prototype takes HDF *hdf -- confirm.
    106  * Output: cgi - an allocated CGI struct
    107  * Return: NERR_PARSE - parse error in CGI input
    108  *         NERR_NOMEM - unable to allocate memory
    109  *         NERR_NOT_FOUND - hdf_file doesn't exist
    110  */
    111 NEOERR *cgi_init (CGI **cgi, HDF *hdf);
    112 
    113 /*
    114  * Function: cgi_parse - Parse incoming CGI data
    115  * Description: We split cgi_init into two sections, one that parses
    116  * 		just the basics, and the second is cgi_parse.  cgi_parse
    117  * 		is responsible for parsing the entity body of the HTTP
    118  * 		request.  This payload is typically only sent (expected)
    119  * 		on POST/PUT requests, but generally this is called on
    120  * 		all incoming requests.  This function walks the list of
    121  * 		registered parse callbacks (see cgi_register_parse_cb),
    122  * 		and if none of those matches or handles the request, it
    123  * 		falls back to the builtin handlers:
    124  * 		  POST w/ multipart/form-data
    125  * 		  POST w/ application/form-data
    126  * 		  PUT w/ any content type
    127  * 		In general, if there is no Content-Length, then
    128  * 		cgi_parse ignores the payload and doesn't raise an
    129  * 		error.
    130  * Input: cgi - a pointer to a CGI struct
    131  * Output: Either data populated into files and cgi->hdf, or whatever
    132  *         other side effects of your own registered callbacks.
    133  * Return: NERR_PARSE - parse error in CGI input
    134  *         NERR_NOMEM - unable to allocate memory
    135  *         NERR_NOT_FOUND - hdf_file doesn't exist
    136  *         NERR_IO - error reading HDF file or reading CGI stdin, or
    137  *                   writing data on multipart/form-data file submission
    138  *         Anything else you raise.
    139  */
    140 NEOERR *cgi_parse (CGI *cgi);
    141 
    142 /*
    143  * Function: cgi_register_parse_cb - Register a parse callback
    144  * Description: The ClearSilver CGI Kit has built-in functionality to handle
    145  *              the following methods:
    146  *              GET -> doesn't have any data except query string, which
    147  *                is processed for all methods
    148  *              POST w/ application/x-www-form-urlencoded
    149  *              POST w/ multipart/form-data
    150  *                processed as RFC2388 data into files and HDF (see
    151  *                cgi_filehandle())
    152  *              PUT (any type)
    153  *                The entire data chunk is stored as a file, with meta
    154  *                data in HDF (similar to single files in RFC2388).
    155  *                The data is accessible via cgi_filehandle with NULL
    156  *                for name.
    157  *              To handle other methods/content types, you have to
    158  *              register your own parse function.  This isn't necessary
    159  *              if you aren't expecting any data, and technically HTTP
    160  *              only allows data on PUT/POST requests (and presumably
    161  *              user defined methods).  In particular, if you want to
    162  *              implement XML-RPC or SOAP, you'll have to register a
    163  *              callback here to grab the XML data chunk.  Usually
    164  *              you'll want to register POST w/ application/xml or POST
    165  *              w/ text/xml (you either need to register both or
    166  *              register POST w/ * and check the ctype yourself,
    167  *              remember to nerr_raise(CGIParseNotHandled) if you aren't
    168  *              handling the POST).
    169  *              In general, your callback should:
    170  *                Find out how much data is available:
    171  *                 l = hdf_get_value (cgi->hdf, "CGI.ContentLength", NULL);
    172  *                 len = atoi(l);
    173  *                And read/handle all of the data using cgiwrap_read.
    174  *                See the builtin handlers for how this is done.  Note
    175  *                that cgiwrap_read is not guaranteed to return all of
    176  *                the data you request (just like fread(3)) since it
    177  *                might be reading off a socket.  Sorry.
    178  *                You should be careful when reading the data to watch
    179  *                for short reads (ie, end of file) and cases where the
    180  *                client sends you data ad infinitum.
    181  * Input: cgi - a CGI struct
    182  *        method - the HTTP method you want to handle, or * for all
    183  *        ctype - the HTTP Content-Type you want to handle, or * for all
    184  *        rock - opaque data that we'll pass to your call back
    185  * Output: None
    186  * Return: CGIParseNotHandled if your callback doesn't want to handle
    187  *         this.  This causes cgi_parse to continue walking the list of
    188  *         callbacks.
    189  *
    190  */
    191 NEOERR *cgi_register_parse_cb(CGI *cgi, const char *method, const char *ctype,
    192                               void *rock, CGI_PARSE_CB parse_cb);
    193 
    194 /*
    195  * Function: cgi_destroy - deallocate the data associated with a CGI
    196  * Description: cgi_destroy will destroy all the data associated with a
    197  *              CGI, which mostly means the associated HDF and removal
    198  *              of any files that were uploaded via multipart/form-data.
    199  *              (Note that even in the event of a crash, these files
    200  *              will be deleted, as they were unlinked on creation and
    201  *              only exist because of the open file pointer)
    202  * Input: cgi - a pointer to a pointer to a CGI struct
    203  * Output: cgi - NULL on output
    204  * Return: None
    205  */
    206 void cgi_destroy (CGI **cgi);
    207 
    208 /*
    209  * Function: cgi_cs_init - initialize CS parser with the CGI defaults
    210  * Description: cgi_cs_init initializes a CS parser with the CGI HDF
    211  *              context, and registers the standard CGI filters
    212  * Input: cgi - a pointer to a CGI struct allocated with cgi_init
    213  *        cs - a pointer to a CS struct pointer
    214  * Output: cs - the allocated/initialized CS struct
    215  * Return: NERR_NOMEM - no memory was available to render the template
    216  */
    217 NEOERR *cgi_cs_init(CGI *cgi, CSPARSE **cs);
    218 
    219 /*
    220  * Function: cgi_display - render and display the CGI output to the user
    221  * Description: cgi_display will render the CS template pointed to by
    222  *              cs_file using the CGI's HDF data set, and send the
    223  *              output to the user.  Note that the output is actually
    224  *              rendered into memory first.
    225  * Input: cgi - a pointer to a CGI struct allocated with cgi_init
    226  *        cs_file - a ClearSilver template file
    227  * Output: None
    228  * Return: NERR_IO - an IO error occurred during output
    229  *         NERR_NOMEM - no memory was available to render the template
    230  */
    231 NEOERR *cgi_display (CGI *cgi, const char *cs_file);
    232 
    233 /*
    234  * Function: cgi_output - display the CGI output to the user
    235  * Description: Normally, this is called by cgi_display, but some
    236  *              people wanted it external so they could call it
    237  *              directly.
    238  * Input: cgi - a pointer to a CGI struct allocated with cgi_init
    239  *        output - the data to send to output from the CGI
    240  * Output: None
    241  * Return: NERR_IO - an IO error occurred during output
    242  *         NERR_NOMEM - no memory was available to render the template
    243  */
    244 NEOERR *cgi_output (CGI *cgi, STRING *output);
    245 
    246 /*
    247  * Function: cgi_filehandle - return a file pointer to an uploaded file
    248  * Description: cgi_filehandle will return the stdio FILE pointer
    249  *              associated with a file that was uploaded using
    250  *              multipart/form-data.  The FILE pointer is positioned at
    251  *              the start of the file when first available.
    252  * Input: cgi - a pointer to a CGI struct allocated with cgi_init
    253  *        form_name - the form name that the file was uploaded as
    254  *                    (not the filename) (if NULL, we're asking for the
    255  *                    file handle for the PUT upload)
    256  * Output: None
    257  * Return: A stdio FILE pointer, or NULL if an error occurs (usually
    258  *         indicates that the form_name wasn't found, but might indicate
    259  *         a problem with the HDF dataset)
    260  */
    261 FILE *cgi_filehandle (CGI *cgi, const char *form_name);
    262 
    263 /*
    264  * Function: cgi_neo_error - display a NEOERR call backtrace
    265  * Description: cgi_neo_error will output a 500 error containing the
    266  *              NEOERR call backtrace.  This function is likely to be
    267  *              removed from future versions in favor of some sort of
    268  *              user error mechanism.
    269  * Input: cgi - a pointer to a CGI struct
    270  *        err - a NEOERR (see util/neo_err.h for details)
    271  * Output: None
    272  * Return: None
    273  */
    274 void cgi_neo_error (CGI *cgi, NEOERR *err);
    275 
    276 /*
    277  * Function: cgi_error - display an error string to the user
    278  * Description: cgi_error will output a 500 error containing the
    279  *              specified error message.  This function is likely to be
    280  *              removed from future versions in favor of a user error
    281  *              mechanism.
    282  * Input: cgi - a pointer to a CGI struct
    283  *        fmt - printf style format string and arguments
    284  * Output: None
    285  * Return: None
    286  */
    287 void cgi_error (CGI *cgi, const char *fmt, ...)
    288                 ATTRIBUTE_PRINTF(2,3);
    289 
    290 /*
    291  * Function: cgi_debug_init - initialize standalone debugging
    292  * Description: cgi_debug_init initializes a CGI program for standalone
    293  *              debugging.  By running a ClearSilver CGI program with a
    294  *              filename on the command line as the first argument, the
    295  *              CGI program will load that file of the form K=V as a set
    296  *              of HTTP/CGI environment variables.  This allows you to
    297  *              run the program under a debugger in a reproducible
    298  *              environment.
    299  * Input: argc/argv - the arguments from main
    300  * Output: None
    301  * Return: None
    302  */
    303 void cgi_debug_init (int argc, char **argv);
    304 
    305 /*
    306  * Function: cgi_url_escape - url escape a string
    307  * Description: cgi_url_escape will do URL escaping on the passed in
    308  *              string, and return a newly allocated string that is escaped.
    309  *              Characters which are escaped include control characters,
    310  *              %, ?, +, space, =, &, /, and "
    311  * Input: buf - a 0 terminated string
    312  * Output: esc - a newly allocated string
    313  * Return: NERR_NOMEM - no memory available to allocate the escaped string
    314  */
    315 NEOERR *cgi_url_escape (const char *buf, char **esc);
    316 
    317 /*
    318  * Function: cgi_url_escape_more - url escape a string
    319  * Description: cgi_url_escape_more will do URL escaping on the passed in
    320  *              string, and return a newly allocated string that is escaped.
    321  *              Characters which are escaped include control characters,
    322  *              %, ?, +, space, =, &, /, and " and any characters in
    323  *              other
    324  * Input: buf - a 0 terminated string
    325  *        other - a 0 terminated string of characters to escape
    326  * Output: esc - a newly allocated string
    327  * Return: NERR_NOMEM - no memory available to allocate the escaped string
    328  */
    329 NEOERR *cgi_url_escape_more (const char *buf, char **esc, const char *other);
    330 
    331 /*
    332  * Function: cgi_url_validate - validate that url is of an allowed format
    333  * Description: cgi_url_validate will check that a URL starts with
    334  *              one of the accepted safe schemes.
    335  *              If not, it returns "#" as a safe substitute.
    336  *              Currently accepted schemes are http, https, ftp and mailto.
    337  *              It then html escapes the entire URL so that it is safe to
    338  *              insert in an href attribute.
    339  * Input: buf - a 0 terminated string
    340  * Output: esc - a newly allocated string
    341  * Return: NERR_NOMEM - no memory available to allocate the escaped string
    342  */
    343 NEOERR *cgi_url_validate (const char *buf, char **esc);
    344 
    345 /*
    346  * Function: cgi_url_unescape - unescape an url encoded string
    347  * Description: cgi_url_unescape will do URL unescaping on the passed in
    348  *              string.  This function modifies the string in place.
    349  *              This function will decode any %XX character, and will
    350  *              decode + as space
    351  * Input: buf - a 0 terminated string
    352  * Return: pointer to same buf
    353  */
    354 char *cgi_url_unescape (char *buf);
    355 
    356 /*
    357  * Function: cgi_redirect - send an HTTP 302 redirect response
    358  * Description: cgi_redirect will redirect the user to another page on
    359  *              your site.  This version takes only the path portion of
    360  *              the URL.  As with all printf style commands, you should
    361  *              not call this with arbitrary input that may contain %
    362  *              characters, if you are forwarding something directly,
    363  *              use a format like cgi_redirect (cgi, "%s", buf)
    364  * Input: cgi - cgi struct
    365  *        fmt - printf style format with args
    366  * Output: None
    367  * Return: None
    368  */
    369 void cgi_redirect (CGI *cgi, const char *fmt, ...)
    370                    ATTRIBUTE_PRINTF(2,3);
    371 
    372 /*
    373  * Function: cgi_redirect_uri - send an HTTP 302 redirect response
    374  * Description: cgi_redirect_uri will redirect the user to another page on
    375  *              your site.  This version takes the full URL, including
    376  *              protocol/domain/port/path.
    377  *              As with all printf style commands, you should
    378  *              not call this with arbitrary input that may contain %
    379  *              characters, if you are forwarding something directly,
    380  *              use a format like cgi_redirect_uri (cgi, "%s", buf)
    381  * Input: cgi - cgi struct
    382  *        fmt - printf style format with args
    383  * Output: None
    384  * Return: None
    385  */
    386 void cgi_redirect_uri (CGI *cgi, const char *fmt, ...)
    387                        ATTRIBUTE_PRINTF(2,3);
    388 
    389 /*
    390  * Function: cgi_vredirect - send an HTTP 302 redirect response
    391  * Description: cgi_vredirect is mostly used internally, but can be used
    392  *              if you need a varargs version of the function.
    393  * Input: cgi - cgi struct
    394  *        uri - whether the URL is full (1) or path only (0)
    395  *        fmt - printf format string
    396  *        ap - stdarg va_list
    397  * Output: None
    398  * Return: None
    399  */
    400 void cgi_vredirect (CGI *cgi, int uri, const char *fmt, va_list ap);
    401 
    402 
    403 /*
    404  * Function: cgi_cookie_authority - determine the cookie authority for a
    405  *            domain
    406  * Description: cgi_cookie_authority will walk the CookieAuthority
    407  *              portion of the CGI HDF data set, and return the matching
    408  *              domain if it exists.  The purpose of this is so that you
    409  *              can set domain specific cookies.  For instance, you might
    410  *              have
    411  *                CookieAuthority.0 = neotonic.com
    412  *              In which case, any webserver using a hostname ending in
    413  *              neotonic.com will generate a cookie authority of
    414  *              neotonic.com.
    415  * Input: cgi - a CGI struct
    416  *        host - optional host to match against.  If NULL, the function
    417  *               will use the HTTP.Host HDF variable.
    418  * Output: None
    419  * Return: The authority domain, or NULL if none found.
    420  */
    421 char *cgi_cookie_authority (CGI *cgi, const char *host);
    422 
    423 /*
    424  * Function: cgi_cookie_set - Set a browser Cookie
    425  * Description: cgi_cookie_set will issue a Set-Cookie header that
    426  *              should cause a browser to return a cookie when required.
    427  *              Note this function does no escaping of anything, you
    428  *              have to take care of that first.
    429  * Input: cgi - a CGI struct
    430  *        name - the name of the cookie
    431  *        value - the value to set the cookie to.
    432  *        path - optional path for which the cookie is valid.  Default
    433  *               is /
    434  *        domain - optional domain for which the cookie is valid.  You
    435  *                 can use cgi_cookie_authority to determine this
    436  *                 domain.  Default is none, which is interpreted by
    437  *                 the browser as the sending domain only.
    438  *        time_str - expiration time string in the following format
    439  *                   Wdy, DD-Mon-YYYY HH:MM:SS GMT.  Only used if
    440  *                   persistent.  Default is one year from time of call.
    441  *        persistent - cookie will be stored by the browser between sessions
    442  *        secure - cookie will only be sent over secure connections
    443  * Output: None
    444  * Return: NERR_IO
    445  */
    446 NEOERR *cgi_cookie_set (CGI *cgi, const char *name, const char *value,
    447                         const char *path, const char *domain,
    448                         const char *time_str, int persistent, int secure);
    449 
    450 /*
    451  * Function: cgi_cookie_clear - clear browser cookie
    452  * Description: cgi_cookie_clear will send back a Set-Cookie string that
    453  *              will attempt to stop a browser from continuing to send
    454  *              back a cookie.  Note that the cookie has to match in
    455  *              name, domain, and path, and the luck of the Irish has to
    456  *              be with you for this to work all the time, but at the least
    457  *              it will make the browser send back a cookie with no
    458  *              value, which the ClearSilver cookie parsing code will
    459  *              ignore.
    460  * Input: cgi - a CGI struct
    461  *        name - the cookie name to clear
    462  *        domain - the domain to clear, NULL for none
    463  *        path - the cookie's path
    464  * Output: None
    465  * Return: NERR_IO
    466  */
    467 NEOERR *cgi_cookie_clear (CGI *cgi, const char *name, const char *domain,
    468                           const char *path);
    469 
    470 /* not documented *yet* */
    471 NEOERR *cgi_text_html_strfunc(const char *str, char **ret);
    472 NEOERR *cgi_html_strip_strfunc(const char *str, char **ret);
    473 NEOERR *cgi_html_escape_strfunc(const char *str, char **ret);
    474 NEOERR *cgi_js_escape (const char *buf, char **esc);
    475 void cgi_html_ws_strip(STRING *str, int level);
    476 NEOERR *cgi_register_strfuncs(CSPARSE *cs);
    477 
    478 /* internal use only */
    479 NEOERR * parse_rfc2388 (CGI *cgi);
    480 NEOERR * open_upload(CGI *cgi, int unlink_files, FILE **fpw);
    481 
    482 __END_DECLS
    483 
    484 #endif /* __CGI_H_ */
    485