Home | History | Annotate | Download | only in cgi
      1 /*
      2  * Copyright 2001-2004 Brandon Long
      3  * All Rights Reserved.
      4  *
      5  * ClearSilver Templating System
      6  *
      7  * This code is made available under the terms of the ClearSilver License.
      8  * http://www.clearsilver.net/license.hdf
      9  *
     10  */
     11 
     12 /* rfc2388 defines multipart/form-data which is primarily used for
     13  * HTTP file upload
     14  */
     15 
     16 #include "cs_config.h"
     17 
     18 #include <stdio.h>
     19 #include <stdlib.h>
     20 #include <unistd.h>
     21 #include <sys/stat.h>
     22 #include <limits.h>
     23 #include <ctype.h>
     24 #include <string.h>
     25 #include "util/neo_misc.h"
     26 #include "util/neo_err.h"
     27 #include "util/neo_str.h"
     28 #include "cgi.h"
     29 #include "cgiwrap.h"
     30 
     31 static NEOERR * _header_value (char *hdr, char **val)
     32 {
     33   char *p, *q;
     34   int l;
     35 
     36   *val = NULL;
     37 
     38   p = hdr;
     39   while (*p && isspace(*p)) p++;
     40   q = p;
     41   while (*q && !isspace(*q) && *q != ';') q++;
     42   if (!*p || p == q) return STATUS_OK;
     43 
     44   l = q - p;
     45   *val = (char *) malloc (l+1);
     46   if (*val == NULL)
     47     return nerr_raise (NERR_NOMEM, "Unable to allocate space for val");
     48   memcpy (*val, p, l);
     49   (*val)[l] = '\0';
     50 
     51   return STATUS_OK;
     52 }
     53 
     54 static NEOERR * _header_attr (char *hdr, char *attr, char **val)
     55 {
     56   char *p, *k, *v;
     57   int found = 0;
     58   int l, al;
     59   char *r;
     60 
     61   *val = NULL;
     62   l = strlen(attr);
     63 
     64   /* skip value */
     65   p = hdr;
     66   while (*p && *p != ';') p++;
     67   if (!*p) return STATUS_OK;
     68 
     69   p++;
     70   while(*p && !found)
     71   {
     72     while (*p && isspace(*p)) p++;
     73     if (!*p) return STATUS_OK;
     74     /* attr name */
     75     k = p;
     76     while (*p && !isspace(*p) && *p != ';' && *p != '=') p++;
     77     if (!*p) return STATUS_OK;
     78     if (l == (p-k) && !strncasecmp(attr, k, l))
     79       found = 1;
     80 
     81     while (*p && isspace(*p)) p++;
     82     if (*p != ';' && *p != '=') return STATUS_OK;
     83     if (*p == ';')
     84     {
     85       if (found)
     86       {
     87 	*val = strdup ("");
     88 	if (*val == NULL)
     89 	  return nerr_raise (NERR_NOMEM, "Unable to allocate value");
     90 	return STATUS_OK;
     91       }
     92     }
     93     else
     94     {
     95       p++;
     96       if (*p == '"')
     97       {
     98 	v = ++p;
     99 	while (*p && *p != '"') p++;
    100 	al = p-v;
    101 	if (*p) p++;
    102       }
    103       else
    104       {
    105 	v = p;
    106 	while (*p && !isspace(*p) && *p != ';') p++;
    107 	al = p-v;
    108       }
    109       if (found)
    110       {
    111 	r = (char *) malloc (al+1);
    112 	if (r == NULL)
    113 	  return nerr_raise (NERR_NOMEM, "Unable to allocate value");
    114 	memcpy (r, v, al);
    115 	r[al] = '\0';
    116 	*val = r;
    117 	return STATUS_OK;
    118       }
    119     }
    120     if (*p) p++;
    121   }
    122   return STATUS_OK;
    123 }
    124 
    125 static NEOERR * _read_line (CGI *cgi, char **s, int *l, int *done)
    126 {
    127   int ofs = 0;
    128   char *p;
    129   int to_read;
    130 
    131   if (cgi->buf == NULL)
    132   {
    133     cgi->buflen = 4096;
    134     cgi->buf = (char *) malloc (sizeof(char) * cgi->buflen);
    135     if (cgi->buf == NULL)
    136       return nerr_raise (NERR_NOMEM, "Unable to allocate cgi buf");
    137   }
    138   if (cgi->unget)
    139   {
    140     cgi->unget = FALSE;
    141     *s = cgi->last_start;
    142     *l = cgi->last_length;
    143     return STATUS_OK;
    144   }
    145   if (cgi->found_nl)
    146   {
    147     p = memchr (cgi->buf + cgi->nl, '\n', cgi->readlen - cgi->nl);
    148     if (p) {
    149       cgi->last_start = *s = cgi->buf + cgi->nl;
    150       cgi->last_length = *l = p - (cgi->buf + cgi->nl) + 1;
    151       cgi->found_nl = TRUE;
    152       cgi->nl = p - cgi->buf + 1;
    153       return STATUS_OK;
    154     }
    155     ofs = cgi->readlen - cgi->nl;
    156     memmove(cgi->buf, cgi->buf + cgi->nl, ofs);
    157   }
    158   // Read either as much buffer space as we have left, or up to
    159   // the amount of data remaining according to Content-Length
    160   // If there is no Content-Length, just use the buffer space, but recognize
    161   // that it might not work on some servers or cgiwrap implementations.
    162   // Some servers will close their end of the stdin pipe, so cgiwrap_read
    163   // will return if we ask for too much.  Techically, not including
    164   // Content-Length is against the HTTP spec, so we should consider failing
    165   // earlier if we don't have a length.
    166   to_read = cgi->buflen - ofs;
    167   if (cgi->data_expected && (to_read > cgi->data_expected - cgi->data_read))
    168   {
    169     to_read = cgi->data_expected - cgi->data_read;
    170   }
    171   cgiwrap_read (cgi->buf + ofs, to_read, &(cgi->readlen));
    172   if (cgi->readlen < 0)
    173   {
    174     return nerr_raise_errno (NERR_IO, "POST Read Error");
    175   }
    176   if (cgi->readlen == 0)
    177   {
    178     *done = 1;
    179     return STATUS_OK;
    180   }
    181   cgi->data_read += cgi->readlen;
    182   if (cgi->upload_cb)
    183   {
    184     if (cgi->upload_cb (cgi, cgi->data_read, cgi->data_expected))
    185       return nerr_raise (CGIUploadCancelled, "Upload Cancelled");
    186   }
    187   cgi->readlen += ofs;
    188   p = memchr (cgi->buf, '\n', cgi->readlen);
    189   if (!p)
    190   {
    191     cgi->found_nl = FALSE;
    192     cgi->last_start = *s = cgi->buf;
    193     cgi->last_length = *l = cgi->readlen;
    194     return STATUS_OK;
    195   }
    196   cgi->last_start = *s = cgi->buf;
    197   cgi->last_length = *l = p - cgi->buf + 1;
    198   cgi->found_nl = TRUE;
    199   cgi->nl = *l;
    200   return STATUS_OK;
    201 }
    202 
    203 static NEOERR * _read_header_line (CGI *cgi, STRING *line, int *done)
    204 {
    205   NEOERR *err;
    206   char *s, *p;
    207   int l;
    208 
    209   err = _read_line (cgi, &s, &l, done);
    210   if (err) return nerr_pass (err);
    211   if (*done || (l == 0)) return STATUS_OK;
    212   if (isspace (s[0])) return STATUS_OK;
    213   while (l && isspace(s[l-1])) l--;
    214   err = string_appendn (line, s, l);
    215   if (err) return nerr_pass (err);
    216 
    217   while (1)
    218   {
    219     err = _read_line (cgi, &s, &l, done);
    220     if (err) break;
    221     if (l == 0) break;
    222     if (*done) break;
    223     if (!(s[0] == ' ' || s[0] == '\t'))
    224     {
    225       cgi->unget = TRUE;
    226       break;
    227     }
    228     while (l && isspace(s[l-1])) l--;
    229     p = s;
    230     while (*p && isspace(*p) && (p-s < l)) p++;
    231     err = string_append_char (line, ' ');
    232     if (err) break;
    233     err = string_appendn (line, p, l - (p-s));
    234     if (err) break;
    235     if (line->len > 50*1024*1024)
    236     {
    237       string_clear(line);
    238       return nerr_raise(NERR_ASSERT, "read_header_line exceeded 50MB");
    239     }
    240   }
    241   return nerr_pass (err);
    242 }
    243 
    244 static BOOL _is_boundary (char *boundary, char *s, int l, int *done)
    245 {
    246   static char *old_boundary = NULL;
    247   static int bl;
    248 
    249   /* cache the boundary strlen... more pointless optimization by blong */
    250   if (old_boundary != boundary)
    251   {
    252     old_boundary = boundary;
    253     bl = strlen(boundary);
    254   }
    255 
    256   if (s[l-1] != '\n')
    257     return FALSE;
    258   l--;
    259   if (s[l-1] == '\r')
    260     l--;
    261 
    262   if (bl+2 == l && s[0] == '-' && s[1] == '-' && !strncmp (s+2, boundary, bl))
    263     return TRUE;
    264   if (bl+4 == l && s[0] == '-' && s[1] == '-' &&
    265       !strncmp (s+2, boundary, bl) &&
    266       s[l-1] == '-' && s[l-2] == '-')
    267   {
    268     *done = 1;
    269     return TRUE;
    270   }
    271   return FALSE;
    272 }
    273 
    274 static NEOERR * _find_boundary (CGI *cgi, char *boundary, int *done)
    275 {
    276   NEOERR *err;
    277   char *s;
    278   int l;
    279 
    280   *done = 0;
    281   while (1)
    282   {
    283     err = _read_line (cgi, &s, &l, done);
    284     if (err) return nerr_pass (err);
    285     if ((l == 0) || (*done)) {
    286       *done = 1;
    287       return STATUS_OK;
    288     }
    289     if (_is_boundary(boundary, s, l, done))
    290       return STATUS_OK;
    291   }
    292   return STATUS_OK;
    293 }
    294 
    295 NEOERR *open_upload(CGI *cgi, int unlink_files, FILE **fpw)
    296 {
    297   NEOERR *err = STATUS_OK;
    298   FILE *fp;
    299   char path[_POSIX_PATH_MAX];
    300   int fd;
    301 
    302   *fpw = NULL;
    303 
    304   snprintf (path, sizeof(path), "%s/cgi_upload.XXXXXX",
    305       hdf_get_value(cgi->hdf, "Config.Upload.TmpDir", "/var/tmp"));
    306 
    307   fd = mkstemp(path);
    308   if (fd == -1)
    309   {
    310     return nerr_raise_errno (NERR_SYSTEM, "Unable to open temp file %s",
    311 	path);
    312   }
    313 
    314   fp = fdopen (fd, "w+");
    315   if (fp == NULL)
    316   {
    317     close(fd);
    318     return nerr_raise_errno (NERR_SYSTEM, "Unable to fdopen file %s", path);
    319   }
    320   if (unlink_files) unlink(path);
    321   if (cgi->files == NULL)
    322   {
    323     err = uListInit (&(cgi->files), 10, 0);
    324     if (err)
    325     {
    326       fclose(fp);
    327       return nerr_pass(err);
    328     }
    329   }
    330   err = uListAppend (cgi->files, fp);
    331   if (err)
    332   {
    333     fclose (fp);
    334     return nerr_pass(err);
    335   }
    336   if (!unlink_files) {
    337     if (cgi->filenames == NULL)
    338     {
    339       err = uListInit (&(cgi->filenames), 10, 0);
    340       if (err)
    341       {
    342 	fclose(fp);
    343 	return nerr_pass(err);
    344       }
    345     }
    346     err = uListAppend (cgi->filenames, strdup(path));
    347     if (err)
    348     {
    349       fclose (fp);
    350       return nerr_pass(err);
    351     }
    352   }
    353   *fpw = fp;
    354   return STATUS_OK;
    355 }
    356 
    357 static NEOERR * _read_part (CGI *cgi, char *boundary, int *done)
    358 {
    359   NEOERR *err = STATUS_OK;
    360   STRING str;
    361   HDF *child, *obj = NULL;
    362   FILE *fp = NULL;
    363   char buf[256];
    364   char *p;
    365   char *name = NULL, *filename = NULL;
    366   char *type = NULL, *tmp = NULL;
    367   char *last = NULL;
    368   int unlink_files = hdf_get_int_value(cgi->hdf, "Config.Upload.Unlink", 1);
    369 
    370   string_init (&str);
    371 
    372   while (1)
    373   {
    374     err = _read_header_line (cgi, &str, done);
    375     if (err) break;
    376     if (*done) break;
    377     if (str.buf == NULL || str.buf[0] == '\0') break;
    378     p = strchr (str.buf, ':');
    379     if (p)
    380     {
    381       *p = '\0';
    382       if (!strcasecmp(str.buf, "content-disposition"))
    383       {
    384 	err = _header_attr (p+1, "name", &name);
    385 	if (err) break;
    386 	err = _header_attr (p+1, "filename", &filename);
    387 	if (err) break;
    388       }
    389       else if (!strcasecmp(str.buf, "content-type"))
    390       {
    391 	err = _header_value (p+1, &type);
    392 	if (err) break;
    393       }
    394       else if (!strcasecmp(str.buf, "content-encoding"))
    395       {
    396 	err = _header_value (p+1, &tmp);
    397 	if (err) break;
    398 	if (tmp && strcmp(tmp, "7bit") && strcmp(tmp, "8bit") &&
    399 	    strcmp(tmp, "binary"))
    400 	{
    401 	  free(tmp);
    402 	  err = nerr_raise (NERR_ASSERT, "form-data encoding is not supported");
    403 	  break;
    404 	}
    405 	free(tmp);
    406       }
    407     }
    408     string_set(&str, "");
    409   }
    410   if (err)
    411   {
    412     string_clear(&str);
    413     if (name) free(name);
    414     if (filename) free(filename);
    415     if (type) free(type);
    416     return nerr_pass (err);
    417   }
    418 
    419   do
    420   {
    421     if (filename)
    422     {
    423       err = open_upload(cgi, unlink_files, &fp);
    424       if (err) break;
    425     }
    426 
    427     string_set(&str, "");
    428     while (!(*done))
    429     {
    430       char *s;
    431       int l, w;
    432 
    433       err = _read_line (cgi, &s, &l, done);
    434       if (err) break;
    435       if (*done || (l == 0)) break;
    436       if (_is_boundary(boundary, s, l, done)) break;
    437       if (filename)
    438       {
    439 	if (last) fwrite (last, sizeof(char), strlen(last), fp);
    440 	if (l > 1 && s[l-1] == '\n' && s[l-2] == '\r')
    441 	{
    442 	  last = "\r\n";
    443 	  l-=2;
    444 	}
    445 	else if (l > 0 && s[l-1] == '\n')
    446 	{
    447 	  last = "\n";
    448 	  l--;
    449 	}
    450 	else last = NULL;
    451 	w = fwrite (s, sizeof(char), l, fp);
    452 	if (w != l)
    453 	{
    454 	  err = nerr_raise_errno (NERR_IO,
    455 	      "Short write on file %s upload %d < %d", filename, w, l);
    456 	  break;
    457 	}
    458       }
    459       else
    460       {
    461 	err = string_appendn(&str, s, l);
    462 	if (err) break;
    463       }
    464     }
    465     if (err) break;
    466   } while (0);
    467 
    468   /* Set up the cgi data */
    469   if (!err)
    470   {
    471     do {
    472       /* FIXME: Hmm, if we've seen the same name here before, what should we do?
    473        */
    474       if (filename)
    475       {
    476 	fseek(fp, 0, SEEK_SET);
    477 	snprintf (buf, sizeof(buf), "Query.%s", name);
    478 	err = hdf_set_value (cgi->hdf, buf, filename);
    479 	if (!err && type)
    480 	{
    481 	  snprintf (buf, sizeof(buf), "Query.%s.Type", name);
    482 	  err = hdf_set_value (cgi->hdf, buf, type);
    483 	}
    484 	if (!err)
    485 	{
    486 	  snprintf (buf, sizeof(buf), "Query.%s.FileHandle", name);
    487 	  err = hdf_set_int_value (cgi->hdf, buf, uListLength(cgi->files));
    488 	}
    489 	if (!err && !unlink_files)
    490 	{
    491 	  char *path;
    492 	  snprintf (buf, sizeof(buf), "Query.%s.FileName", name);
    493 	  err = uListGet(cgi->filenames, uListLength(cgi->filenames)-1,
    494 	      (void *)&path);
    495 	  if (!err) err = hdf_set_value (cgi->hdf, buf, path);
    496 	}
    497       }
    498       else
    499       {
    500 	snprintf (buf, sizeof(buf), "Query.%s", name);
    501 	while (str.len && isspace(str.buf[str.len-1]))
    502 	{
    503 	  str.buf[str.len-1] = '\0';
    504 	  str.len--;
    505 	}
    506 	if (!(cgi->ignore_empty_form_vars && str.len == 0))
    507 	{
    508 	  /* If we've seen it before... we force it into a list */
    509 	  obj = hdf_get_obj (cgi->hdf, buf);
    510 	  if (obj != NULL)
    511 	  {
    512 	    int i = 0;
    513 	    char buf2[10];
    514 	    char *t;
    515 	    child = hdf_obj_child (obj);
    516 	    if (child == NULL)
    517 	    {
    518 	      t = hdf_obj_value (obj);
    519 	      err = hdf_set_value (obj, "0", t);
    520 	      if (err != STATUS_OK) break;
    521 	      i = 1;
    522 	    }
    523 	    else
    524 	    {
    525 	      while (child != NULL)
    526 	      {
    527 		i++;
    528 		child = hdf_obj_next (child);
    529 		if (err != STATUS_OK) break;
    530 	      }
    531 	      if (err != STATUS_OK) break;
    532 	    }
    533 	    snprintf (buf2, sizeof(buf2), "%d", i);
    534 	    err = hdf_set_value (obj, buf2, str.buf);
    535 	    if (err != STATUS_OK) break;
    536 	  }
    537 	  err = hdf_set_value (cgi->hdf, buf, str.buf);
    538 	}
    539       }
    540     } while (0);
    541   }
    542 
    543   string_clear(&str);
    544   if (name) free(name);
    545   if (filename) free(filename);
    546   if (type) free(type);
    547 
    548   return nerr_pass (err);
    549 }
    550 
    551 NEOERR * parse_rfc2388 (CGI *cgi)
    552 {
    553   NEOERR *err;
    554   char *ct_hdr;
    555   char *boundary = NULL;
    556   int l;
    557   int done = 0;
    558 
    559   l = hdf_get_int_value (cgi->hdf, "CGI.ContentLength", -1);
    560   ct_hdr = hdf_get_value (cgi->hdf, "CGI.ContentType", NULL);
    561   if (ct_hdr == NULL)
    562     return nerr_raise (NERR_ASSERT, "No content type header?");
    563 
    564   cgi->data_expected = l;
    565   cgi->data_read = 0;
    566   if (cgi->upload_cb)
    567   {
    568     if (cgi->upload_cb (cgi, cgi->data_read, cgi->data_expected))
    569       return nerr_raise (CGIUploadCancelled, "Upload Cancelled");
    570   }
    571 
    572   err = _header_attr (ct_hdr, "boundary", &boundary);
    573   if (err) return nerr_pass (err);
    574   err = _find_boundary(cgi, boundary, &done);
    575   while (!err && !done)
    576   {
    577     err = _read_part (cgi, boundary, &done);
    578   }
    579 
    580   if (boundary) free(boundary);
    581   return nerr_pass(err);
    582 }
    583 
    584 /* this is here because it gets populated in this file */
    585 FILE *cgi_filehandle (CGI *cgi, const char *form_name)
    586 {
    587   NEOERR *err;
    588   FILE *fp;
    589   char buf[256];
    590   int n;
    591 
    592   if ((form_name == NULL) || (form_name[0] == '\0'))
    593   {
    594     /* if NULL, then its the PUT data we're looking for... */
    595     n = hdf_get_int_value (cgi->hdf, "PUT.FileHandle", -1);
    596   }
    597   else
    598   {
    599     snprintf (buf, sizeof(buf), "Query.%s.FileHandle", form_name);
    600     n = hdf_get_int_value (cgi->hdf, buf, -1);
    601   }
    602   if (n == -1) return NULL;
    603   err = uListGet(cgi->files, n-1, (void *)&fp);
    604   if (err)
    605   {
    606     nerr_ignore(&err);
    607     return NULL;
    608   }
    609   return fp;
    610 }
    611