Home | History | Annotate | Download | only in cgi
      1 /*
      2  * Copyright 2001-2004 Brandon Long
      3  * All Rights Reserved.
      4  *
      5  * ClearSilver Templating System
      6  *
      7  * This code is made available under the terms of the ClearSilver License.
      8  * http://www.clearsilver.net/license.hdf
      9  *
     10  */
     11 
     12 #include "cs_config.h"
     13 
     14 #include <stdlib.h>
     15 #include <string.h>
     16 #include <sys/types.h>
     17 #include <regex.h>
     18 #include <ctype.h>
     19 #include "util/neo_misc.h"
     20 #include "util/neo_err.h"
     21 #include "util/neo_str.h"
     22 #include "html.h"
     23 #include "cgi.h"
     24 
     25 static int has_space_formatting(const char *src, int slen)
     26 {
     27   int spaces = 0;
     28   int returns = 0;
     29   int ascii_art = 0;
     30   int x = 0;
     31 
     32   for (x = 0; x < slen; x++)
     33   {
     34     if (src[x] == '\t') return 1;
     35     if (src[x] == ' ')
     36     {
     37       spaces++;
     38       if (x && (src[x-1] == '.'))
     39 	spaces--;
     40     }
     41     else if (src[x] == '\n')
     42     {
     43       spaces = 0;
     44       returns++;
     45     }
     46     else if (strchr ("/\\<>:[]!@#$%^&*()|", src[x]))
     47     {
     48       ascii_art++;
     49       if (ascii_art > 3) return 2;
     50     }
     51     else if (src[x] != '\r')
     52     {
     53       if (returns > 2) return 1;
     54       if (spaces > 2) return 1;
     55       returns = 0;
     56       spaces = 0;
     57       ascii_art = 0;
     58     }
     59   }
     60 
     61   return 0;
     62 }
     63 
     64 /*
     65 static int has_long_lines (char *s, int l)
     66 {
     67   char *ptr;
     68   int x = 0;
     69 
     70   while (x < l)
     71   {
     72     ptr = strchr (s + x, '\n');
     73     if (ptr == NULL)
     74     {
     75       if (l - x > 75) return 1;
     76       return 0;
     77     }
     78     if (ptr - (s + x) > 75) return 1;
     79     x = ptr - s + 1;
     80   }
     81   return 0;
     82 }
     83 */
     84 
/* The first step is to find all of the URLs and email addresses using
 * our handy regular expressions.  We mark these, then go through the
 * text and convert the non-special areas with straight text->html
 * escapes, and convert the special parts (URLs and email addresses)
 * into links.
 */
     90 struct _parts {
     91   int begin;
     92   int end;
     93   int type;
     94 };
     95 
     96 #define SC_TYPE_TEXT  1
     97 #define SC_TYPE_URL   2
     98 #define SC_TYPE_EMAIL 3
     99 
    100 static char *EmailRe = "[^][@:;<>\\\"()[:space:][:cntrl:]]+@[-+a-zA-Z0-9]+\\.[-+a-zA-Z0-9\\.]+[-+a-zA-Z0-9]";
    101 static char *URLRe = "((http|https|ftp|mailto):(//)?[^[:space:]>\"\t]*|www\\.[-a-z0-9\\.]+)[^[:space:];\t\">]*";
    102 
    103 static NEOERR *split_and_convert (const char *src, int slen,
    104                                   STRING *out, HTML_CONVERT_OPTS *opts)
    105 {
    106   NEOERR *err = STATUS_OK;
    107   static int compiled = 0;
    108   static regex_t email_re, url_re;
    109   regmatch_t email_match, url_match;
    110   int errcode;
    111   char *ptr, *esc;
    112   char errbuf[256];
    113   struct _parts *parts;
    114   int part_count;
    115   int part;
    116   int x, i;
    117   int spaces = 0;
    118 
    119   if (!compiled)
    120   {
    121     if ((errcode = regcomp (&email_re, EmailRe, REG_ICASE | REG_EXTENDED)))
    122     {
    123       regerror (errcode, &email_re, errbuf, sizeof(errbuf));
    124       return nerr_raise (NERR_PARSE, "Unable to compile EmailRE: %s", errbuf);
    125     }
    126     if ((errcode = regcomp (&url_re, URLRe, REG_ICASE | REG_EXTENDED)))
    127     {
    128       regerror (errcode, &url_re, errbuf, sizeof(errbuf));
    129       return nerr_raise (NERR_PARSE, "Unable to compile URLRe: %s", errbuf);
    130     }
    131     compiled = 1;
    132   }
    133 
    134   part_count = 20;
    135   parts = (struct _parts *) malloc (sizeof(struct _parts) * part_count);
    136   part = 0;
    137 
    138   x = 0;
    139   if (regexec (&email_re, src+x, 1, &email_match, 0) != 0)
    140   {
    141     email_match.rm_so = -1;
    142     email_match.rm_eo = -1;
    143   }
    144   else
    145   {
    146     email_match.rm_so += x;
    147     email_match.rm_eo += x;
    148   }
    149   if (regexec (&url_re, src+x, 1, &url_match, 0) != 0)
    150   {
    151     url_match.rm_so = -1;
    152     url_match.rm_eo = -1;
    153   }
    154   else
    155   {
    156     url_match.rm_so += x;
    157     url_match.rm_eo += x;
    158   }
    159   while ((x < slen) && !((email_match.rm_so == -1) && (url_match.rm_so == -1)))
    160   {
    161     if (part >= part_count)
    162     {
    163       part_count *= 2;
    164       parts = (struct _parts *) realloc (parts, sizeof(struct _parts) * part_count);
    165     }
    166     if ((url_match.rm_so != -1) && ((email_match.rm_so == -1) || (url_match.rm_so <= email_match.rm_so)))
    167     {
    168       parts[part].begin = url_match.rm_so;
    169       parts[part].end = url_match.rm_eo;
    170       parts[part].type = SC_TYPE_URL;
    171       x = parts[part].end + 1;
    172       part++;
    173       if (x < slen)
    174       {
    175 	if (regexec (&url_re, src+x, 1, &url_match, 0) != 0)
    176 	{
    177 	  url_match.rm_so = -1;
    178 	  url_match.rm_eo = -1;
    179 	}
    180 	else
    181 	{
    182 	  url_match.rm_so += x;
    183 	  url_match.rm_eo += x;
    184 	}
    185 	if ((email_match.rm_so != -1) && (x > email_match.rm_so))
    186 	{
    187 	  if (regexec (&email_re, src+x, 1, &email_match, 0) != 0)
    188 	  {
    189 	    email_match.rm_so = -1;
    190 	    email_match.rm_eo = -1;
    191 	  }
    192 	  else
    193 	  {
    194 	    email_match.rm_so += x;
    195 	    email_match.rm_eo += x;
    196 	  }
    197 	}
    198       }
    199     }
    200     else
    201     {
    202       parts[part].begin = email_match.rm_so;
    203       parts[part].end = email_match.rm_eo;
    204       parts[part].type = SC_TYPE_EMAIL;
    205       x = parts[part].end + 1;
    206       part++;
    207       if (x < slen)
    208       {
    209 	if (regexec (&email_re, src+x, 1, &email_match, 0) != 0)
    210 	{
    211 	  email_match.rm_so = -1;
    212 	  email_match.rm_eo = -1;
    213 	}
    214 	else
    215 	{
    216 	  email_match.rm_so += x;
    217 	  email_match.rm_eo += x;
    218 	}
    219 	if ((url_match.rm_so != -1) && (x > url_match.rm_so))
    220 	{
    221 	  if (regexec (&url_re, src+x, 1, &url_match, 0) != 0)
    222 	  {
    223 	    url_match.rm_so = -1;
    224 	    url_match.rm_eo = -1;
    225 	  }
    226 	  else
    227 	  {
    228 	    url_match.rm_so += x;
    229 	    url_match.rm_eo += x;
    230 	  }
    231 	}
    232       }
    233     }
    234   }
    235 
    236   i = 0;
    237   x = 0;
    238   while (x < slen)
    239   {
    240     if ((i >= part) || (x < parts[i].begin))
    241     {
    242       ptr = strpbrk(src + x, "&<>\r\n ");
    243       if (ptr == NULL)
    244       {
    245 	if (spaces)
    246 	{
    247 	  int sp;
    248 	  for (sp = 0; sp < spaces - 1; sp++)
    249 	  {
    250 	    err = string_append (out, "&nbsp;");
    251 	    if (err != STATUS_OK) break;
    252 	  }
    253 	  if (err != STATUS_OK) break;
    254 	  err = string_append_char (out, ' ');
    255 	}
    256 	spaces = 0;
    257 	if (i < part)
    258 	{
    259 	  err = string_appendn (out, src + x, parts[i].begin - x);
    260 	  x = parts[i].begin;
    261 	}
    262 	else
    263 	{
    264 	  err = string_append (out, src + x);
    265 	  x = slen;
    266 	}
    267       }
    268       else
    269       {
    270 	if ((i >= part) || ((ptr - src) < parts[i].begin))
    271 	{
    272 	  if (spaces)
    273 	  {
    274 	    int sp;
    275 	    for (sp = 0; sp < spaces - 1; sp++)
    276 	    {
    277 	      err = string_append (out, "&nbsp;");
    278 	      if (err != STATUS_OK) break;
    279 	    }
    280 	    if (err != STATUS_OK) break;
    281 	    err = string_append_char (out, ' ');
    282 	  }
    283 	  spaces = 0;
    284 	  err = string_appendn (out, src + x, (ptr - src) - x);
    285 	  if (err != STATUS_OK) break;
    286 	  x = ptr - src;
    287 	  if (src[x] == ' ')
    288 	  {
    289 	    if (opts->space_convert)
    290 	    {
    291 	      spaces++;
    292 	    }
    293 	    else
    294 	      err = string_append_char (out, ' ');
    295 	  }
    296 	  else
    297 	  {
    298 	    if (src[x] != '\n' && spaces)
    299 	    {
    300 	      int sp;
    301 	      for (sp = 0; sp < spaces - 1; sp++)
    302 	      {
    303 		err = string_append (out, "&nbsp;");
    304 		if (err != STATUS_OK) break;
    305 	      }
    306 	      if (err != STATUS_OK) break;
    307 	      err = string_append_char (out, ' ');
    308 	    }
    309 	    spaces = 0;
    310 
    311 	    if (src[x] == '&')
    312 	      err = string_append (out, "&amp;");
    313 	    else if (src[x] == '<')
    314 	      err = string_append (out, "&lt;");
    315 	    else if (src[x] == '>')
    316 	      err = string_append (out, "&gt;");
    317 	    else if (src[x] == '\n')
    318 	      if (opts->newlines_convert)
    319 		err = string_append (out, "<br/>\n");
    320 	      else if (x && src[x-1] == '\n')
    321 		err = string_append (out, "<p/>\n");
    322 	      else
    323 		err = string_append_char (out, '\n');
    324 	    else if (src[x] != '\r')
    325 	      err = nerr_raise (NERR_ASSERT, "src[x] == '%c'", src[x]);
    326 	  }
    327 	  x++;
    328 	}
    329 	else
    330 	{
    331 	  if (spaces)
    332 	  {
    333 	    int sp;
    334 	    for (sp = 0; sp < spaces - 1; sp++)
    335 	    {
    336 	      err = string_append (out, "&nbsp;");
    337 	      if (err != STATUS_OK) break;
    338 	    }
    339 	    if (err != STATUS_OK) break;
    340 	    err = string_append_char (out, ' ');
    341 	  }
    342 	  spaces = 0;
    343 	  err = string_appendn (out, src + x, parts[i].begin - x);
    344 	  x = parts[i].begin;
    345 	}
    346       }
    347     }
    348     else
    349     {
    350       if (spaces)
    351       {
    352 	int sp;
    353 	for (sp = 0; sp < spaces - 1; sp++)
    354 	{
    355 	  err = string_append (out, "&nbsp;");
    356 	  if (err != STATUS_OK) break;
    357 	}
    358 	if (err != STATUS_OK) break;
    359 	err = string_append_char (out, ' ');
    360       }
    361       spaces = 0;
    362       if (parts[i].type == SC_TYPE_URL)
    363       {
    364         char last_char = src[parts[i].end-1];
    365         int suffix=0;
    366         if (last_char == '.' || last_char == ',') { suffix=1; }
    367 	err = string_append (out, " <a ");
    368 	if (err != STATUS_OK) break;
    369 	if (opts->url_class)
    370 	{
    371 	    err = string_appendf (out, "class=%s ", opts->url_class);
    372 	    if (err) break;
    373 	}
    374 	if (opts->url_target)
    375 	{
    376 	  err = string_appendf (out, "target=\"%s\" ", opts->url_target);
    377 	  if (err) break;
    378 	}
    379 	err = string_append(out, "href=\"");
    380 	if (err) break;
    381 	if (opts->bounce_url)
    382 	{
    383 	  char *url, *esc_url, *new_url;
    384 	  int url_len;
    385 	  if (!strncasecmp(src + x, "www.", 4))
    386 	  {
    387 	    url_len = 7 + parts[i].end - x - suffix;
    388 	    url = (char *) malloc(url_len+1);
    389 	    if (url == NULL)
    390 	    {
    391 	      err = nerr_raise(NERR_NOMEM,
    392 		  "Unable to allocate memory to convert url");
    393 	      break;
    394 	    }
    395 	    strcpy(url, "http://");
    396 	    strncat(url, src + x, parts[i].end - x - suffix);
    397 	  }
    398 	  else
    399 	  {
    400 	    url_len = parts[i].end - x - suffix;
    401 	    url = (char *) malloc(url_len+1);
    402 	    if (url == NULL)
    403 	    {
    404 	      err = nerr_raise(NERR_NOMEM,
    405 		  "Unable to allocate memory to convert url");
    406 	      break;
    407 	    }
    408 	    strncpy(url, src + x, parts[i].end - x - suffix);
    409 	    url[url_len] = '\0';
    410 	  }
    411 	  err = cgi_url_escape(url, &esc_url);
    412 	  free(url);
    413 	  if (err) {
    414 	    free(esc_url);
    415 	    break;
    416 	  }
    417 
    418 	  new_url = sprintf_alloc(opts->bounce_url, esc_url);
    419 	  free(esc_url);
    420 	  if (new_url == NULL)
    421 	  {
    422 	    err = nerr_raise(NERR_NOMEM, "Unable to allocate memory to convert url");
    423 	    break;
    424 	  }
    425 	  err = string_append (out, new_url);
    426 	  free(new_url);
    427 	  if (err) break;
    428 	}
    429 	else
    430 	{
    431 	  if (!strncasecmp(src + x, "www.", 4))
    432 	  {
    433 	    err = string_append (out, "http://");
    434 	    if (err != STATUS_OK) break;
    435 	  }
    436 	  err = string_appendn (out, src + x, parts[i].end - x - suffix);
    437 	  if (err != STATUS_OK) break;
    438 	}
    439 	err = string_append (out, "\">");
    440 	if (err != STATUS_OK) break;
    441         if (opts->link_name) {
    442           err = html_escape_alloc((opts->link_name),
    443                                   strlen(opts->link_name), &esc);
    444         } else {
    445           err = html_escape_alloc((src + x), parts[i].end - x - suffix, &esc);
    446         }
    447 	if (err != STATUS_OK) break;
    448 	err = string_append (out, esc);
    449 	free(esc);
    450 	if (err != STATUS_OK) break;
    451 	err = string_append (out, "</a>");
    452         if (suffix) {
    453             err  = string_appendn(out,src + parts[i].end - 1,1);
    454 	    if (err != STATUS_OK) break;
    455         }
    456       }
    457       else /* type == SC_TYPE_EMAIL */
    458       {
    459 	err = string_append (out, "<a ");
    460 	if (err != STATUS_OK) break;
    461 	if (opts->mailto_class)
    462 	{
    463 	    err = string_appendf (out, "class=%s ", opts->mailto_class);
    464 	    if (err) break;
    465 	}
    466 	err = string_append(out, "href=\"mailto:");
    467 	if (err) break;
    468 	err = string_appendn (out, src + x, parts[i].end - x);
    469 	if (err != STATUS_OK) break;
    470 	err = string_append (out, "\">");
    471 	if (err != STATUS_OK) break;
    472 	err = html_escape_alloc(src + x, parts[i].end - x, &esc);
    473 	if (err != STATUS_OK) break;
    474 	err = string_append (out, esc);
    475 	free(esc);
    476 	if (err != STATUS_OK) break;
    477 	err = string_append (out, "</a>");
    478       }
    479       x = parts[i].end;
    480       i++;
    481     }
    482     if (err != STATUS_OK) break;
    483   }
    484   free (parts);
    485   return err;
    486 }
    487 
    488 static void strip_white_space_end (STRING *str)
    489 {
    490   int x = 0;
    491   int ol = str->len;
    492   char *ptr;
    493   int i;
    494 
    495   while (x < str->len)
    496   {
    497     ptr = strchr(str->buf + x, '\n');
    498     if (ptr == NULL)
    499     {
    500       /* just strip the white space at the end of the string */
    501       ol = strlen(str->buf);
    502       while (ol && isspace(str->buf[ol-1]))
    503       {
    504 	str->buf[ol - 1] = '\0';
    505 	ol--;
    506       }
    507       str->len = ol;
    508       return;
    509     }
    510     else
    511     {
    512       x = i = ptr - str->buf;
    513       if (x)
    514       {
    515 	x--;
    516 	while (x && isspace(str->buf[x]) && (str->buf[x] != '\n')) x--;
    517 	if (x) x++;
    518 	memmove (str->buf + x, ptr, ol - i + 1);
    519 	x++;
    520 	str->len -= ((i - x) + 1);
    521 	str->buf[str->len] = '\0';
    522 	ol = str->len;
    523       }
    524     }
    525   }
    526 }
    527 
    528 NEOERR *convert_text_html_alloc (const char *src, int slen,
    529                                  char **out)
    530 {
    531     return nerr_pass(convert_text_html_alloc_options(src, slen, out, NULL));
    532 }
    533 
    534 NEOERR *convert_text_html_alloc_options (const char *src, int slen,
    535                                          char **out,
    536                                          HTML_CONVERT_OPTS *opts)
    537 {
    538   NEOERR *err;
    539   STRING out_s;
    540   int formatting = 0;
    541   HTML_CONVERT_OPTS my_opts;
    542 
    543   string_init(&out_s);
    544 
    545   if (opts == NULL)
    546   {
    547     opts = &my_opts;
    548     opts->bounce_url = NULL;
    549     opts->url_class = NULL;
    550     opts->url_target = "_blank";
    551     opts->mailto_class = NULL;
    552     opts->long_lines = 0;
    553     opts->space_convert = 0;
    554     opts->newlines_convert = 1;
    555     opts->longline_width = 75; /* This hasn't been used in a while, actually */
    556     opts->check_ascii_art = 1;
    557     opts->link_name = NULL;
    558   }
    559 
    560   do
    561   {
    562     if  (opts->check_ascii_art)
    563     {
    564 	formatting = has_space_formatting (src, slen);
    565 	if (formatting) opts->space_convert = 1;
    566     }
    567     if (formatting == 2)
    568     {
    569       /* Do <pre> formatting */
    570       opts->newlines_convert = 1;
    571       err = string_append (&out_s, "<tt>");
    572       if (err != STATUS_OK) break;
    573       err = split_and_convert(src, slen, &out_s, opts);
    574       if (err != STATUS_OK) break;
    575       err = string_append (&out_s, "</tt>");
    576       if (err != STATUS_OK) break;
    577       /* Strip white space at end of lines */
    578       strip_white_space_end (&out_s);
    579     }
    580     else
    581     {
    582       /* int nl = has_long_lines (src, slen); */
    583       err = split_and_convert(src, slen, &out_s, opts);
    584     }
    585   } while (0);
    586   if (err != STATUS_OK)
    587   {
    588     string_clear (&out_s);
    589     return nerr_pass (err);
    590   }
    591   if (out_s.buf == NULL)
    592   {
    593     *out = strdup("");
    594   }
    595   else
    596   {
    597     *out = out_s.buf;
    598   }
    599   return STATUS_OK;
    600 }
    601 
    602 NEOERR *html_escape_alloc (const char *src, int slen,
    603                            char **out)
    604 {
    605   return nerr_pass(neos_html_escape(src, slen, out));
    606 }
    607 
    608 /* Replace ampersand with iso-8859-1 character code */
    609 static unsigned char _expand_amp_8859_1_char (const char *s)
    610 {
    611   if (s[0] == '\0')
    612     return 0;
    613 
    614   switch (s[0]) {
    615     case '#':
    616       if (s[1] == 'x') return strtol (s+2, NULL, 16);
    617       return strtol (s+1, NULL, 10);
    618     case 'a':
    619       if (!strcmp(s, "agrave")) return 0xe0; /*  */
    620       if (!strcmp(s, "aacute")) return 0xe1; /*  */
    621       if (!strcmp(s, "acirc")) return 0xe2; /*  */
    622       if (!strcmp(s, "atilde")) return 0xe3; /*  */
    623       if (!strcmp(s, "auml")) return 0xe4; /*  */
    624       if (!strcmp(s, "aring")) return 0xe5; /*  */
    625       if (!strcmp(s, "aelig")) return 0xe6; /*  */
    626       if (!strcmp(s, "amp")) return '&';
    627       return 0;
    628     case 'c':
    629       if (!strcmp(s, "ccedil")) return 0xe7; /*  */
    630       return 0;
    631     case 'e':
    632       if (!strcmp(s, "egrave")) return 0xe8; /*  */
    633       if (!strcmp(s, "eacute")) return 0xe9; /*  */
    634       if (!strcmp(s, "ecirc")) return 0xea; /*  */
    635       if (!strcmp(s, "euml")) return 0xeb; /*  */
    636       if (!strcmp(s, "eth")) return 0xf0; /*  */
    637       return 0;
    638     case 'i':
    639       if (!strcmp(s, "igrave")) return 0xec; /*  */
    640       if (!strcmp(s, "iacute")) return 0xed; /*  */
    641       if (!strcmp(s, "icirc")) return 0xee; /*  */
    642       if (!strcmp(s, "iuml")) return 0xef; /*  */
    643       return 0;
    644     case 'g':
    645       if (!strcmp(s, "gt")) return '>';
    646       return 0;
    647     case 'l':
    648       if (!strcmp(s, "lt")) return '<';
    649       return 0;
    650     case 'n':
    651       if (!strcmp(s, "ntilde")) return 0xf1; /*  */
    652       if (!strcmp(s, "nbsp")) return ' ';
    653       return 0;
    654     case 'o':
    655       if (!strcmp(s, "ograve")) return 0xf2; /*  */
    656       if (!strcmp(s, "oacute")) return 0xf3; /*  */
    657       if (!strcmp(s, "ocirc")) return 0xf4; /*  */
    658       if (!strcmp(s, "otilde")) return 0xf5; /*  */
    659       if (!strcmp(s, "ouml")) return 0xf6; /*  */
    660       if (!strcmp(s, "oslash")) return 0xf8; /*  */
    661       return 0;
    662     case 'q': /* quot */
    663       if (!strcmp(s, "quot")) return '"';
    664       return 0;
    665     case 's':
    666       if (!strcmp(s, "szlig")) return 0xdf; /*  */
    667       return 0;
    668     case 't':
    669       if (!strcmp(s, "thorn")) return 0xfe; /*  */
    670       return 0;
    671     case 'u':
    672       if (!strcmp(s, "ugrave")) return 0xf9; /*  */
    673       if (!strcmp(s, "uacute")) return 0xfa; /*  */
    674       if (!strcmp(s, "ucirc")) return 0xfb; /*  */
    675       if (!strcmp(s, "uuml")) return 0xfc; /*  */
    676       return 0;
    677     case 'y':
    678       if (!strcmp(s, "yacute")) return 0xfd; /*  */
    679 
    680   }
    681   return 0;
    682 }
    683 
    684 char *html_expand_amp_8859_1(const char *amp,
    685                                       char *buf)
    686 {
    687   unsigned char ch;
    688 
    689   ch = _expand_amp_8859_1_char(amp);
    690   if (ch == '\0')
    691   {
    692     if (!strcmp(amp, "copy")) return "(C)";
    693     return "";
    694   }
    695   else {
    696     buf[0] = (char)ch;
    697     buf[1] = '\0';
    698     return buf;
    699   }
    700 }
    701 
    702 NEOERR *html_strip_alloc(const char *src, int slen,
    703                          char **out)
    704 {
    705   NEOERR *err = STATUS_OK;
    706   STRING out_s;
    707   int x = 0;
    708   int strip_match = -1;
    709   int state = 0;
    710   char amp[10];
    711   int amp_start = 0;
    712   char buf[10];
    713   int ampl = 0;
    714 
    715   string_init(&out_s);
    716   err = string_append (&out_s, "");
    717   if (err) return nerr_pass (err);
    718 
    719   while (x < slen)
    720   {
    721     switch (state) {
    722       case 0:
    723 	/* Default */
    724 	if (src[x] == '&')
    725 	{
    726 	  state = 3;
    727 	  ampl = 0;
    728 	  amp_start = x;
    729 	}
    730 	else if (src[x] == '<')
    731 	{
    732 	  state = 1;
    733 	}
    734 	else
    735 	{
    736 	  if (strip_match == -1)
    737 	  {
    738 	    err = string_append_char(&out_s, src[x]);
    739 	    if (err) break;
    740 	  }
    741 	}
    742 	x++;
    743 	break;
    744       case 1:
    745 	/* Starting TAG */
    746 	if (src[x] == '>')
    747 	{
    748 	  state = 0;
    749 	}
    750 	else if (src[x] == '/')
    751 	{
    752 	}
    753 	else
    754 	{
    755 	}
    756 	x++;
    757 	break;
    758       case 2:
    759 	/* In TAG */
    760 	if (src[x] == '>')
    761 	{
    762 	  state = 0;
    763 	}
    764 	x++;
    765 	break;
    766       case 3:
    767 	/* In AMP */
    768 	if (src[x] == ';')
    769 	{
    770 	  amp[ampl] = '\0';
    771 	  state = 0;
    772 	  err = string_append(&out_s, html_expand_amp_8859_1(amp, buf));
    773 	  if (err) break;
    774 	}
    775 	else
    776 	{
    777 	  if (ampl < sizeof(amp)-1)
    778 	    amp[ampl++] = tolower(src[x]);
    779 	  else
    780 	  {
    781 	    /* broken html... just back up */
    782 	    x = amp_start;
    783 	    err = string_append_char(&out_s, src[x]);
    784 	    if (err) break;
    785 	    state = 0;
    786 	  }
    787 	}
    788 	x++;
    789 	break;
    790     }
    791     if (err) break;
    792   }
    793 
    794 
    795   if (err)
    796   {
    797     string_clear (&out_s);
    798     return nerr_pass (err);
    799   }
    800   *out = out_s.buf;
    801   return STATUS_OK;
    802 }
    803