Home | History | Annotate | Download | only in gas
      1 /* This is the Assembler Pre-Processor
      2    Copyright (C) 1987-2016 Free Software Foundation, Inc.
      3 
      4    This file is part of GAS, the GNU Assembler.
      5 
      6    GAS is free software; you can redistribute it and/or modify
      7    it under the terms of the GNU General Public License as published by
      8    the Free Software Foundation; either version 3, or (at your option)
      9    any later version.
     10 
     11    GAS is distributed in the hope that it will be useful, but WITHOUT
     12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     14    License for more details.
     15 
     16    You should have received a copy of the GNU General Public License
     17    along with GAS; see the file COPYING.  If not, write to the Free
     18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
     19    02110-1301, USA.  */
     20 
     21 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
     22 /* App, the assembler pre-processor.  This pre-processor strips out
     23    excess spaces, turns single-quoted characters into a decimal
     24    constant, and turns the # in # <number> <filename> <garbage> into a
     25    .linefile.  This needs better error-handling.  */
     26 
     27 #include "as.h"
     28 
     29 #if (__STDC__ != 1)
     30 #ifndef const
     31 #define const  /* empty */
     32 #endif
     33 #endif
     34 
     35 #ifdef H_TICK_HEX
     36 int enable_h_tick_hex = 0;
     37 #endif
     38 
     39 #ifdef TC_M68K
     40 /* Whether we are scrubbing in m68k MRI mode.  This is different from
     41    flag_m68k_mri, because the two flags will be affected by the .mri
     42    pseudo-op at different times.  */
     43 static int scrub_m68k_mri;
     44 
     45 /* The pseudo-op which switches in and out of MRI mode.  See the
     46    comment in do_scrub_chars.  */
     47 static const char mri_pseudo[] = ".mri 0";
     48 #else
     49 #define scrub_m68k_mri 0
     50 #endif
     51 
     52 #if defined TC_ARM && defined OBJ_ELF
     53 /* The pseudo-op for which we need to special-case `@' characters.
     54    See the comment in do_scrub_chars.  */
     55 static const char   symver_pseudo[] = ".symver";
     56 static const char * symver_state;
     57 #endif
     58 
     59 static char lex[256];
     60 static const char symbol_chars[] =
     61 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
     62 
     63 #define LEX_IS_SYMBOL_COMPONENT		1
     64 #define LEX_IS_WHITESPACE		2
     65 #define LEX_IS_LINE_SEPARATOR		3
     66 #define LEX_IS_COMMENT_START		4
     67 #define LEX_IS_LINE_COMMENT_START	5
     68 #define	LEX_IS_TWOCHAR_COMMENT_1ST	6
     69 #define	LEX_IS_STRINGQUOTE		8
     70 #define	LEX_IS_COLON			9
     71 #define	LEX_IS_NEWLINE			10
     72 #define	LEX_IS_ONECHAR_QUOTE		11
     73 #ifdef TC_V850
     74 #define LEX_IS_DOUBLEDASH_1ST		12
     75 #endif
     76 #ifdef TC_M32R
     77 #define DOUBLEBAR_PARALLEL
     78 #endif
     79 #ifdef DOUBLEBAR_PARALLEL
     80 #define LEX_IS_DOUBLEBAR_1ST		13
     81 #endif
     82 #define LEX_IS_PARALLEL_SEPARATOR	14
     83 #ifdef H_TICK_HEX
     84 #define LEX_IS_H			15
     85 #endif
     86 #define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
     87 #define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
     88 #define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
     89 #define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
     90 #define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
     91 #define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
     92 #define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)
     93 
     94 static int process_escape (int);
     95 
     96 /* FIXME-soon: The entire lexer/parser thingy should be
     97    built statically at compile time rather than dynamically
     98    each and every time the assembler is run.  xoxorich.  */
     99 
    100 void
    101 do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
    102 {
    103   const char *p;
    104   int c;
    105 
    106   lex[' '] = LEX_IS_WHITESPACE;
    107   lex['\t'] = LEX_IS_WHITESPACE;
    108   lex['\r'] = LEX_IS_WHITESPACE;
    109   lex['\n'] = LEX_IS_NEWLINE;
    110   lex[':'] = LEX_IS_COLON;
    111 
    112 #ifdef TC_M68K
    113   scrub_m68k_mri = m68k_mri;
    114 
    115   if (! m68k_mri)
    116 #endif
    117     {
    118       lex['"'] = LEX_IS_STRINGQUOTE;
    119 
    120 #if ! defined (TC_HPPA) && ! defined (TC_I370)
    121       /* I370 uses single-quotes to delimit integer, float constants.  */
    122       lex['\''] = LEX_IS_ONECHAR_QUOTE;
    123 #endif
    124 
    125 #ifdef SINGLE_QUOTE_STRINGS
    126       lex['\''] = LEX_IS_STRINGQUOTE;
    127 #endif
    128     }
    129 
    130   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
    131      in state 5 of do_scrub_chars must be changed.  */
    132 
    133   /* Note that these override the previous defaults, e.g. if ';' is a
    134      comment char, then it isn't a line separator.  */
    135   for (p = symbol_chars; *p; ++p)
    136     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
    137 
    138   for (c = 128; c < 256; ++c)
    139     lex[c] = LEX_IS_SYMBOL_COMPONENT;
    140 
    141 #ifdef tc_symbol_chars
    142   /* This macro permits the processor to specify all characters which
    143      may appears in an operand.  This will prevent the scrubber from
    144      discarding meaningful whitespace in certain cases.  The i386
    145      backend uses this to support prefixes, which can confuse the
    146      scrubber as to whether it is parsing operands or opcodes.  */
    147   for (p = tc_symbol_chars; *p; ++p)
    148     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
    149 #endif
    150 
    151   /* The m68k backend wants to be able to change comment_chars.  */
    152 #ifndef tc_comment_chars
    153 #define tc_comment_chars comment_chars
    154 #endif
    155   for (p = tc_comment_chars; *p; p++)
    156     lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
    157 
    158   for (p = line_comment_chars; *p; p++)
    159     lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
    160 
    161 #ifndef tc_line_separator_chars
    162 #define tc_line_separator_chars line_separator_chars
    163 #endif
    164   for (p = tc_line_separator_chars; *p; p++)
    165     lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
    166 
    167 #ifdef tc_parallel_separator_chars
    168   /* This macro permits the processor to specify all characters which
    169      separate parallel insns on the same line.  */
    170   for (p = tc_parallel_separator_chars; *p; p++)
    171     lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
    172 #endif
    173 
    174   /* Only allow slash-star comments if slash is not in use.
    175      FIXME: This isn't right.  We should always permit them.  */
    176   if (lex['/'] == 0)
    177     lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
    178 
    179 #ifdef TC_M68K
    180   if (m68k_mri)
    181     {
    182       lex['\''] = LEX_IS_STRINGQUOTE;
    183       lex[';'] = LEX_IS_COMMENT_START;
    184       lex['*'] = LEX_IS_LINE_COMMENT_START;
    185       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
    186 	 then it can't be used in an expression.  */
    187       lex['!'] = LEX_IS_LINE_COMMENT_START;
    188     }
    189 #endif
    190 
    191 #ifdef TC_V850
    192   lex['-'] = LEX_IS_DOUBLEDASH_1ST;
    193 #endif
    194 #ifdef DOUBLEBAR_PARALLEL
    195   lex['|'] = LEX_IS_DOUBLEBAR_1ST;
    196 #endif
    197 #ifdef TC_D30V
    198   /* Must do this is we want VLIW instruction with "->" or "<-".  */
    199   lex['-'] = LEX_IS_SYMBOL_COMPONENT;
    200 #endif
    201 
    202 #ifdef H_TICK_HEX
    203   if (enable_h_tick_hex)
    204     {
    205       lex['h'] = LEX_IS_H;
    206       lex['H'] = LEX_IS_H;
    207     }
    208 #endif
    209 }
    210 
    211 /* Saved state of the scrubber.  */
    212 static int state;
    213 static int old_state;
    214 static const char *out_string;
    215 static char out_buf[20];
    216 static int add_newlines;
    217 static char *saved_input;
    218 static size_t saved_input_len;
    219 static char input_buffer[32 * 1024];
    220 static const char *mri_state;
    221 static char mri_last_ch;
    222 
    223 /* Data structure for saving the state of app across #include's.  Note that
    224    app is called asynchronously to the parsing of the .include's, so our
    225    state at the time .include is interpreted is completely unrelated.
    226    That's why we have to save it all.  */
    227 
    228 struct app_save
    229 {
    230   int          state;
    231   int          old_state;
    232   const char * out_string;
    233   char         out_buf[sizeof (out_buf)];
    234   int          add_newlines;
    235   char *       saved_input;
    236   size_t       saved_input_len;
    237 #ifdef TC_M68K
    238   int          scrub_m68k_mri;
    239 #endif
    240   const char * mri_state;
    241   char         mri_last_ch;
    242 #if defined TC_ARM && defined OBJ_ELF
    243   const char * symver_state;
    244 #endif
    245 };
    246 
    247 char *
    248 app_push (void)
    249 {
    250   struct app_save *saved;
    251 
    252   saved = XNEW (struct app_save);
    253   saved->state = state;
    254   saved->old_state = old_state;
    255   saved->out_string = out_string;
    256   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
    257   saved->add_newlines = add_newlines;
    258   if (saved_input == NULL)
    259     saved->saved_input = NULL;
    260   else
    261     {
    262       saved->saved_input = XNEWVEC (char, saved_input_len);
    263       memcpy (saved->saved_input, saved_input, saved_input_len);
    264       saved->saved_input_len = saved_input_len;
    265     }
    266 #ifdef TC_M68K
    267   saved->scrub_m68k_mri = scrub_m68k_mri;
    268 #endif
    269   saved->mri_state = mri_state;
    270   saved->mri_last_ch = mri_last_ch;
    271 #if defined TC_ARM && defined OBJ_ELF
    272   saved->symver_state = symver_state;
    273 #endif
    274 
    275   /* do_scrub_begin() is not useful, just wastes time.  */
    276 
    277   state = 0;
    278   saved_input = NULL;
    279   add_newlines = 0;
    280 
    281   return (char *) saved;
    282 }
    283 
    284 void
    285 app_pop (char *arg)
    286 {
    287   struct app_save *saved = (struct app_save *) arg;
    288 
    289   /* There is no do_scrub_end ().  */
    290   state = saved->state;
    291   old_state = saved->old_state;
    292   out_string = saved->out_string;
    293   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
    294   add_newlines = saved->add_newlines;
    295   if (saved->saved_input == NULL)
    296     saved_input = NULL;
    297   else
    298     {
    299       gas_assert (saved->saved_input_len <= sizeof (input_buffer));
    300       memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
    301       saved_input = input_buffer;
    302       saved_input_len = saved->saved_input_len;
    303       free (saved->saved_input);
    304     }
    305 #ifdef TC_M68K
    306   scrub_m68k_mri = saved->scrub_m68k_mri;
    307 #endif
    308   mri_state = saved->mri_state;
    309   mri_last_ch = saved->mri_last_ch;
    310 #if defined TC_ARM && defined OBJ_ELF
    311   symver_state = saved->symver_state;
    312 #endif
    313 
    314   free (arg);
    315 }
    316 
    317 /* @@ This assumes that \n &c are the same on host and target.  This is not
    318    necessarily true.  */
    319 
    320 static int
    321 process_escape (int ch)
    322 {
    323   switch (ch)
    324     {
    325     case 'b':
    326       return '\b';
    327     case 'f':
    328       return '\f';
    329     case 'n':
    330       return '\n';
    331     case 'r':
    332       return '\r';
    333     case 't':
    334       return '\t';
    335     case '\'':
    336       return '\'';
    337     case '"':
    338       return '\"';
    339     default:
    340       return ch;
    341     }
    342 }
    343 
    344 /* This function is called to process input characters.  The GET
    345    parameter is used to retrieve more input characters.  GET should
    346    set its parameter to point to a buffer, and return the length of
    347    the buffer; it should return 0 at end of file.  The scrubbed output
    348    characters are put into the buffer starting at TOSTART; the TOSTART
    349    buffer is TOLEN bytes in length.  The function returns the number
    350    of scrubbed characters put into TOSTART.  This will be TOLEN unless
    351    end of file was seen.  This function is arranged as a state
    352    machine, and saves its state so that it may return at any point.
    353    This is the way the old code used to work.  */
    354 
    355 size_t
    356 do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
    357 {
    358   char *to = tostart;
    359   char *toend = tostart + tolen;
    360   char *from;
    361   char *fromend;
    362   size_t fromlen;
    363   int ch, ch2 = 0;
    364   /* Character that started the string we're working on.  */
    365   static char quotechar;
    366 
    367   /*State 0: beginning of normal line
    368 	  1: After first whitespace on line (flush more white)
    369 	  2: After first non-white (opcode) on line (keep 1white)
    370 	  3: after second white on line (into operands) (flush white)
    371 	  4: after putting out a .linefile, put out digits
    372 	  5: parsing a string, then go to old-state
    373 	  6: putting out \ escape in a "d string.
    374 	  7: no longer used
    375 	  8: no longer used
    376 	  9: After seeing symbol char in state 3 (keep 1white after symchar)
    377 	 10: After seeing whitespace in state 9 (keep white before symchar)
    378 	 11: After seeing a symbol character in state 0 (eg a label definition)
    379 	 -1: output string in out_string and go to the state in old_state
    380 	 -2: flush text until a '*' '/' is seen, then go to state old_state
    381 #ifdef TC_V850
    382 	 12: After seeing a dash, looking for a second dash as a start
    383 	     of comment.
    384 #endif
    385 #ifdef DOUBLEBAR_PARALLEL
    386 	 13: After seeing a vertical bar, looking for a second
    387 	     vertical bar as a parallel expression separator.
    388 #endif
    389 #ifdef TC_PREDICATE_START_CHAR
    390 	 14: After seeing a predicate start character at state 0, looking
    391 	     for a predicate end character as predicate.
    392 	 15: After seeing a predicate start character at state 1, looking
    393 	     for a predicate end character as predicate.
    394 #endif
    395 #ifdef TC_Z80
    396 	 16: After seeing an 'a' or an 'A' at the start of a symbol
    397 	 17: After seeing an 'f' or an 'F' in state 16
    398 #endif
    399 	  */
    400 
    401   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
    402      constructs like ``.loc 1 20''.  This was turning into ``.loc
    403      120''.  States 9 and 10 ensure that a space is never dropped in
    404      between characters which could appear in an identifier.  Ian
    405      Taylor, ian (at) cygnus.com.
    406 
    407      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
    408      correctly on the PA (and any other target where colons are optional).
    409      Jeff Law, law (at) cs.utah.edu.
    410 
    411      I added state 13 so that something like "cmp r1, r2 || trap #1" does not
    412      get squashed into "cmp r1,r2||trap#1", with the all important space
    413      between the 'trap' and the '#1' being eliminated.  nickc (at) cygnus.com  */
    414 
    415   /* This macro gets the next input character.  */
    416 
    417 #define GET()							\
    418   (from < fromend						\
    419    ? * (unsigned char *) (from++)				\
    420    : (saved_input = NULL,					\
    421       fromlen = (*get) (input_buffer, sizeof input_buffer),	\
    422       from = input_buffer,					\
    423       fromend = from + fromlen,					\
    424       (fromlen == 0						\
    425        ? EOF							\
    426        : * (unsigned char *) (from++))))
    427 
    428   /* This macro pushes a character back on the input stream.  */
    429 
    430 #define UNGET(uch) (*--from = (uch))
    431 
    432   /* This macro puts a character into the output buffer.  If this
    433      character fills the output buffer, this macro jumps to the label
    434      TOFULL.  We use this rather ugly approach because we need to
    435      handle two different termination conditions: EOF on the input
    436      stream, and a full output buffer.  It would be simpler if we
    437      always read in the entire input stream before processing it, but
    438      I don't want to make such a significant change to the assembler's
    439      memory usage.  */
    440 
    441 #define PUT(pch)				\
    442   do						\
    443     {						\
    444       *to++ = (pch);				\
    445       if (to >= toend)				\
    446 	goto tofull;				\
    447     }						\
    448   while (0)
    449 
    450   if (saved_input != NULL)
    451     {
    452       from = saved_input;
    453       fromend = from + saved_input_len;
    454     }
    455   else
    456     {
    457       fromlen = (*get) (input_buffer, sizeof input_buffer);
    458       if (fromlen == 0)
    459 	return 0;
    460       from = input_buffer;
    461       fromend = from + fromlen;
    462     }
    463 
    464   while (1)
    465     {
    466       /* The cases in this switch end with continue, in order to
    467 	 branch back to the top of this while loop and generate the
    468 	 next output character in the appropriate state.  */
    469       switch (state)
    470 	{
    471 	case -1:
    472 	  ch = *out_string++;
    473 	  if (*out_string == '\0')
    474 	    {
    475 	      state = old_state;
    476 	      old_state = 3;
    477 	    }
    478 	  PUT (ch);
    479 	  continue;
    480 
    481 	case -2:
    482 	  for (;;)
    483 	    {
    484 	      do
    485 		{
    486 		  ch = GET ();
    487 
    488 		  if (ch == EOF)
    489 		    {
    490 		      as_warn (_("end of file in comment"));
    491 		      goto fromeof;
    492 		    }
    493 
    494 		  if (ch == '\n')
    495 		    PUT ('\n');
    496 		}
    497 	      while (ch != '*');
    498 
    499 	      while ((ch = GET ()) == '*')
    500 		;
    501 
    502 	      if (ch == EOF)
    503 		{
    504 		  as_warn (_("end of file in comment"));
    505 		  goto fromeof;
    506 		}
    507 
    508 	      if (ch == '/')
    509 		break;
    510 
    511 	      UNGET (ch);
    512 	    }
    513 
    514 	  state = old_state;
    515 	  UNGET (' ');
    516 	  continue;
    517 
    518 	case 4:
    519 	  ch = GET ();
    520 	  if (ch == EOF)
    521 	    goto fromeof;
    522 	  else if (ch >= '0' && ch <= '9')
    523 	    PUT (ch);
    524 	  else
    525 	    {
    526 	      while (ch != EOF && IS_WHITESPACE (ch))
    527 		ch = GET ();
    528 	      if (ch == '"')
    529 		{
    530 		  quotechar = ch;
    531 		  state = 5;
    532 		  old_state = 3;
    533 		  PUT (ch);
    534 		}
    535 	      else
    536 		{
    537 		  while (ch != EOF && ch != '\n')
    538 		    ch = GET ();
    539 		  state = 0;
    540 		  PUT (ch);
    541 		}
    542 	    }
    543 	  continue;
    544 
    545 	case 5:
    546 	  /* We are going to copy everything up to a quote character,
    547 	     with special handling for a backslash.  We try to
    548 	     optimize the copying in the simple case without using the
    549 	     GET and PUT macros.  */
    550 	  {
    551 	    char *s;
    552 	    ptrdiff_t len;
    553 
    554 	    for (s = from; s < fromend; s++)
    555 	      {
    556 		ch = *s;
    557 		if (ch == '\\'
    558 		    || ch == quotechar
    559 		    || ch == '\n')
    560 		  break;
    561 	      }
    562 	    len = s - from;
    563 	    if (len > toend - to)
    564 	      len = toend - to;
    565 	    if (len > 0)
    566 	      {
    567 		memcpy (to, from, len);
    568 		to += len;
    569 		from += len;
    570 		if (to >= toend)
    571 		  goto tofull;
    572 	      }
    573 	  }
    574 
    575 	  ch = GET ();
    576 	  if (ch == EOF)
    577 	    {
    578 	      /* This buffer is here specifically so
    579 		 that the UNGET below will work.  */
    580 	      static char one_char_buf[1];
    581 
    582 	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
    583 	      state = old_state;
    584 	      from = fromend = one_char_buf + 1;
    585 	      fromlen = 1;
    586 	      UNGET ('\n');
    587 	      PUT (quotechar);
    588 	    }
    589 	  else if (ch == quotechar)
    590 	    {
    591 	      state = old_state;
    592 	      PUT (ch);
    593 	    }
    594 #ifndef NO_STRING_ESCAPES
    595 	  else if (ch == '\\')
    596 	    {
    597 	      state = 6;
    598 	      PUT (ch);
    599 	    }
    600 #endif
    601 	  else if (scrub_m68k_mri && ch == '\n')
    602 	    {
    603 	      /* Just quietly terminate the string.  This permits lines like
    604 		   bne	label	loop if we haven't reach end yet.  */
    605 	      state = old_state;
    606 	      UNGET (ch);
    607 	      PUT ('\'');
    608 	    }
    609 	  else
    610 	    {
    611 	      PUT (ch);
    612 	    }
    613 	  continue;
    614 
    615 	case 6:
    616 	  state = 5;
    617 	  ch = GET ();
    618 	  switch (ch)
    619 	    {
    620 	      /* Handle strings broken across lines, by turning '\n' into
    621 		 '\\' and 'n'.  */
    622 	    case '\n':
    623 	      UNGET ('n');
    624 	      add_newlines++;
    625 	      PUT ('\\');
    626 	      continue;
    627 
    628 	    case EOF:
    629 	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
    630 	      PUT (quotechar);
    631 	      continue;
    632 
    633 	    case '"':
    634 	    case '\\':
    635 	    case 'b':
    636 	    case 'f':
    637 	    case 'n':
    638 	    case 'r':
    639 	    case 't':
    640 	    case 'v':
    641 	    case 'x':
    642 	    case 'X':
    643 	    case '0':
    644 	    case '1':
    645 	    case '2':
    646 	    case '3':
    647 	    case '4':
    648 	    case '5':
    649 	    case '6':
    650 	    case '7':
    651 	      break;
    652 
    653 	    default:
    654 #ifdef ONLY_STANDARD_ESCAPES
    655 	      as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
    656 #endif
    657 	      break;
    658 	    }
    659 	  PUT (ch);
    660 	  continue;
    661 
    662 #ifdef DOUBLEBAR_PARALLEL
    663 	case 13:
    664 	  ch = GET ();
    665 	  if (ch != '|')
    666 	    abort ();
    667 
    668 	  /* Reset back to state 1 and pretend that we are parsing a
    669 	     line from just after the first white space.  */
    670 	  state = 1;
    671 	  PUT ('|');
    672 #ifdef TC_TIC6X
    673 	  /* "||^" is used for SPMASKed instructions.  */
    674 	  ch = GET ();
    675 	  if (ch == EOF)
    676 	    goto fromeof;
    677 	  else if (ch == '^')
    678 	    PUT ('^');
    679 	  else
    680 	    UNGET (ch);
    681 #endif
    682 	  continue;
    683 #endif
    684 #ifdef TC_Z80
    685 	case 16:
    686 	  /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
    687 	  ch = GET ();
    688 	  if (ch == 'f' || ch == 'F')
    689 	    {
    690 	      state = 17;
    691 	      PUT (ch);
    692 	    }
    693 	  else
    694 	    {
    695 	      state = 9;
    696 	      break;
    697 	    }
    698 	case 17:
    699 	  /* We have seen "af" at the start of a symbol,
    700 	     a ' here is a part of that symbol.  */
    701 	  ch = GET ();
    702 	  state = 9;
    703 	  if (ch == '\'')
    704 	    /* Change to avoid warning about unclosed string.  */
    705 	    PUT ('`');
    706 	  else if (ch != EOF)
    707 	    UNGET (ch);
    708 	  break;
    709 #endif
    710 	}
    711 
    712       /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
    713 
    714       /* flushchar: */
    715       ch = GET ();
    716 
    717 #ifdef TC_PREDICATE_START_CHAR
    718       if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
    719 	{
    720 	  state += 14;
    721 	  PUT (ch);
    722 	  continue;
    723 	}
    724       else if (state == 14 || state == 15)
    725 	{
    726 	  if (ch == TC_PREDICATE_END_CHAR)
    727 	    {
    728 	      state -= 14;
    729 	      PUT (ch);
    730 	      ch = GET ();
    731 	    }
    732 	  else
    733 	    {
    734 	      PUT (ch);
    735 	      continue;
    736 	    }
    737 	}
    738 #endif
    739 
    740     recycle:
    741 
    742 #if defined TC_ARM && defined OBJ_ELF
    743       /* We need to watch out for .symver directives.  See the comment later
    744 	 in this function.  */
    745       if (symver_state == NULL)
    746 	{
    747 	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
    748 	    symver_state = symver_pseudo + 1;
    749 	}
    750       else
    751 	{
    752 	  /* We advance to the next state if we find the right
    753 	     character.  */
    754 	  if (ch != '\0' && (*symver_state == ch))
    755 	    ++symver_state;
    756 	  else if (*symver_state != '\0')
    757 	    /* We did not get the expected character, or we didn't
    758 	       get a valid terminating character after seeing the
    759 	       entire pseudo-op, so we must go back to the beginning.  */
    760 	    symver_state = NULL;
    761 	  else
    762 	    {
    763 	      /* We've read the entire pseudo-op.  If this is the end
    764 		 of the line, go back to the beginning.  */
    765 	      if (IS_NEWLINE (ch))
    766 		symver_state = NULL;
    767 	    }
    768 	}
    769 #endif /* TC_ARM && OBJ_ELF */
    770 
    771 #ifdef TC_M68K
    772       /* We want to have pseudo-ops which control whether we are in
    773 	 MRI mode or not.  Unfortunately, since m68k MRI mode affects
    774 	 the scrubber, that means that we need a special purpose
    775 	 recognizer here.  */
    776       if (mri_state == NULL)
    777 	{
    778 	  if ((state == 0 || state == 1)
    779 	      && ch == mri_pseudo[0])
    780 	    mri_state = mri_pseudo + 1;
    781 	}
    782       else
    783 	{
    784 	  /* We advance to the next state if we find the right
    785 	     character, or if we need a space character and we get any
    786 	     whitespace character, or if we need a '0' and we get a
    787 	     '1' (this is so that we only need one state to handle
    788 	     ``.mri 0'' and ``.mri 1'').  */
    789 	  if (ch != '\0'
    790 	      && (*mri_state == ch
    791 		  || (*mri_state == ' '
    792 		      && lex[ch] == LEX_IS_WHITESPACE)
    793 		  || (*mri_state == '0'
    794 		      && ch == '1')))
    795 	    {
    796 	      mri_last_ch = ch;
    797 	      ++mri_state;
    798 	    }
    799 	  else if (*mri_state != '\0'
    800 		   || (lex[ch] != LEX_IS_WHITESPACE
    801 		       && lex[ch] != LEX_IS_NEWLINE))
    802 	    {
    803 	      /* We did not get the expected character, or we didn't
    804 		 get a valid terminating character after seeing the
    805 		 entire pseudo-op, so we must go back to the
    806 		 beginning.  */
    807 	      mri_state = NULL;
    808 	    }
    809 	  else
    810 	    {
    811 	      /* We've read the entire pseudo-op.  mips_last_ch is
    812 		 either '0' or '1' indicating whether to enter or
    813 		 leave MRI mode.  */
    814 	      do_scrub_begin (mri_last_ch == '1');
    815 	      mri_state = NULL;
    816 
    817 	      /* We continue handling the character as usual.  The
    818 		 main gas reader must also handle the .mri pseudo-op
    819 		 to control expression parsing and the like.  */
    820 	    }
    821 	}
    822 #endif
    823 
    824       if (ch == EOF)
    825 	{
    826 	  if (state != 0)
    827 	    {
    828 	      as_warn (_("end of file not at end of a line; newline inserted"));
    829 	      state = 0;
    830 	      PUT ('\n');
    831 	    }
    832 	  goto fromeof;
    833 	}
    834 
    835       switch (lex[ch])
    836 	{
    837 	case LEX_IS_WHITESPACE:
    838 	  do
    839 	    {
    840 	      ch = GET ();
    841 	    }
    842 	  while (ch != EOF && IS_WHITESPACE (ch));
    843 	  if (ch == EOF)
    844 	    goto fromeof;
    845 
    846 	  if (state == 0)
    847 	    {
    848 	      /* Preserve a single whitespace character at the
    849 		 beginning of a line.  */
    850 	      state = 1;
    851 	      UNGET (ch);
    852 	      PUT (' ');
    853 	      break;
    854 	    }
    855 
    856 #ifdef KEEP_WHITE_AROUND_COLON
    857 	  if (lex[ch] == LEX_IS_COLON)
    858 	    {
    859 	      /* Only keep this white if there's no white *after* the
    860 		 colon.  */
    861 	      ch2 = GET ();
    862 	      if (ch2 != EOF)
    863 		UNGET (ch2);
    864 	      if (!IS_WHITESPACE (ch2))
    865 		{
    866 		  state = 9;
    867 		  UNGET (ch);
    868 		  PUT (' ');
    869 		  break;
    870 		}
    871 	    }
    872 #endif
    873 	  if (IS_COMMENT (ch)
    874 	      || ch == '/'
    875 	      || IS_LINE_SEPARATOR (ch)
    876 	      || IS_PARALLEL_SEPARATOR (ch))
    877 	    {
    878 	      if (scrub_m68k_mri)
    879 		{
    880 		  /* In MRI mode, we keep these spaces.  */
    881 		  UNGET (ch);
    882 		  PUT (' ');
    883 		  break;
    884 		}
    885 	      goto recycle;
    886 	    }
    887 
    888 	  /* If we're in state 2 or 11, we've seen a non-white
    889 	     character followed by whitespace.  If the next character
    890 	     is ':', this is whitespace after a label name which we
    891 	     normally must ignore.  In MRI mode, though, spaces are
    892 	     not permitted between the label and the colon.  */
    893 	  if ((state == 2 || state == 11)
    894 	      && lex[ch] == LEX_IS_COLON
    895 	      && ! scrub_m68k_mri)
    896 	    {
    897 	      state = 1;
    898 	      PUT (ch);
    899 	      break;
    900 	    }
    901 
    902 	  switch (state)
    903 	    {
    904 	    case 1:
    905 	      /* We can arrive here if we leave a leading whitespace
    906 		 character at the beginning of a line.  */
    907 	      goto recycle;
    908 	    case 2:
    909 	      state = 3;
    910 	      if (to + 1 < toend)
    911 		{
    912 		  /* Optimize common case by skipping UNGET/GET.  */
    913 		  PUT (' ');	/* Sp after opco */
    914 		  goto recycle;
    915 		}
    916 	      UNGET (ch);
    917 	      PUT (' ');
    918 	      break;
    919 	    case 3:
    920 #ifndef TC_KEEP_OPERAND_SPACES
    921 	      /* For TI C6X, we keep these spaces as they may separate
    922 		 functional unit specifiers from operands.  */
    923 	      if (scrub_m68k_mri)
    924 #endif
    925 		{
    926 		  /* In MRI mode, we keep these spaces.  */
    927 		  UNGET (ch);
    928 		  PUT (' ');
    929 		  break;
    930 		}
    931 	      goto recycle;	/* Sp in operands */
    932 	    case 9:
    933 	    case 10:
    934 #ifndef TC_KEEP_OPERAND_SPACES
    935 	      if (scrub_m68k_mri)
    936 #endif
    937 		{
    938 		  /* In MRI mode, we keep these spaces.  */
    939 		  state = 3;
    940 		  UNGET (ch);
    941 		  PUT (' ');
    942 		  break;
    943 		}
    944 	      state = 10;	/* Sp after symbol char */
    945 	      goto recycle;
    946 	    case 11:
    947 	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
    948 		state = 1;
    949 	      else
    950 		{
    951 		  /* We know that ch is not ':', since we tested that
    952 		     case above.  Therefore this is not a label, so it
    953 		     must be the opcode, and we've just seen the
    954 		     whitespace after it.  */
    955 		  state = 3;
    956 		}
    957 	      UNGET (ch);
    958 	      PUT (' ');	/* Sp after label definition.  */
    959 	      break;
    960 	    default:
    961 	      BAD_CASE (state);
    962 	    }
    963 	  break;
    964 
    965 	case LEX_IS_TWOCHAR_COMMENT_1ST:
    966 	  ch2 = GET ();
    967 	  if (ch2 == '*')
    968 	    {
    969 	      for (;;)
    970 		{
    971 		  do
    972 		    {
    973 		      ch2 = GET ();
    974 		      if (ch2 != EOF && IS_NEWLINE (ch2))
    975 			add_newlines++;
    976 		    }
    977 		  while (ch2 != EOF && ch2 != '*');
    978 
    979 		  while (ch2 == '*')
    980 		    ch2 = GET ();
    981 
    982 		  if (ch2 == EOF || ch2 == '/')
    983 		    break;
    984 
    985 		  /* This UNGET will ensure that we count newlines
    986 		     correctly.  */
    987 		  UNGET (ch2);
    988 		}
    989 
    990 	      if (ch2 == EOF)
    991 		as_warn (_("end of file in multiline comment"));
    992 
    993 	      ch = ' ';
    994 	      goto recycle;
    995 	    }
    996 #ifdef DOUBLESLASH_LINE_COMMENTS
    997 	  else if (ch2 == '/')
    998 	    {
    999 	      do
   1000 		{
   1001 		  ch = GET ();
   1002 		}
   1003 	      while (ch != EOF && !IS_NEWLINE (ch));
   1004 	      if (ch == EOF)
   1005 		as_warn ("end of file in comment; newline inserted");
   1006 	      state = 0;
   1007 	      PUT ('\n');
   1008 	      break;
   1009 	    }
   1010 #endif
   1011 	  else
   1012 	    {
   1013 	      if (ch2 != EOF)
   1014 		UNGET (ch2);
   1015 	      if (state == 9 || state == 10)
   1016 		state = 3;
   1017 	      PUT (ch);
   1018 	    }
   1019 	  break;
   1020 
   1021 	case LEX_IS_STRINGQUOTE:
   1022 	  quotechar = ch;
   1023 	  if (state == 10)
   1024 	    {
   1025 	      /* Preserve the whitespace in foo "bar".  */
   1026 	      UNGET (ch);
   1027 	      state = 3;
   1028 	      PUT (' ');
   1029 
   1030 	      /* PUT didn't jump out.  We could just break, but we
   1031 		 know what will happen, so optimize a bit.  */
   1032 	      ch = GET ();
   1033 	      old_state = 3;
   1034 	    }
   1035 	  else if (state == 9)
   1036 	    old_state = 3;
   1037 	  else
   1038 	    old_state = state;
   1039 	  state = 5;
   1040 	  PUT (ch);
   1041 	  break;
   1042 
   1043 #ifndef IEEE_STYLE
   1044 	case LEX_IS_ONECHAR_QUOTE:
   1045 #ifdef H_TICK_HEX
   1046 	  if (state == 9 && enable_h_tick_hex)
   1047 	    {
   1048 	      char c;
   1049 
   1050 	      c = GET ();
   1051 	      as_warn ("'%c found after symbol", c);
   1052 	      UNGET (c);
   1053 	    }
   1054 #endif
   1055 	  if (state == 10)
   1056 	    {
   1057 	      /* Preserve the whitespace in foo 'b'.  */
   1058 	      UNGET (ch);
   1059 	      state = 3;
   1060 	      PUT (' ');
   1061 	      break;
   1062 	    }
   1063 	  ch = GET ();
   1064 	  if (ch == EOF)
   1065 	    {
   1066 	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
   1067 	      ch = 0;
   1068 	    }
   1069 	  if (ch == '\\')
   1070 	    {
   1071 	      ch = GET ();
   1072 	      if (ch == EOF)
   1073 		{
   1074 		  as_warn (_("end of file in escape character"));
   1075 		  ch = '\\';
   1076 		}
   1077 	      else
   1078 		ch = process_escape (ch);
   1079 	    }
   1080 	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
   1081 
   1082 	  /* None of these 'x constants for us.  We want 'x'.  */
   1083 	  if ((ch = GET ()) != '\'')
   1084 	    {
   1085 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
   1086 	      as_warn (_("missing close quote; (assumed)"));
   1087 #else
   1088 	      if (ch != EOF)
   1089 		UNGET (ch);
   1090 #endif
   1091 	    }
   1092 	  if (strlen (out_buf) == 1)
   1093 	    {
   1094 	      PUT (out_buf[0]);
   1095 	      break;
   1096 	    }
   1097 	  if (state == 9)
   1098 	    old_state = 3;
   1099 	  else
   1100 	    old_state = state;
   1101 	  state = -1;
   1102 	  out_string = out_buf;
   1103 	  PUT (*out_string++);
   1104 	  break;
   1105 #endif
   1106 
   1107 	case LEX_IS_COLON:
   1108 #ifdef KEEP_WHITE_AROUND_COLON
   1109 	  state = 9;
   1110 #else
   1111 	  if (state == 9 || state == 10)
   1112 	    state = 3;
   1113 	  else if (state != 3)
   1114 	    state = 1;
   1115 #endif
   1116 	  PUT (ch);
   1117 	  break;
   1118 
   1119 	case LEX_IS_NEWLINE:
   1120 	  /* Roll out a bunch of newlines from inside comments, etc.  */
   1121 	  if (add_newlines)
   1122 	    {
   1123 	      --add_newlines;
   1124 	      UNGET (ch);
   1125 	    }
   1126 	  /* Fall through.  */
   1127 
   1128 	case LEX_IS_LINE_SEPARATOR:
   1129 	  state = 0;
   1130 	  PUT (ch);
   1131 	  break;
   1132 
   1133 	case LEX_IS_PARALLEL_SEPARATOR:
   1134 	  state = 1;
   1135 	  PUT (ch);
   1136 	  break;
   1137 
   1138 #ifdef TC_V850
   1139 	case LEX_IS_DOUBLEDASH_1ST:
   1140 	  ch2 = GET ();
   1141 	  if (ch2 != '-')
   1142 	    {
   1143 	      if (ch2 != EOF)
   1144 		UNGET (ch2);
   1145 	      goto de_fault;
   1146 	    }
   1147 	  /* Read and skip to end of line.  */
   1148 	  do
   1149 	    {
   1150 	      ch = GET ();
   1151 	    }
   1152 	  while (ch != EOF && ch != '\n');
   1153 
   1154 	  if (ch == EOF)
   1155 	    as_warn (_("end of file in comment; newline inserted"));
   1156 
   1157 	  state = 0;
   1158 	  PUT ('\n');
   1159 	  break;
   1160 #endif
   1161 #ifdef DOUBLEBAR_PARALLEL
   1162 	case LEX_IS_DOUBLEBAR_1ST:
   1163 	  ch2 = GET ();
   1164 	  if (ch2 != EOF)
   1165 	    UNGET (ch2);
   1166 	  if (ch2 != '|')
   1167 	    goto de_fault;
   1168 
   1169 	  /* Handle '||' in two states as invoking PUT twice might
   1170 	     result in the first one jumping out of this loop.  We'd
   1171 	     then lose track of the state and one '|' char.  */
   1172 	  state = 13;
   1173 	  PUT ('|');
   1174 	  break;
   1175 #endif
   1176 	case LEX_IS_LINE_COMMENT_START:
   1177 	  /* FIXME-someday: The two character comment stuff was badly
   1178 	     thought out.  On i386, we want '/' as line comment start
   1179 	     AND we want C style comments.  hence this hack.  The
   1180 	     whole lexical process should be reworked.  xoxorich.  */
   1181 	  if (ch == '/')
   1182 	    {
   1183 	      ch2 = GET ();
   1184 	      if (ch2 == '*')
   1185 		{
   1186 		  old_state = 3;
   1187 		  state = -2;
   1188 		  break;
   1189 		}
   1190 	      else
   1191 		{
   1192 		  UNGET (ch2);
   1193 		}
   1194 	    }
   1195 
   1196 	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
   1197 	    {
   1198 	      int startch;
   1199 
   1200 	      startch = ch;
   1201 
   1202 	      do
   1203 		{
   1204 		  ch = GET ();
   1205 		}
   1206 	      while (ch != EOF && IS_WHITESPACE (ch));
   1207 
   1208 	      if (ch == EOF)
   1209 		{
   1210 		  as_warn (_("end of file in comment; newline inserted"));
   1211 		  PUT ('\n');
   1212 		  break;
   1213 		}
   1214 
   1215 	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
   1216 		{
   1217 		  /* Not a cpp line.  */
   1218 		  while (ch != EOF && !IS_NEWLINE (ch))
   1219 		    ch = GET ();
   1220 		  if (ch == EOF)
   1221 		    {
   1222 		      as_warn (_("end of file in comment; newline inserted"));
   1223 		      PUT ('\n');
   1224 		    }
   1225 		  else /* IS_NEWLINE (ch) */
   1226 		    {
   1227 		      /* To process non-zero add_newlines.  */
   1228 		      UNGET (ch);
   1229 		    }
   1230 		  state = 0;
   1231 		  break;
   1232 		}
   1233 	      /* Looks like `# 123 "filename"' from cpp.  */
   1234 	      UNGET (ch);
   1235 	      old_state = 4;
   1236 	      state = -1;
   1237 	      if (scrub_m68k_mri)
   1238 		out_string = "\tlinefile ";
   1239 	      else
   1240 		out_string = "\t.linefile ";
   1241 	      PUT (*out_string++);
   1242 	      break;
   1243 	    }
   1244 
   1245 #ifdef TC_D10V
   1246 	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
   1247 	     Trap is the only short insn that has a first operand that is
   1248 	     neither register nor label.
   1249 	     We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
   1250 	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
   1251 	     already LEX_IS_LINE_COMMENT_START.  However, it is the
   1252 	     only character in line_comment_chars for d10v, hence we
   1253 	     can recognize it as such.  */
   1254 	  /* An alternative approach would be to reset the state to 1 when
   1255 	     we see '||', '<-' or '->', but that seems to be overkill.  */
   1256 	  if (state == 10)
   1257 	    PUT (' ');
   1258 #endif
   1259 	  /* We have a line comment character which is not at the
   1260 	     start of a line.  If this is also a normal comment
   1261 	     character, fall through.  Otherwise treat it as a default
   1262 	     character.  */
   1263 	  if (strchr (tc_comment_chars, ch) == NULL
   1264 	      && (! scrub_m68k_mri
   1265 		  || (ch != '!' && ch != '*')))
   1266 	    goto de_fault;
   1267 	  if (scrub_m68k_mri
   1268 	      && (ch == '!' || ch == '*' || ch == '#')
   1269 	      && state != 1
   1270 	      && state != 10)
   1271 	    goto de_fault;
   1272 	  /* Fall through.  */
   1273 	case LEX_IS_COMMENT_START:
   1274 #if defined TC_ARM && defined OBJ_ELF
   1275 	  /* On the ARM, `@' is the comment character.
   1276 	     Unfortunately this is also a special character in ELF .symver
   1277 	     directives (and .type, though we deal with those another way).
   1278 	     So we check if this line is such a directive, and treat
   1279 	     the character as default if so.  This is a hack.  */
   1280 	  if ((symver_state != NULL) && (*symver_state == 0))
   1281 	    goto de_fault;
   1282 #endif
   1283 
   1284 #ifdef TC_ARM
   1285 	  /* For the ARM, care is needed not to damage occurrences of \@
   1286 	     by stripping the @ onwards.  Yuck.  */
   1287 	  if (to > tostart && *(to - 1) == '\\')
   1288 	    /* Do not treat the @ as a start-of-comment.  */
   1289 	    goto de_fault;
   1290 #endif
   1291 
   1292 #ifdef WARN_COMMENTS
   1293 	  if (!found_comment)
   1294 	    found_comment_file = as_where (&found_comment);
   1295 #endif
   1296 	  do
   1297 	    {
   1298 	      ch = GET ();
   1299 	    }
   1300 	  while (ch != EOF && !IS_NEWLINE (ch));
   1301 	  if (ch == EOF)
   1302 	    as_warn (_("end of file in comment; newline inserted"));
   1303 	  state = 0;
   1304 	  PUT ('\n');
   1305 	  break;
   1306 
   1307 #ifdef H_TICK_HEX
   1308 	case LEX_IS_H:
   1309 	  /* Look for strings like H'[0-9A-Fa-f] and if found, replace
   1310 	     the H' with 0x to make them gas-style hex characters.  */
   1311 	  if (enable_h_tick_hex)
   1312 	    {
   1313 	      char quot;
   1314 
   1315 	      quot = GET ();
   1316 	      if (quot == '\'')
   1317 		{
   1318 		  UNGET ('x');
   1319 		  ch = '0';
   1320 		}
   1321 	      else
   1322 		UNGET (quot);
   1323 	    }
   1324 	  /* FALL THROUGH */
   1325 #endif
   1326 
   1327 	case LEX_IS_SYMBOL_COMPONENT:
   1328 	  if (state == 10)
   1329 	    {
   1330 	      /* This is a symbol character following another symbol
   1331 		 character, with whitespace in between.  We skipped
   1332 		 the whitespace earlier, so output it now.  */
   1333 	      UNGET (ch);
   1334 	      state = 3;
   1335 	      PUT (' ');
   1336 	      break;
   1337 	    }
   1338 
   1339 #ifdef TC_Z80
   1340 	  /* "af'" is a symbol containing '\''.  */
   1341 	  if (state == 3 && (ch == 'a' || ch == 'A'))
   1342 	    {
   1343 	      state = 16;
   1344 	      PUT (ch);
   1345 	      ch = GET ();
   1346 	      if (ch == 'f' || ch == 'F')
   1347 		{
   1348 		  state = 17;
   1349 		  PUT (ch);
   1350 		  break;
   1351 		}
   1352 	      else
   1353 		{
   1354 		  state = 9;
   1355 		  if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
   1356 		    {
   1357 		      if (ch != EOF)
   1358 			UNGET (ch);
   1359 		      break;
   1360 		    }
   1361 		}
   1362 	    }
   1363 #endif
   1364 	  if (state == 3)
   1365 	    state = 9;
   1366 
   1367 	  /* This is a common case.  Quickly copy CH and all the
   1368 	     following symbol component or normal characters.  */
   1369 	  if (to + 1 < toend
   1370 	      && mri_state == NULL
   1371 #if defined TC_ARM && defined OBJ_ELF
   1372 	      && symver_state == NULL
   1373 #endif
   1374 	      )
   1375 	    {
   1376 	      char *s;
   1377 	      ptrdiff_t len;
   1378 
   1379 	      for (s = from; s < fromend; s++)
   1380 		{
   1381 		  int type;
   1382 
   1383 		  ch2 = *(unsigned char *) s;
   1384 		  type = lex[ch2];
   1385 		  if (type != 0
   1386 		      && type != LEX_IS_SYMBOL_COMPONENT)
   1387 		    break;
   1388 		}
   1389 
   1390 	      if (s > from)
   1391 		/* Handle the last character normally, for
   1392 		   simplicity.  */
   1393 		--s;
   1394 
   1395 	      len = s - from;
   1396 
   1397 	      if (len > (toend - to) - 1)
   1398 		len = (toend - to) - 1;
   1399 
   1400 	      if (len > 0)
   1401 		{
   1402 		  PUT (ch);
   1403 		  memcpy (to, from, len);
   1404 		  to += len;
   1405 		  from += len;
   1406 		  if (to >= toend)
   1407 		    goto tofull;
   1408 		  ch = GET ();
   1409 		}
   1410 	    }
   1411 
   1412 	  /* Fall through.  */
   1413 	default:
   1414 	de_fault:
   1415 	  /* Some relatively `normal' character.  */
   1416 	  if (state == 0)
   1417 	    {
   1418 	      state = 11;	/* Now seeing label definition.  */
   1419 	    }
   1420 	  else if (state == 1)
   1421 	    {
   1422 	      state = 2;	/* Ditto.  */
   1423 	    }
   1424 	  else if (state == 9)
   1425 	    {
   1426 	      if (!IS_SYMBOL_COMPONENT (ch))
   1427 		state = 3;
   1428 	    }
   1429 	  else if (state == 10)
   1430 	    {
   1431 	      if (ch == '\\')
   1432 		{
   1433 		  /* Special handling for backslash: a backslash may
   1434 		     be the beginning of a formal parameter (of a
   1435 		     macro) following another symbol character, with
   1436 		     whitespace in between.  If that is the case, we
   1437 		     output a space before the parameter.  Strictly
   1438 		     speaking, correct handling depends upon what the
   1439 		     macro parameter expands into; if the parameter
   1440 		     expands into something which does not start with
   1441 		     an operand character, then we don't want to keep
   1442 		     the space.  We don't have enough information to
   1443 		     make the right choice, so here we are making the
   1444 		     choice which is more likely to be correct.  */
   1445 		  if (to + 1 >= toend)
   1446 		    {
   1447 		      /* If we're near the end of the buffer, save the
   1448 		         character for the next time round.  Otherwise
   1449 		         we'll lose our state.  */
   1450 		      UNGET (ch);
   1451 		      goto tofull;
   1452 		    }
   1453 		  *to++ = ' ';
   1454 		}
   1455 
   1456 	      state = 3;
   1457 	    }
   1458 	  PUT (ch);
   1459 	  break;
   1460 	}
   1461     }
   1462 
   1463   /*NOTREACHED*/
   1464 
   1465  fromeof:
   1466   /* We have reached the end of the input.  */
   1467   return to - tostart;
   1468 
   1469  tofull:
   1470   /* The output buffer is full.  Save any input we have not yet
   1471      processed.  */
   1472   if (fromend > from)
   1473     {
   1474       saved_input = from;
   1475       saved_input_len = fromend - from;
   1476     }
   1477   else
   1478     saved_input = NULL;
   1479 
   1480   return to - tostart;
   1481 }
   1482