Home | History | Annotate | Download | only in include
      1 /* Copyright (C) 1996, 1997, 1998, 1999, 2004 Free Software Foundation, Inc.
      2    This file is part of the GNU C Library.
      3    Contributed by Ulrich Drepper <drepper (at) cygnus.com>, 1996.
      4 
      5    The GNU C Library is free software; you can redistribute it and/or
      6    modify it under the terms of the GNU Lesser General Public
      7    License as published by the Free Software Foundation; either
      8    version 2.1 of the License, or (at your option) any later version.
      9 
     10    The GNU C Library is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13    Lesser General Public License for more details.
     14 
     15    You should have received a copy of the GNU Lesser General Public
     16    License along with the GNU C Library; if not, write to the Free
     17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     18    02111-1307 USA.  */
     19 
     20 #ifndef _REGEXP_H
     21 #define _REGEXP_H	1
     22 
     23 /* The contents of this header file were first standardized in X/Open
     24    System Interface and Headers Issue 2, originally coming from SysV.
     25    In issue 4, version 2, it is marked as TO BE WITHDRAWN, and it has
     26    been withdrawn in SUSv3.
     27 
     28    This code shouldn't be used in any newly written code.  It is
     29    included only for compatibility reasons.  Use the POSIX definition
     30    in <regex.h> for portable applications and a reasonable interface.  */
     31 
     32 #include <features.h>
     33 #include <alloca.h>
     34 #include <regex.h>
     35 #include <stdlib.h>
     36 #include <string.h>
     37 
     38 /* The implementation provided here emulates the needed functionality
     39    by mapping to the POSIX regular expression matcher.  The interface
     40    of the function included here is weird (and that really is a mild
     41    word for it).
     42 
     43    The user has to provide six macros before this header file can be
     44    included:
     45 
     46    INIT		Declarations for variables which can be used by the
     47 		other macros.
     48 
     49    GETC()	Return the value of the next character in the regular
     50 		expression pattern.  Successive calls should return
     51 		successive characters.
     52 
     53    PEEKC()	Return the value of the next character in the regular
     54 		expression pattern.  Immediately successive calls to
     55 		PEEKC() should return the same character which should
     56 		also be the next character returned by GETC().
     57 
     58    UNGETC(c)	Cause `c' to be returned by the next call to GETC() and
     59 		PEEKC().
     60 
     61    RETURN(ptr)	Used for normal exit of the `compile' function.  `ptr'
     62 		is a pointer to the character after the last character of
     63 		the compiled regular expression.
     64 
     65    ERROR(val)	Used for abnormal return from `compile'.  `val' is the
     66 		error number.  The error codes are:
     67 		11	Range endpoint too large.
     68 		16	Bad number.
     69 		25	\digit out of range.
     70 		36	Illegal or missing delimiter.
     71 		41	No remembered search string.
     72 		42	\( \) imbalance.
     73 		43	Too many \(.
     74 		44	More than two numbers given in \{ \}.
     75 		45	} expected after \.
     76 		46	First number exceeds second in \{ \}.
     77 		49	[ ] imbalance.
     78 		50	Regular expression overflow.
     79 
     80   */
     81 
     82 __BEGIN_DECLS
     83 
     84 /* Interface variables.  They contain the results of the successful
     85    calls to `step' and `advance'.  */
     86 extern char *loc1;
     87 extern char *loc2;
     88 
     89 /* The use of this variable in the `advance' function is not
     90    supported.  */
     91 extern char *locs;
     92 
     93 
     94 #ifndef __DO_NOT_DEFINE_COMPILE
     95 /* Get and compile the user supplied pattern up to end of line or
     96    string or until EOF is seen, whatever happens first.  The result is
     97    placed in the buffer starting at EXPBUF and delimited by ENDBUF.
     98 
     99    This function cannot be defined in the libc itself since it depends
    100    on the macros.  */
    101 char *
    102 compile (char *__restrict instring, char *__restrict expbuf,
    103 	 __const char *__restrict endbuf, int eof)
    104 {
    105   char *__input_buffer = NULL;
    106   size_t __input_size = 0;
    107   size_t __current_size = 0;
    108   int __ch;
    109   int __error;
    110   INIT
    111 
    112   /* Align the expression buffer according to the needs for an object
    113      of type `regex_t'.  Then check for minimum size of the buffer for
    114      the compiled regular expression.  */
    115   regex_t *__expr_ptr;
    116 # if defined __GNUC__ && __GNUC__ >= 2
    117   const size_t __req = __alignof__ (regex_t *);
    118 # else
    119   /* How shall we find out?  We simply guess it and can change it is
    120      this really proofs to be wrong.  */
    121   const size_t __req = 8;
    122 # endif
    123   expbuf += __req;
    124   expbuf -= (expbuf - ((char *) 0)) % __req;
    125   if (endbuf < expbuf + sizeof (regex_t))
    126     {
    127       ERROR (50);
    128     }
    129   __expr_ptr = (regex_t *) expbuf;
    130   /* The remaining space in the buffer can be used for the compiled
    131      pattern.  */
    132   __expr_ptr->buffer = expbuf + sizeof (regex_t);
    133   __expr_ptr->allocated = endbuf -  (char *) __expr_ptr->buffer;
    134 
    135   while ((__ch = (GETC ())) != eof)
    136     {
    137       if (__ch == '\0' || __ch == '\n')
    138 	{
    139 	  UNGETC (__ch);
    140 	  break;
    141 	}
    142 
    143       if (__current_size + 1 >= __input_size)
    144 	{
    145 	  size_t __new_size = __input_size ? 2 * __input_size : 128;
    146 	  char *__new_room = (char *) alloca (__new_size);
    147 	  /* See whether we can use the old buffer.  */
    148 	  if (__new_room + __new_size == __input_buffer)
    149 	    {
    150 	      __input_size += __new_size;
    151 	      __input_buffer = (char *) memcpy (__new_room, __input_buffer,
    152 					       __current_size);
    153 	    }
    154 	  else if (__input_buffer + __input_size == __new_room)
    155 	    __input_size += __new_size;
    156 	  else
    157 	    {
    158 	      __input_size = __new_size;
    159 	      __input_buffer = (char *) memcpy (__new_room, __input_buffer,
    160 						__current_size);
    161 	    }
    162 	}
    163       __input_buffer[__current_size++] = __ch;
    164     }
    165   __input_buffer[__current_size++] = '\0';
    166 
    167   /* Now compile the pattern.  */
    168   __error = regcomp (__expr_ptr, __input_buffer, REG_NEWLINE);
    169   if (__error != 0)
    170     /* Oh well, we have to translate POSIX error codes.  */
    171     switch (__error)
    172       {
    173       case REG_BADPAT:
    174       case REG_ECOLLATE:
    175       case REG_ECTYPE:
    176       case REG_EESCAPE:
    177       case REG_BADRPT:
    178       case REG_EEND:
    179       case REG_ERPAREN:
    180       default:
    181 	/* There is no matching error code.  */
    182 	RETURN (36);
    183       case REG_ESUBREG:
    184 	RETURN (25);
    185       case REG_EBRACK:
    186 	RETURN (49);
    187       case REG_EPAREN:
    188 	RETURN (42);
    189       case REG_EBRACE:
    190 	RETURN (44);
    191       case REG_BADBR:
    192 	RETURN (46);
    193       case REG_ERANGE:
    194 	RETURN (11);
    195       case REG_ESPACE:
    196       case REG_ESIZE:
    197 	ERROR (50);
    198       }
    199 
    200   /* Everything is ok.  */
    201   RETURN ((char *) (__expr_ptr->buffer + __expr_ptr->used));
    202 }
    203 #endif
    204 
    205 
    206 /* Find the next match in STRING.  The compiled regular expression is
    207    found in the buffer starting at EXPBUF.  On a match `loc1' points to
    208    the first character matched and `loc2' points to the next unmatched
    209    character.  */
    210 extern int step (__const char *__restrict __string,
    211 		 __const char *__restrict __expbuf) __THROW;
    212 
    213 /* Match the beginning of STRING with the compiled regular expression
    214    in EXPBUF.  If the match is successful `loc2' will contain the
    215    position of the first unmatched character.  */
    216 extern int advance (__const char *__restrict __string,
    217 		    __const char *__restrict __expbuf) __THROW;
    218 
    219 
    220 __END_DECLS
    221 
    222 #endif /* regexp.h */
    223