Home | History | Annotate | Download | only in re2
      1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Unicode character groups.
      6 
      7 // The codes get split into ranges of 16-bit codes
      8 // and ranges of 32-bit codes.  It would be simpler
      9 // to use only 32-bit ranges, but these tables are large
     10 // enough to warrant extra care.
     11 //
     12 // Using just 32-bit ranges gives 27 kB of data.
     13 // Adding 16-bit ranges gives 18 kB of data.
     14 // Adding an extra table of 16-bit singletons would reduce
     15 // to 16.5 kB of data but make the data harder to use;
     16 // we don't bother.
     17 
     18 #ifndef RE2_UNICODE_GROUPS_H__
     19 #define RE2_UNICODE_GROUPS_H__
     20 
     21 #include "util/util.h"
     22 
     23 namespace re2 {
     24 
     25 struct URange16
     26 {
     27   uint16 lo;
     28   uint16 hi;
     29 };
     30 
     31 struct URange32
     32 {
     33   uint32 lo;
     34   uint32 hi;
     35 };
     36 
     37 struct UGroup
     38 {
     39   const char *name;
     40   int sign;  // +1 for [abc], -1 for [^abc]
     41   URange16 *r16;
     42   int nr16;
     43   URange32 *r32;
     44   int nr32;
     45 };
     46 
     47 // Named by property or script name (e.g., "Nd", "N", "Han").
     48 // Negated groups are not included.
     49 extern UGroup unicode_groups[];
     50 extern int num_unicode_groups;
     51 
     52 // Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]").
     53 // Negated groups are included.
     54 extern UGroup posix_groups[];
     55 extern int num_posix_groups;
     56 
     57 // Named by Perl name (e.g., "\\d", "\\D").
     58 // Negated groups are included.
     59 extern UGroup perl_groups[];
     60 extern int num_perl_groups;
     61 
     62 }  // namespace re2
     63 
     64 #endif  // RE2_UNICODE_GROUPS_H__
     65