Home | History | Annotate | Download | only in src
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9      Original API code Copyright (c) 1997-2012 University of Cambridge
     10          New API code Copyright (c) 2016 University of Cambridge
     11 
     12 -----------------------------------------------------------------------------
     13 Redistribution and use in source and binary forms, with or without
     14 modification, are permitted provided that the following conditions are met:
     15 
     16     * Redistributions of source code must retain the above copyright notice,
     17       this list of conditions and the following disclaimer.
     18 
     19     * Redistributions in binary form must reproduce the above copyright
     20       notice, this list of conditions and the following disclaimer in the
     21       documentation and/or other materials provided with the distribution.
     22 
     23     * Neither the name of the University of Cambridge nor the names of its
     24       contributors may be used to endorse or promote products derived from
     25       this software without specific prior written permission.
     26 
     27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37 POSSIBILITY OF SUCH DAMAGE.
     38 -----------------------------------------------------------------------------
     39 */
     40 
     41 
     42 #ifndef _PCRE2_UCP_H
     43 #define _PCRE2_UCP_H
     44 
     45 /* This file contains definitions of the property values that are returned by
     46 the UCD access macros. New values that are added for new releases of Unicode
     47 should always be at the end of each enum, for backwards compatibility.
     48 
     49 IMPORTANT: Note also that the specific numeric values of the enums have to be
     50 the same as the values that are generated by the maint/MultiStage2.py script,
     51 where the equivalent property descriptive names are listed in vectors.
     52 
     53 ALSO: The specific values of the first two enums are assumed for the table
     54 called catposstab in pcre2_compile.c. */
     55 
     56 /* These are the general character categories. */
     57 
     58 enum {
     59   ucp_C,     /* Other */
     60   ucp_L,     /* Letter */
     61   ucp_M,     /* Mark */
     62   ucp_N,     /* Number */
     63   ucp_P,     /* Punctuation */
     64   ucp_S,     /* Symbol */
     65   ucp_Z      /* Separator */
     66 };
     67 
     68 /* These are the particular character categories. */
     69 
     70 enum {
     71   ucp_Cc,    /* Control */
     72   ucp_Cf,    /* Format */
     73   ucp_Cn,    /* Unassigned */
     74   ucp_Co,    /* Private use */
     75   ucp_Cs,    /* Surrogate */
     76   ucp_Ll,    /* Lower case letter */
     77   ucp_Lm,    /* Modifier letter */
     78   ucp_Lo,    /* Other letter */
     79   ucp_Lt,    /* Title case letter */
     80   ucp_Lu,    /* Upper case letter */
     81   ucp_Mc,    /* Spacing mark */
     82   ucp_Me,    /* Enclosing mark */
     83   ucp_Mn,    /* Non-spacing mark */
     84   ucp_Nd,    /* Decimal number */
     85   ucp_Nl,    /* Letter number */
     86   ucp_No,    /* Other number */
     87   ucp_Pc,    /* Connector punctuation */
     88   ucp_Pd,    /* Dash punctuation */
     89   ucp_Pe,    /* Close punctuation */
     90   ucp_Pf,    /* Final punctuation */
     91   ucp_Pi,    /* Initial punctuation */
     92   ucp_Po,    /* Other punctuation */
     93   ucp_Ps,    /* Open punctuation */
     94   ucp_Sc,    /* Currency symbol */
     95   ucp_Sk,    /* Modifier symbol */
     96   ucp_Sm,    /* Mathematical symbol */
     97   ucp_So,    /* Other symbol */
     98   ucp_Zl,    /* Line separator */
     99   ucp_Zp,    /* Paragraph separator */
    100   ucp_Zs     /* Space separator */
    101 };
    102 
    103 /* These are grapheme break properties. Note that the code for processing them
    104 assumes that the values are less than 16. If more values are added that take
    105 the number to 16 or more, the code will have to be rewritten. */
    106 
    107 enum {
    108   ucp_gbCR,                /*  0 */
    109   ucp_gbLF,                /*  1 */
    110   ucp_gbControl,           /*  2 */
    111   ucp_gbExtend,            /*  3 */
    112   ucp_gbPrepend,           /*  4 */
    113   ucp_gbSpacingMark,       /*  5 */
    114   ucp_gbL,                 /*  6 Hangul syllable type L */
    115   ucp_gbV,                 /*  7 Hangul syllable type V */
    116   ucp_gbT,                 /*  8 Hangul syllable type T */
    117   ucp_gbLV,                /*  9 Hangul syllable type LV */
    118   ucp_gbLVT,               /* 10 Hangul syllable type LVT */
    119   ucp_gbRegionalIndicator, /* 11 */
    120   ucp_gbOther              /* 12 */
    121 };
    122 
    123 /* These are the script identifications. */
    124 
    125 enum {
    126   ucp_Arabic,
    127   ucp_Armenian,
    128   ucp_Bengali,
    129   ucp_Bopomofo,
    130   ucp_Braille,
    131   ucp_Buginese,
    132   ucp_Buhid,
    133   ucp_Canadian_Aboriginal,
    134   ucp_Cherokee,
    135   ucp_Common,
    136   ucp_Coptic,
    137   ucp_Cypriot,
    138   ucp_Cyrillic,
    139   ucp_Deseret,
    140   ucp_Devanagari,
    141   ucp_Ethiopic,
    142   ucp_Georgian,
    143   ucp_Glagolitic,
    144   ucp_Gothic,
    145   ucp_Greek,
    146   ucp_Gujarati,
    147   ucp_Gurmukhi,
    148   ucp_Han,
    149   ucp_Hangul,
    150   ucp_Hanunoo,
    151   ucp_Hebrew,
    152   ucp_Hiragana,
    153   ucp_Inherited,
    154   ucp_Kannada,
    155   ucp_Katakana,
    156   ucp_Kharoshthi,
    157   ucp_Khmer,
    158   ucp_Lao,
    159   ucp_Latin,
    160   ucp_Limbu,
    161   ucp_Linear_B,
    162   ucp_Malayalam,
    163   ucp_Mongolian,
    164   ucp_Myanmar,
    165   ucp_New_Tai_Lue,
    166   ucp_Ogham,
    167   ucp_Old_Italic,
    168   ucp_Old_Persian,
    169   ucp_Oriya,
    170   ucp_Osmanya,
    171   ucp_Runic,
    172   ucp_Shavian,
    173   ucp_Sinhala,
    174   ucp_Syloti_Nagri,
    175   ucp_Syriac,
    176   ucp_Tagalog,
    177   ucp_Tagbanwa,
    178   ucp_Tai_Le,
    179   ucp_Tamil,
    180   ucp_Telugu,
    181   ucp_Thaana,
    182   ucp_Thai,
    183   ucp_Tibetan,
    184   ucp_Tifinagh,
    185   ucp_Ugaritic,
    186   ucp_Yi,
    187   /* New for Unicode 5.0: */
    188   ucp_Balinese,
    189   ucp_Cuneiform,
    190   ucp_Nko,
    191   ucp_Phags_Pa,
    192   ucp_Phoenician,
    193   /* New for Unicode 5.1: */
    194   ucp_Carian,
    195   ucp_Cham,
    196   ucp_Kayah_Li,
    197   ucp_Lepcha,
    198   ucp_Lycian,
    199   ucp_Lydian,
    200   ucp_Ol_Chiki,
    201   ucp_Rejang,
    202   ucp_Saurashtra,
    203   ucp_Sundanese,
    204   ucp_Vai,
    205   /* New for Unicode 5.2: */
    206   ucp_Avestan,
    207   ucp_Bamum,
    208   ucp_Egyptian_Hieroglyphs,
    209   ucp_Imperial_Aramaic,
    210   ucp_Inscriptional_Pahlavi,
    211   ucp_Inscriptional_Parthian,
    212   ucp_Javanese,
    213   ucp_Kaithi,
    214   ucp_Lisu,
    215   ucp_Meetei_Mayek,
    216   ucp_Old_South_Arabian,
    217   ucp_Old_Turkic,
    218   ucp_Samaritan,
    219   ucp_Tai_Tham,
    220   ucp_Tai_Viet,
    221   /* New for Unicode 6.0.0: */
    222   ucp_Batak,
    223   ucp_Brahmi,
    224   ucp_Mandaic,
    225   /* New for Unicode 6.1.0: */
    226   ucp_Chakma,
    227   ucp_Meroitic_Cursive,
    228   ucp_Meroitic_Hieroglyphs,
    229   ucp_Miao,
    230   ucp_Sharada,
    231   ucp_Sora_Sompeng,
    232   ucp_Takri,
    233   /* New for Unicode 7.0.0: */
    234   ucp_Bassa_Vah,
    235   ucp_Caucasian_Albanian,
    236   ucp_Duployan,
    237   ucp_Elbasan,
    238   ucp_Grantha,
    239   ucp_Khojki,
    240   ucp_Khudawadi,
    241   ucp_Linear_A,
    242   ucp_Mahajani,
    243   ucp_Manichaean,
    244   ucp_Mende_Kikakui,
    245   ucp_Modi,
    246   ucp_Mro,
    247   ucp_Nabataean,
    248   ucp_Old_North_Arabian,
    249   ucp_Old_Permic,
    250   ucp_Pahawh_Hmong,
    251   ucp_Palmyrene,
    252   ucp_Psalter_Pahlavi,
    253   ucp_Pau_Cin_Hau,
    254   ucp_Siddham,
    255   ucp_Tirhuta,
    256   ucp_Warang_Citi,
    257   /* New for Unicode 8.0.0: */
    258   ucp_Ahom,
    259   ucp_Anatolian_Hieroglyphs,
    260   ucp_Hatran,
    261   ucp_Multani,
    262   ucp_Old_Hungarian,
    263   ucp_SignWriting
    264 };
    265 
    266 #endif
    267 
    268 /* End of pcre2_ucp.h */
    269