Home | History | Annotate | Download | only in intltest
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4  *******************************************************************************
      5  *
      6  *   Copyright (C) 2003-2011, International Business Machines
      7  *   Corporation and others.  All Rights Reserved.
      8  *
      9  *******************************************************************************
     10  *   file name:  nptrans.h
     11  *   encoding:   UTF-8
     12  *   tab size:   8 (not used)
     13  *   indentation:4
     14  *
     15  *   created on: 2003feb1
     16  *   created by: Ram Viswanadha
     17  */
     18 
     19 #ifndef NPTRANS_H
     20 #define NPTRANS_H
     21 
     22 #include "unicode/utypes.h"
     23 
     24 #if !UCONFIG_NO_IDNA
     25 #if !UCONFIG_NO_TRANSLITERATION
     26 
     27 #include "unicode/uniset.h"
     28 #include "unicode/ures.h"
     29 #include "unicode/translit.h"
     30 
     31 #include "intltest.h"
     32 
     33 
     34 #define ASCII_SPACE 0x0020
     35 
     36 class NamePrepTransform {
     37 
     38 private :
     39     Transliterator *mapping;
     40     UnicodeSet unassigned;
     41     UnicodeSet prohibited;
     42     UnicodeSet labelSeparatorSet;
     43     UResourceBundle *bundle;
     44     NamePrepTransform(UParseError& parseError, UErrorCode& status);
     45 
     46 
     47 public :
     48 
     49     static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status);
     50 
     51     virtual ~NamePrepTransform();
     52 
     53 
     54     inline UBool isProhibited(UChar32 ch);
     55 
     56     /**
     57      * ICU "poor man's RTTI", returns a UClassID for the actual class.
     58      */
     59     inline UClassID getDynamicClassID() const { return getStaticClassID(); }
     60 
     61     /**
     62      * ICU "poor man's RTTI", returns a UClassID for this class.
     63      */
     64     static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
     65 
     66     /**
     67      * Map every character in input stream with mapping character
     68      * in the mapping table and populate the output stream.
     69      * For any individual character the mapping table may specify
     70      * that that a character be mapped to nothing, mapped to one
     71      * other character or to a string of other characters.
     72      *
     73      * @param src           Pointer to UChar buffer containing a single label
     74      * @param srcLength     Number of characters in the source label
     75      * @param dest          Pointer to the destination buffer to receive the output
     76      * @param destCapacity  The capacity of destination array
     77      * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
     78      *                          If TRUE unassigned values are treated as normal Unicode code point.
     79      *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
     80      * @param status        ICU error code in/out parameter.
     81      *                      Must fulfill U_SUCCESS before the function call.
     82      * @return The number of UChars in the destination buffer
     83      */
     84     int32_t map(const UChar* src, int32_t srcLength,
     85                         UChar* dest, int32_t destCapacity,
     86                         UBool allowUnassigned,
     87                         UParseError* parseError,
     88                         UErrorCode& status );
     89 
     90     /**
     91      * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
     92      * checks for prohited and BiDi characters in the order defined by RFC 3454
     93      *
     94      * @param src           Pointer to UChar buffer containing a single label
     95      * @param srcLength     Number of characters in the source label
     96      * @param dest          Pointer to the destination buffer to receive the output
     97      * @param destCapacity  The capacity of destination array
     98      * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
     99      *                          If TRUE unassigned values are treated as normal Unicode code point.
    100      *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
    101      * @param status        ICU error code in/out parameter.
    102      *                      Must fulfill U_SUCCESS before the function call.
    103      * @return The number of UChars in the destination buffer
    104      */
    105     int32_t process(const UChar* src, int32_t srcLength,
    106                             UChar* dest, int32_t destCapacity,
    107                             UBool allowUnassigned,
    108                             UParseError* parseError,
    109                             UErrorCode& status );
    110 
    111     /**
    112      * Ascertain if the given code point is a label separator as specified by IDNA
    113      *
    114      * @return TRUE is the code point is a label separator
    115      */
    116     UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
    117 
    118     inline UBool isLDHChar(UChar32 ch);
    119 
    120 private:
    121     /**
    122      * The address of this static class variable serves as this class's ID
    123      * for ICU "poor man's RTTI".
    124      */
    125     static const char fgClassID;
    126 };
    127 
    128 inline UBool NamePrepTransform::isLDHChar(UChar32 ch){
    129     // high runner case
    130     if(ch>0x007A){
    131         return FALSE;
    132     }
    133     //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
    134     if( (ch==0x002D) ||
    135         (0x0030 <= ch && ch <= 0x0039) ||
    136         (0x0041 <= ch && ch <= 0x005A) ||
    137         (0x0061 <= ch && ch <= 0x007A)
    138       ){
    139         return TRUE;
    140     }
    141     return FALSE;
    142 }
    143 
    144 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    145 #else
    146 class NamePrepTransform {
    147 };
    148 #endif /* #if !UCONFIG_NO_IDNA */
    149 
    150 #endif
    151 
    152 /*
    153  * Hey, Emacs, please set the following:
    154  *
    155  * Local Variables:
    156  * indent-tabs-mode: nil
    157  * End:
    158  *
    159  */
    160