Home | History | Annotate | Download | only in intltest
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2003-2006, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *   file name:  nptrans.h
      9  *   encoding:   US-ASCII
     10  *   tab size:   8 (not used)
     11  *   indentation:4
     12  *
     13  *   created on: 2003feb1
     14  *   created by: Ram Viswanadha
     15  */
     16 
     17 #ifndef NPTRANS_H
     18 #define NPTRANS_H
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_IDNA
     23 #if !UCONFIG_NO_TRANSLITERATION
     24 
     25 #include "unicode/uniset.h"
     26 #include "unicode/ures.h"
     27 #include "unicode/translit.h"
     28 
     29 #include "intltest.h"
     30 
     31 
/* Code point U+0020 (SPACE). Not referenced elsewhere in this header. */
#define ASCII_SPACE 0x0020
     33 
     34 class NamePrepTransform {
     35 
     36 private :
     37     Transliterator *mapping;
     38     UnicodeSet unassigned;
     39     UnicodeSet prohibited;
     40     UnicodeSet labelSeparatorSet;
     41     UResourceBundle *bundle;
     42     NamePrepTransform(UParseError& parseError, UErrorCode& status);
     43 
     44 
     45 public :
     46 
     47     static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status);
     48 
     49     virtual ~NamePrepTransform();
     50 
     51 
     52     inline UBool isProhibited(UChar32 ch);
     53 
     54     /**
     55      * ICU "poor man's RTTI", returns a UClassID for the actual class.
     56      *
     57      * @draft ICU 2.6
     58      */
     59     inline UClassID getDynamicClassID() const { return getStaticClassID(); }
     60 
     61     /**
     62      * ICU "poor man's RTTI", returns a UClassID for this class.
     63      *
     64      * @draft ICU 2.6
     65      */
     66     static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
     67 
     68     /**
     69      * Map every character in input stream with mapping character
     70      * in the mapping table and populate the output stream.
     71      * For any individual character the mapping table may specify
     72      * that that a character be mapped to nothing, mapped to one
     73      * other character or to a string of other characters.
     74      *
     75      * @param src           Pointer to UChar buffer containing a single label
     76      * @param srcLength     Number of characters in the source label
     77      * @param dest          Pointer to the destination buffer to receive the output
     78      * @param destCapacity  The capacity of destination array
     79      * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
     80      *                          If TRUE unassigned values are treated as normal Unicode code point.
     81      *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
     82      * @param status        ICU error code in/out parameter.
     83      *                      Must fulfill U_SUCCESS before the function call.
     84      * @return The number of UChars in the destination buffer
     85      *
     86      */
     87     int32_t map(const UChar* src, int32_t srcLength,
     88                         UChar* dest, int32_t destCapacity,
     89                         UBool allowUnassigned,
     90                         UParseError* parseError,
     91                         UErrorCode& status );
     92 
     93     /**
     94      * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
     95      * checks for prohited and BiDi characters in the order defined by RFC 3454
     96      *
     97      * @param src           Pointer to UChar buffer containing a single label
     98      * @param srcLength     Number of characters in the source label
     99      * @param dest          Pointer to the destination buffer to receive the output
    100      * @param destCapacity  The capacity of destination array
    101      * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
    102      *                          If TRUE unassigned values are treated as normal Unicode code point.
    103      *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
    104      * @param status        ICU error code in/out parameter.
    105      *                      Must fulfill U_SUCCESS before the function call.
    106      * @return The number of UChars in the destination buffer
    107      */
    108     int32_t process(const UChar* src, int32_t srcLength,
    109                             UChar* dest, int32_t destCapacity,
    110                             UBool allowUnassigned,
    111                             UParseError* parseError,
    112                             UErrorCode& status );
    113 
    114     /**
    115      * Ascertain if the given code point is a label separator as specified by IDNA
    116      *
    117      * @return TRUE is the code point is a label separator
    118      *
    119      *
    120      */
    121     UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
    122 
    123 
    124     inline UBool isLDHChar(UChar32 ch);
    125 private:
    126     /**
    127      * The address of this static class variable serves as this class's ID
    128      * for ICU "poor man's RTTI".
    129      */
    130     static const char fgClassID;
    131 };
    132 
    133 inline UBool NamePrepTransform::isLDHChar(UChar32 ch){
    134     // high runner case
    135     if(ch>0x007A){
    136         return FALSE;
    137     }
    138     //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
    139     if( (ch==0x002D) ||
    140         (0x0030 <= ch && ch <= 0x0039) ||
    141         (0x0041 <= ch && ch <= 0x005A) ||
    142         (0x0061 <= ch && ch <= 0x007A)
    143       ){
    144         return TRUE;
    145     }
    146     return FALSE;
    147 }
    148 
    149 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    150 #else
    151 class NamePrepTransform {
    152 };
    153 #endif /* #if !UCONFIG_NO_IDNA */
    154 
    155 #endif
    156 
    157 /*
    158  * Hey, Emacs, please set the following:
    159  *
    160  * Local Variables:
    161  * indent-tabs-mode: nil
    162  * End:
    163  *
    164  */
    165