Home | History | Annotate | Download | only in intltest
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2003-2011, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *   file name:  nptrans.h
      9  *   encoding:   US-ASCII
     10  *   tab size:   8 (not used)
     11  *   indentation:4
     12  *
     13  *   created on: 2003feb1
     14  *   created by: Ram Viswanadha
     15  */
     16 
     17 #ifndef NPTRANS_H
     18 #define NPTRANS_H
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_IDNA
     23 #if !UCONFIG_NO_TRANSLITERATION
     24 
     25 #include "unicode/uniset.h"
     26 #include "unicode/ures.h"
     27 #include "unicode/translit.h"
     28 
     29 #include "intltest.h"
     30 
     31 
/** Code point of the ASCII space character, U+0020. */
#define ASCII_SPACE 0x0020
     33 
     34 class NamePrepTransform {
     35 
     36 private :
     37     Transliterator *mapping;
     38     UnicodeSet unassigned;
     39     UnicodeSet prohibited;
     40     UnicodeSet labelSeparatorSet;
     41     UResourceBundle *bundle;
     42     NamePrepTransform(UParseError& parseError, UErrorCode& status);
     43 
     44 
     45 public :
     46 
     47     static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status);
     48 
     49     virtual ~NamePrepTransform();
     50 
     51 
     52     inline UBool isProhibited(UChar32 ch);
     53 
     54     /**
     55      * ICU "poor man's RTTI", returns a UClassID for the actual class.
     56      */
     57     inline UClassID getDynamicClassID() const { return getStaticClassID(); }
     58 
     59     /**
     60      * ICU "poor man's RTTI", returns a UClassID for this class.
     61      */
     62     static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
     63 
     64     /**
     65      * Map every character in input stream with mapping character
     66      * in the mapping table and populate the output stream.
     67      * For any individual character the mapping table may specify
     68      * that that a character be mapped to nothing, mapped to one
     69      * other character or to a string of other characters.
     70      *
     71      * @param src           Pointer to UChar buffer containing a single label
     72      * @param srcLength     Number of characters in the source label
     73      * @param dest          Pointer to the destination buffer to receive the output
     74      * @param destCapacity  The capacity of destination array
     75      * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
     76      *                          If TRUE unassigned values are treated as normal Unicode code point.
     77      *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
     78      * @param status        ICU error code in/out parameter.
     79      *                      Must fulfill U_SUCCESS before the function call.
     80      * @return The number of UChars in the destination buffer
     81      */
     82     int32_t map(const UChar* src, int32_t srcLength,
     83                         UChar* dest, int32_t destCapacity,
     84                         UBool allowUnassigned,
     85                         UParseError* parseError,
     86                         UErrorCode& status );
     87 
     88     /**
     89      * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
     90      * checks for prohited and BiDi characters in the order defined by RFC 3454
     91      *
     92      * @param src           Pointer to UChar buffer containing a single label
     93      * @param srcLength     Number of characters in the source label
     94      * @param dest          Pointer to the destination buffer to receive the output
     95      * @param destCapacity  The capacity of destination array
     96      * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
     97      *                          If TRUE unassigned values are treated as normal Unicode code point.
     98      *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
     99      * @param status        ICU error code in/out parameter.
    100      *                      Must fulfill U_SUCCESS before the function call.
    101      * @return The number of UChars in the destination buffer
    102      */
    103     int32_t process(const UChar* src, int32_t srcLength,
    104                             UChar* dest, int32_t destCapacity,
    105                             UBool allowUnassigned,
    106                             UParseError* parseError,
    107                             UErrorCode& status );
    108 
    109     /**
    110      * Ascertain if the given code point is a label separator as specified by IDNA
    111      *
    112      * @return TRUE is the code point is a label separator
    113      */
    114     UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
    115 
    116     inline UBool isLDHChar(UChar32 ch);
    117 
    118 private:
    119     /**
    120      * The address of this static class variable serves as this class's ID
    121      * for ICU "poor man's RTTI".
    122      */
    123     static const char fgClassID;
    124 };
    125 
    126 inline UBool NamePrepTransform::isLDHChar(UChar32 ch){
    127     // high runner case
    128     if(ch>0x007A){
    129         return FALSE;
    130     }
    131     //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
    132     if( (ch==0x002D) ||
    133         (0x0030 <= ch && ch <= 0x0039) ||
    134         (0x0041 <= ch && ch <= 0x005A) ||
    135         (0x0061 <= ch && ch <= 0x007A)
    136       ){
    137         return TRUE;
    138     }
    139     return FALSE;
    140 }
    141 
    142 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    143 #else
    144 class NamePrepTransform {
    145 };
    146 #endif /* #if !UCONFIG_NO_IDNA */
    147 
    148 #endif
    149 
    150 /*
    151  * Hey, Emacs, please set the following:
    152  *
    153  * Local Variables:
    154  * indent-tabs-mode: nil
    155  * End:
    156  *
    157  */
    158