Home | History | Annotate | Download | only in genrb
      1 /*
      2  *******************************************************************************
      3  *   Copyright (C) 2003-2014, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  *******************************************************************************
      6  *
      7  * File prscmnts.cpp
      8  *
      9  * Modification History:
     10  *
     11  *   Date          Name        Description
     12  *   08/22/2003    ram         Creation.
     13  *******************************************************************************
     14  */
     15 
     16 // Safer use of UnicodeString.
     17 #ifndef UNISTR_FROM_CHAR_EXPLICIT
     18 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
     19 #endif
     20 
     21 // Less important, but still a good idea.
     22 #ifndef UNISTR_FROM_STRING_EXPLICIT
     23 #   define UNISTR_FROM_STRING_EXPLICIT explicit
     24 #endif
     25 
     26 #include "unicode/regex.h"
     27 #include "unicode/unistr.h"
     28 #include "unicode/parseerr.h"
     29 #include "prscmnts.h"
     30 #include <stdio.h>
     31 #include <stdlib.h>
     32 
     33 U_NAMESPACE_USE
     34 
     35 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
     36 
     37 #define MAX_SPLIT_STRINGS 20
     38 
/*
 * Regular expressions used to recognise comment tags, one entry per
 * UParseCommentsOption value; getCount() and getAt() index this table directly
 * with their `option` argument. The expressions are applied to the pieces of a
 * comment that result from splitting it at '@', which is why they are anchored
 * with '^' and do not contain the '@' itself. Capture group 1 is the text that
 * follows the tag name.
 * NOTE(review): the entry order presumably mirrors the enum order
 * (UPC_TRANSLATE first, then UPC_NOTE) -- confirm against prscmnts.h.
 */
const char *patternStrings[UPC_LIMIT]={
    "^translate\\s*(.*)",
    "^note\\s*(.*)"
};
     43 
     44 U_CFUNC int32_t
     45 removeText(UChar *source, int32_t srcLen,
     46            UnicodeString patString,uint32_t options,
     47            UnicodeString replaceText, UErrorCode *status){
     48 
     49     if(status == NULL || U_FAILURE(*status)){
     50         return 0;
     51     }
     52 
     53     UnicodeString src(source, srcLen);
     54 
     55     RegexMatcher    myMatcher(patString, src, options, *status);
     56     if(U_FAILURE(*status)){
     57         return 0;
     58     }
     59     UnicodeString dest;
     60 
     61 
     62     dest = myMatcher.replaceAll(replaceText,*status);
     63 
     64 
     65     return dest.extract(source, srcLen, *status);
     66 
     67 }
     68 U_CFUNC int32_t
     69 trim(UChar *src, int32_t srcLen, UErrorCode *status){
     70      srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines
     71      srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces
     72      srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes
     73      return srcLen;
     74 }
     75 
     76 U_CFUNC int32_t
     77 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
     78     srcLen = trim(source, srcLen, status);
     79     UnicodeString patString("^\\s*?\\*\\s*?");  // remove pattern like " * " at the begining of the line
     80     srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status);
     81     return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines;
     82 }
     83 
     84 U_CFUNC int32_t
     85 getText(const UChar* source, int32_t srcLen,
     86         UChar** dest, int32_t destCapacity,
     87         UnicodeString patternString,
     88         UErrorCode* status){
     89 
     90     if(status == NULL || U_FAILURE(*status)){
     91         return 0;
     92     }
     93 
     94     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
     95     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
     96     UnicodeString src (source,srcLen);
     97 
     98     if (U_FAILURE(*status)) {
     99         return 0;
    100     }
    101     pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    102 
    103     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    104     if (U_FAILURE(*status)) {
    105         return 0;
    106     }
    107     for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
    108         matcher.reset(stringArray[i]);
    109         if(matcher.lookingAt(*status)){
    110             UnicodeString out = matcher.group(1, *status);
    111 
    112             return out.extract(*dest, destCapacity,*status);
    113         }
    114     }
    115     return 0;
    116 }
    117 
    118 
    119 #define AT_SIGN  0x0040
    120 
    121 U_CFUNC int32_t
    122 getDescription( const UChar* source, int32_t srcLen,
    123                 UChar** dest, int32_t destCapacity,
    124                 UErrorCode* status){
    125     if(status == NULL || U_FAILURE(*status)){
    126         return 0;
    127     }
    128 
    129     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    130     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    131     UnicodeString src(source, srcLen);
    132 
    133     if (U_FAILURE(*status)) {
    134         return 0;
    135     }
    136     pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
    137 
    138     if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
    139         int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
    140         return trim(*dest, destLen, status);
    141     }
    142     return 0;
    143 }
    144 
    145 U_CFUNC int32_t
    146 getCount(const UChar* source, int32_t srcLen,
    147          UParseCommentsOption option, UErrorCode *status){
    148 
    149     if(status == NULL || U_FAILURE(*status)){
    150         return 0;
    151     }
    152 
    153     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    154     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    155     UnicodeString src (source, srcLen);
    156 
    157 
    158     if (U_FAILURE(*status)) {
    159         return 0;
    160     }
    161     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    162 
    163     UnicodeString patternString(patternStrings[option]);
    164     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    165     if (U_FAILURE(*status)) {
    166         return 0;
    167     }
    168     int32_t count = 0;
    169     for(int32_t i=0; i<retLen; i++){
    170         matcher.reset(stringArray[i]);
    171         if(matcher.lookingAt(*status)){
    172             count++;
    173         }
    174     }
    175     if(option == UPC_TRANSLATE && count > 1){
    176         fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
    177         exit(U_UNSUPPORTED_ERROR);
    178     }
    179     return count;
    180 }
    181 
    182 U_CFUNC int32_t
    183 getAt(const UChar* source, int32_t srcLen,
    184         UChar** dest, int32_t destCapacity,
    185         int32_t index,
    186         UParseCommentsOption option,
    187         UErrorCode* status){
    188 
    189     if(status == NULL || U_FAILURE(*status)){
    190         return 0;
    191     }
    192 
    193     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    194     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    195     UnicodeString src (source, srcLen);
    196 
    197 
    198     if (U_FAILURE(*status)) {
    199         return 0;
    200     }
    201     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    202 
    203     UnicodeString patternString(patternStrings[option]);
    204     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    205     if (U_FAILURE(*status)) {
    206         return 0;
    207     }
    208     int32_t count = 0;
    209     for(int32_t i=0; i<retLen; i++){
    210         matcher.reset(stringArray[i]);
    211         if(matcher.lookingAt(*status)){
    212             if(count == index){
    213                 UnicodeString out = matcher.group(1, *status);
    214                 return out.extract(*dest, destCapacity,*status);
    215             }
    216             count++;
    217 
    218         }
    219     }
    220     return 0;
    221 
    222 }
    223 
    224 U_CFUNC int32_t
    225 getTranslate( const UChar* source, int32_t srcLen,
    226               UChar** dest, int32_t destCapacity,
    227               UErrorCode* status){
    228     UnicodeString     notePatternString("^translate\\s*?(.*)");
    229 
    230     int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
    231     return trim(*dest, destLen, status);
    232 }
    233 
    234 U_CFUNC int32_t
    235 getNote(const UChar* source, int32_t srcLen,
    236         UChar** dest, int32_t destCapacity,
    237         UErrorCode* status){
    238 
    239     UnicodeString     notePatternString("^note\\s*?(.*)");
    240     int32_t destLen =  getText(source, srcLen, dest, destCapacity, notePatternString, status);
    241     return trim(*dest, destLen, status);
    242 
    243 }
    244 
    245 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
    246 
    247