Home | History | Annotate | Download | only in genrb
      1 /*
      2  *******************************************************************************
      3  *   Copyright (C) 2003-2007, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  *******************************************************************************
      6  *
      7  * File prscmnts.cpp
      8  *
      9  * Modification History:
     10  *
     11  *   Date          Name        Description
     12  *   08/22/2003    ram         Creation.
     13  *******************************************************************************
     14  */
     15 
     16 #include "unicode/regex.h"
     17 #include "unicode/unistr.h"
     18 #include "unicode/parseerr.h"
     19 #include "prscmnts.h"
     20 #include <stdio.h>
     21 #include <stdlib.h>
     22 
     23 U_NAMESPACE_USE
     24 
     25 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
     26 
     27 #define MAX_SPLIT_STRINGS 20
     28 
/*
 * Regular expressions used by getCount() and getAt(), which index this table
 * with a UParseCommentsOption value. Each expression is applied (via
 * lookingAt) to one of the pieces obtained by splitting a comment on '@',
 * and capture group 1 is the text that follows the tag keyword.
 * NOTE(review): the entry order presumably mirrors the UParseCommentsOption
 * enumerators declared in prscmnts.h - confirm when adding entries.
 */
const char *patternStrings[UPC_LIMIT]={
    "^translate\\s*(.*)",
    "^note\\s*(.*)"
};
     33 
     34 U_CFUNC int32_t
     35 removeText(UChar *source, int32_t srcLen,
     36            UnicodeString patString,uint32_t options,
     37            UnicodeString replaceText, UErrorCode *status){
     38 
     39     if(status == NULL || U_FAILURE(*status)){
     40         return 0;
     41     }
     42 
     43     UnicodeString src(source, srcLen);
     44 
     45     RegexMatcher    myMatcher(patString, src, options, *status);
     46     if(U_FAILURE(*status)){
     47         return 0;
     48     }
     49     UnicodeString dest;
     50 
     51 
     52     dest = myMatcher.replaceAll(replaceText,*status);
     53 
     54 
     55     return dest.extract(source, srcLen, *status);
     56 
     57 }
     58 U_CFUNC int32_t
     59 trim(UChar *src, int32_t srcLen, UErrorCode *status){
     60      srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines
     61      srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces
     62      srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes
     63      return srcLen;
     64 }
     65 
     66 U_CFUNC int32_t
     67 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
     68     srcLen = trim(source, srcLen, status);
     69     UnicodeString     patString = "^\\s*?\\*\\s*?";     // remove pattern like " * " at the begining of the line
     70     srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status);
     71     return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines;
     72 }
     73 
     74 U_CFUNC int32_t
     75 getText(const UChar* source, int32_t srcLen,
     76         UChar** dest, int32_t destCapacity,
     77         UnicodeString patternString,
     78         UErrorCode* status){
     79 
     80     if(status == NULL || U_FAILURE(*status)){
     81         return 0;
     82     }
     83 
     84     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
     85     RegexPattern      *pattern = RegexPattern::compile("@", 0, *status);
     86     UnicodeString src (source,srcLen);
     87 
     88     if (U_FAILURE(*status)) {
     89         return 0;
     90     }
     91     pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
     92 
     93     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
     94     if (U_FAILURE(*status)) {
     95         return 0;
     96     }
     97     for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
     98         matcher.reset(stringArray[i]);
     99         if(matcher.lookingAt(*status)){
    100             UnicodeString out = matcher.group(1, *status);
    101 
    102             return out.extract(*dest, destCapacity,*status);
    103         }
    104     }
    105     return 0;
    106 }
    107 
    108 
    109 #define AT_SIGN  0x0040
    110 
    111 U_CFUNC int32_t
    112 getDescription( const UChar* source, int32_t srcLen,
    113                 UChar** dest, int32_t destCapacity,
    114                 UErrorCode* status){
    115     if(status == NULL || U_FAILURE(*status)){
    116         return 0;
    117     }
    118 
    119     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    120     RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
    121     UnicodeString src(source, srcLen);
    122 
    123     if (U_FAILURE(*status)) {
    124         return 0;
    125     }
    126     pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
    127 
    128     if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
    129         int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
    130         return trim(*dest, destLen, status);
    131     }
    132     return 0;
    133 }
    134 
    135 U_CFUNC int32_t
    136 getCount(const UChar* source, int32_t srcLen,
    137          UParseCommentsOption option, UErrorCode *status){
    138 
    139     if(status == NULL || U_FAILURE(*status)){
    140         return 0;
    141     }
    142 
    143     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    144     RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
    145     UnicodeString src (source, srcLen);
    146 
    147 
    148     if (U_FAILURE(*status)) {
    149         return 0;
    150     }
    151     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    152 
    153     RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
    154     if (U_FAILURE(*status)) {
    155         return 0;
    156     }
    157     int32_t count = 0;
    158     for(int32_t i=0; i<retLen; i++){
    159         matcher.reset(stringArray[i]);
    160         if(matcher.lookingAt(*status)){
    161             count++;
    162         }
    163     }
    164     if(option == UPC_TRANSLATE && count > 1){
    165         fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
    166         exit(U_UNSUPPORTED_ERROR);
    167     }
    168     return count;
    169 }
    170 
    171 U_CFUNC int32_t
    172 getAt(const UChar* source, int32_t srcLen,
    173         UChar** dest, int32_t destCapacity,
    174         int32_t index,
    175         UParseCommentsOption option,
    176         UErrorCode* status){
    177 
    178     if(status == NULL || U_FAILURE(*status)){
    179         return 0;
    180     }
    181 
    182     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    183     RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
    184     UnicodeString src (source, srcLen);
    185 
    186 
    187     if (U_FAILURE(*status)) {
    188         return 0;
    189     }
    190     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    191 
    192     RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
    193     if (U_FAILURE(*status)) {
    194         return 0;
    195     }
    196     int32_t count = 0;
    197     for(int32_t i=0; i<retLen; i++){
    198         matcher.reset(stringArray[i]);
    199         if(matcher.lookingAt(*status)){
    200             if(count == index){
    201                 UnicodeString out = matcher.group(1, *status);
    202                 return out.extract(*dest, destCapacity,*status);
    203             }
    204             count++;
    205 
    206         }
    207     }
    208     return 0;
    209 
    210 }
    211 
    212 U_CFUNC int32_t
    213 getTranslate( const UChar* source, int32_t srcLen,
    214               UChar** dest, int32_t destCapacity,
    215               UErrorCode* status){
    216     UnicodeString     notePatternString = "^translate\\s*?(.*)";
    217 
    218     int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
    219     return trim(*dest, destLen, status);
    220 }
    221 
    222 U_CFUNC int32_t
    223 getNote(const UChar* source, int32_t srcLen,
    224         UChar** dest, int32_t destCapacity,
    225         UErrorCode* status){
    226 
    227     UnicodeString     notePatternString = "^note\\s*?(.*)";
    228     int32_t destLen =  getText(source, srcLen, dest, destCapacity, notePatternString, status);
    229     return trim(*dest, destLen, status);
    230 
    231 }
    232 
    233 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
    234 
    235