Home | History | Annotate | Download | only in genrb
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4  *******************************************************************************
      5  *   Copyright (C) 2003-2014, International Business Machines
      6  *   Corporation and others.  All Rights Reserved.
      7  *******************************************************************************
      8  *
      9  * File prscmnts.cpp
     10  *
     11  * Modification History:
     12  *
     13  *   Date          Name        Description
     14  *   08/22/2003    ram         Creation.
     15  *******************************************************************************
     16  */
     17 
     18 // Safer use of UnicodeString.
     19 #ifndef UNISTR_FROM_CHAR_EXPLICIT
     20 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
     21 #endif
     22 
     23 // Less important, but still a good idea.
     24 #ifndef UNISTR_FROM_STRING_EXPLICIT
     25 #   define UNISTR_FROM_STRING_EXPLICIT explicit
     26 #endif
     27 
     28 #include "unicode/regex.h"
     29 #include "unicode/unistr.h"
     30 #include "unicode/parseerr.h"
     31 #include "prscmnts.h"
     32 #include <stdio.h>
     33 #include <stdlib.h>
     34 
     35 U_NAMESPACE_USE
     36 
     37 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
     38 
     39 #define MAX_SPLIT_STRINGS 20
     40 
     41 const char *patternStrings[UPC_LIMIT]={
     42     "^translate\\s*(.*)",
     43     "^note\\s*(.*)"
     44 };
     45 
     46 U_CFUNC int32_t
     47 removeText(UChar *source, int32_t srcLen,
     48            UnicodeString patString,uint32_t options,
     49            UnicodeString replaceText, UErrorCode *status){
     50 
     51     if(status == NULL || U_FAILURE(*status)){
     52         return 0;
     53     }
     54 
     55     UnicodeString src(source, srcLen);
     56 
     57     RegexMatcher    myMatcher(patString, src, options, *status);
     58     if(U_FAILURE(*status)){
     59         return 0;
     60     }
     61     UnicodeString dest;
     62 
     63 
     64     dest = myMatcher.replaceAll(replaceText,*status);
     65 
     66 
     67     return dest.extract(source, srcLen, *status);
     68 
     69 }
     70 U_CFUNC int32_t
     71 trim(UChar *src, int32_t srcLen, UErrorCode *status){
     72      srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines
     73      srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces
     74      srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes
     75      return srcLen;
     76 }
     77 
     78 U_CFUNC int32_t
     79 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
     80     srcLen = trim(source, srcLen, status);
     81     UnicodeString patString("^\\s*?\\*\\s*?");  // remove pattern like " * " at the begining of the line
     82     srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status);
     83     return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines;
     84 }
     85 
     86 U_CFUNC int32_t
     87 getText(const UChar* source, int32_t srcLen,
     88         UChar** dest, int32_t destCapacity,
     89         UnicodeString patternString,
     90         UErrorCode* status){
     91 
     92     if(status == NULL || U_FAILURE(*status)){
     93         return 0;
     94     }
     95 
     96     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
     97     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
     98     UnicodeString src (source,srcLen);
     99 
    100     if (U_FAILURE(*status)) {
    101         return 0;
    102     }
    103     pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    104 
    105     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    106     if (U_FAILURE(*status)) {
    107         return 0;
    108     }
    109     for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
    110         matcher.reset(stringArray[i]);
    111         if(matcher.lookingAt(*status)){
    112             UnicodeString out = matcher.group(1, *status);
    113 
    114             return out.extract(*dest, destCapacity,*status);
    115         }
    116     }
    117     return 0;
    118 }
    119 
    120 
    121 #define AT_SIGN  0x0040
    122 
    123 U_CFUNC int32_t
    124 getDescription( const UChar* source, int32_t srcLen,
    125                 UChar** dest, int32_t destCapacity,
    126                 UErrorCode* status){
    127     if(status == NULL || U_FAILURE(*status)){
    128         return 0;
    129     }
    130 
    131     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    132     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    133     UnicodeString src(source, srcLen);
    134 
    135     if (U_FAILURE(*status)) {
    136         return 0;
    137     }
    138     pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
    139 
    140     if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
    141         int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
    142         return trim(*dest, destLen, status);
    143     }
    144     return 0;
    145 }
    146 
    147 U_CFUNC int32_t
    148 getCount(const UChar* source, int32_t srcLen,
    149          UParseCommentsOption option, UErrorCode *status){
    150 
    151     if(status == NULL || U_FAILURE(*status)){
    152         return 0;
    153     }
    154 
    155     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    156     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    157     UnicodeString src (source, srcLen);
    158 
    159 
    160     if (U_FAILURE(*status)) {
    161         return 0;
    162     }
    163     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    164 
    165     UnicodeString patternString(patternStrings[option]);
    166     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    167     if (U_FAILURE(*status)) {
    168         return 0;
    169     }
    170     int32_t count = 0;
    171     for(int32_t i=0; i<retLen; i++){
    172         matcher.reset(stringArray[i]);
    173         if(matcher.lookingAt(*status)){
    174             count++;
    175         }
    176     }
    177     if(option == UPC_TRANSLATE && count > 1){
    178         fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
    179         exit(U_UNSUPPORTED_ERROR);
    180     }
    181     return count;
    182 }
    183 
    184 U_CFUNC int32_t
    185 getAt(const UChar* source, int32_t srcLen,
    186         UChar** dest, int32_t destCapacity,
    187         int32_t index,
    188         UParseCommentsOption option,
    189         UErrorCode* status){
    190 
    191     if(status == NULL || U_FAILURE(*status)){
    192         return 0;
    193     }
    194 
    195     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    196     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    197     UnicodeString src (source, srcLen);
    198 
    199 
    200     if (U_FAILURE(*status)) {
    201         return 0;
    202     }
    203     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    204 
    205     UnicodeString patternString(patternStrings[option]);
    206     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    207     if (U_FAILURE(*status)) {
    208         return 0;
    209     }
    210     int32_t count = 0;
    211     for(int32_t i=0; i<retLen; i++){
    212         matcher.reset(stringArray[i]);
    213         if(matcher.lookingAt(*status)){
    214             if(count == index){
    215                 UnicodeString out = matcher.group(1, *status);
    216                 return out.extract(*dest, destCapacity,*status);
    217             }
    218             count++;
    219 
    220         }
    221     }
    222     return 0;
    223 
    224 }
    225 
    226 U_CFUNC int32_t
    227 getTranslate( const UChar* source, int32_t srcLen,
    228               UChar** dest, int32_t destCapacity,
    229               UErrorCode* status){
    230     UnicodeString     notePatternString("^translate\\s*?(.*)");
    231 
    232     int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
    233     return trim(*dest, destLen, status);
    234 }
    235 
    236 U_CFUNC int32_t
    237 getNote(const UChar* source, int32_t srcLen,
    238         UChar** dest, int32_t destCapacity,
    239         UErrorCode* status){
    240 
    241     UnicodeString     notePatternString("^note\\s*?(.*)");
    242     int32_t destLen =  getText(source, srcLen, dest, destCapacity, notePatternString, status);
    243     return trim(*dest, destLen, status);
    244 
    245 }
    246 
    247 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
    248 
    249