1 /* 2 ******************************************************************************* 3 * Copyright (C) 2003-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 * File prscmnts.cpp 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 08/22/2003 ram Creation. 13 ******************************************************************************* 14 */ 15 16 // Safer use of UnicodeString. 17 #ifndef UNISTR_FROM_CHAR_EXPLICIT 18 # define UNISTR_FROM_CHAR_EXPLICIT explicit 19 #endif 20 21 // Less important, but still a good idea. 22 #ifndef UNISTR_FROM_STRING_EXPLICIT 23 # define UNISTR_FROM_STRING_EXPLICIT explicit 24 #endif 25 26 #include "unicode/regex.h" 27 #include "unicode/unistr.h" 28 #include "unicode/parseerr.h" 29 #include "prscmnts.h" 30 #include <stdio.h> 31 #include <stdlib.h> 32 33 U_NAMESPACE_USE 34 35 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ 36 37 #define MAX_SPLIT_STRINGS 20 38 39 const char *patternStrings[UPC_LIMIT]={ 40 "^translate\\s*(.*)", 41 "^note\\s*(.*)" 42 }; 43 44 U_CFUNC int32_t 45 removeText(UChar *source, int32_t srcLen, 46 UnicodeString patString,uint32_t options, 47 UnicodeString replaceText, UErrorCode *status){ 48 49 if(status == NULL || U_FAILURE(*status)){ 50 return 0; 51 } 52 53 UnicodeString src(source, srcLen); 54 55 RegexMatcher myMatcher(patString, src, options, *status); 56 if(U_FAILURE(*status)){ 57 return 0; 58 } 59 UnicodeString dest; 60 61 62 dest = myMatcher.replaceAll(replaceText,*status); 63 64 65 return dest.extract(source, srcLen, *status); 66 67 } 68 U_CFUNC int32_t 69 trim(UChar *src, int32_t srcLen, UErrorCode *status){ 70 srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines 71 srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces 72 srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes 73 return srcLen; 74 } 75 76 U_CFUNC int32_t 77 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ 78 srcLen = trim(source, srcLen, status); 79 UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the begining of the line 80 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status); 81 return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines; 82 } 83 84 U_CFUNC int32_t 85 getText(const UChar* source, int32_t srcLen, 86 UChar** dest, int32_t destCapacity, 87 UnicodeString patternString, 88 UErrorCode* status){ 89 90 if(status == NULL || U_FAILURE(*status)){ 91 return 0; 92 } 93 94 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 95 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status); 96 UnicodeString src (source,srcLen); 97 98 if (U_FAILURE(*status)) { 99 return 0; 100 } 101 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); 102 103 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); 104 if (U_FAILURE(*status)) { 105 return 0; 106 } 107 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ 108 matcher.reset(stringArray[i]); 109 if(matcher.lookingAt(*status)){ 110 UnicodeString out = matcher.group(1, *status); 111 112 return out.extract(*dest, destCapacity,*status); 113 } 114 } 115 return 0; 116 } 117 118 119 #define AT_SIGN 0x0040 120 121 U_CFUNC int32_t 122 getDescription( const UChar* source, int32_t srcLen, 123 UChar** dest, int32_t destCapacity, 124 UErrorCode* status){ 125 if(status == NULL || U_FAILURE(*status)){ 126 return 0; 127 } 128 129 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 130 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); 131 UnicodeString src(source, srcLen); 132 133 if (U_FAILURE(*status)) { 134 return 0; 135 } 136 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); 137 138 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ 139 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); 140 return trim(*dest, destLen, status); 141 } 142 return 0; 143 } 144 145 U_CFUNC int32_t 146 getCount(const UChar* source, int32_t srcLen, 147 UParseCommentsOption option, UErrorCode *status){ 148 149 if(status == NULL || U_FAILURE(*status)){ 150 return 0; 151 } 152 153 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 154 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); 155 UnicodeString src (source, srcLen); 156 157 158 if (U_FAILURE(*status)) { 159 return 0; 160 } 161 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); 162 163 UnicodeString patternString(patternStrings[option]); 164 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); 165 if (U_FAILURE(*status)) { 166 return 0; 167 } 168 int32_t count = 0; 169 for(int32_t i=0; i<retLen; i++){ 170 matcher.reset(stringArray[i]); 171 if(matcher.lookingAt(*status)){ 172 count++; 173 } 174 } 175 if(option == UPC_TRANSLATE && count > 1){ 176 fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); 177 exit(U_UNSUPPORTED_ERROR); 178 } 179 return count; 180 } 181 182 U_CFUNC int32_t 183 getAt(const UChar* source, int32_t srcLen, 184 UChar** dest, int32_t destCapacity, 185 int32_t index, 186 UParseCommentsOption option, 187 UErrorCode* status){ 188 189 if(status == NULL || U_FAILURE(*status)){ 190 return 0; 191 } 192 193 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 194 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); 195 UnicodeString src (source, srcLen); 196 197 198 if (U_FAILURE(*status)) { 199 return 0; 200 } 201 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); 202 203 UnicodeString patternString(patternStrings[option]); 204 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); 205 if (U_FAILURE(*status)) { 206 return 0; 207 } 208 int32_t count = 0; 209 for(int32_t i=0; i<retLen; i++){ 210 matcher.reset(stringArray[i]); 211 if(matcher.lookingAt(*status)){ 212 if(count == index){ 213 UnicodeString out = matcher.group(1, *status); 214 return out.extract(*dest, destCapacity,*status); 215 } 216 count++; 217 218 } 219 } 220 return 0; 221 222 } 223 224 U_CFUNC int32_t 225 getTranslate( const UChar* source, int32_t srcLen, 226 UChar** dest, int32_t destCapacity, 227 UErrorCode* status){ 228 UnicodeString notePatternString("^translate\\s*?(.*)"); 229 230 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); 231 return trim(*dest, destLen, status); 232 } 233 234 U_CFUNC int32_t 235 getNote(const UChar* source, int32_t srcLen, 236 UChar** dest, int32_t destCapacity, 237 UErrorCode* status){ 238 239 UnicodeString notePatternString("^note\\s*?(.*)"); 240 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); 241 return trim(*dest, destLen, status); 242 243 } 244 245 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ 246 247