1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2003-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * 9 * File prscmnts.cpp 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 08/22/2003 ram Creation. 15 ******************************************************************************* 16 */ 17 18 // Safer use of UnicodeString. 19 #ifndef UNISTR_FROM_CHAR_EXPLICIT 20 # define UNISTR_FROM_CHAR_EXPLICIT explicit 21 #endif 22 23 // Less important, but still a good idea. 24 #ifndef UNISTR_FROM_STRING_EXPLICIT 25 # define UNISTR_FROM_STRING_EXPLICIT explicit 26 #endif 27 28 #include "unicode/regex.h" 29 #include "unicode/unistr.h" 30 #include "unicode/parseerr.h" 31 #include "prscmnts.h" 32 #include <stdio.h> 33 #include <stdlib.h> 34 35 U_NAMESPACE_USE 36 37 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ 38 39 #define MAX_SPLIT_STRINGS 20 40 41 const char *patternStrings[UPC_LIMIT]={ 42 "^translate\\s*(.*)", 43 "^note\\s*(.*)" 44 }; 45 46 U_CFUNC int32_t 47 removeText(UChar *source, int32_t srcLen, 48 UnicodeString patString,uint32_t options, 49 UnicodeString replaceText, UErrorCode *status){ 50 51 if(status == NULL || U_FAILURE(*status)){ 52 return 0; 53 } 54 55 UnicodeString src(source, srcLen); 56 57 RegexMatcher myMatcher(patString, src, options, *status); 58 if(U_FAILURE(*status)){ 59 return 0; 60 } 61 UnicodeString dest; 62 63 64 dest = myMatcher.replaceAll(replaceText,*status); 65 66 67 return dest.extract(source, srcLen, *status); 68 69 } 70 U_CFUNC int32_t 71 trim(UChar *src, int32_t srcLen, UErrorCode *status){ 72 srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines 73 srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces 74 srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes 75 return srcLen; 76 } 77 78 U_CFUNC int32_t 79 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ 80 srcLen = trim(source, srcLen, status); 81 UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the begining of the line 82 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status); 83 return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines; 84 } 85 86 U_CFUNC int32_t 87 getText(const UChar* source, int32_t srcLen, 88 UChar** dest, int32_t destCapacity, 89 UnicodeString patternString, 90 UErrorCode* status){ 91 92 if(status == NULL || U_FAILURE(*status)){ 93 return 0; 94 } 95 96 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 97 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status); 98 UnicodeString src (source,srcLen); 99 100 if (U_FAILURE(*status)) { 101 return 0; 102 } 103 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); 104 105 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); 106 if (U_FAILURE(*status)) { 107 return 0; 108 } 109 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ 110 matcher.reset(stringArray[i]); 111 if(matcher.lookingAt(*status)){ 112 UnicodeString out = matcher.group(1, *status); 113 114 return out.extract(*dest, destCapacity,*status); 115 } 116 } 117 return 0; 118 } 119 120 121 #define AT_SIGN 0x0040 122 123 U_CFUNC int32_t 124 getDescription( const UChar* source, int32_t srcLen, 125 UChar** dest, int32_t destCapacity, 126 UErrorCode* status){ 127 if(status == NULL || U_FAILURE(*status)){ 128 return 0; 129 } 130 131 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 132 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); 133 UnicodeString src(source, srcLen); 134 135 if (U_FAILURE(*status)) { 136 return 0; 137 } 138 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); 139 140 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ 141 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); 142 return trim(*dest, destLen, status); 143 } 144 return 0; 145 } 146 147 U_CFUNC int32_t 148 getCount(const UChar* source, int32_t srcLen, 149 UParseCommentsOption option, UErrorCode *status){ 150 151 if(status == NULL || U_FAILURE(*status)){ 152 return 0; 153 } 154 155 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 156 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); 157 UnicodeString src (source, srcLen); 158 159 160 if (U_FAILURE(*status)) { 161 return 0; 162 } 163 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); 164 165 UnicodeString patternString(patternStrings[option]); 166 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); 167 if (U_FAILURE(*status)) { 168 return 0; 169 } 170 int32_t count = 0; 171 for(int32_t i=0; i<retLen; i++){ 172 matcher.reset(stringArray[i]); 173 if(matcher.lookingAt(*status)){ 174 count++; 175 } 176 } 177 if(option == UPC_TRANSLATE && count > 1){ 178 fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); 179 exit(U_UNSUPPORTED_ERROR); 180 } 181 return count; 182 } 183 184 U_CFUNC int32_t 185 getAt(const UChar* source, int32_t srcLen, 186 UChar** dest, int32_t destCapacity, 187 int32_t index, 188 UParseCommentsOption option, 189 UErrorCode* status){ 190 191 if(status == NULL || U_FAILURE(*status)){ 192 return 0; 193 } 194 195 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 196 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); 197 UnicodeString src (source, srcLen); 198 199 200 if (U_FAILURE(*status)) { 201 return 0; 202 } 203 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); 204 205 UnicodeString patternString(patternStrings[option]); 206 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); 207 if (U_FAILURE(*status)) { 208 return 0; 209 } 210 int32_t count = 0; 211 for(int32_t i=0; i<retLen; i++){ 212 matcher.reset(stringArray[i]); 213 if(matcher.lookingAt(*status)){ 214 if(count == index){ 215 UnicodeString out = matcher.group(1, *status); 216 return out.extract(*dest, destCapacity,*status); 217 } 218 count++; 219 220 } 221 } 222 return 0; 223 224 } 225 226 U_CFUNC int32_t 227 getTranslate( const UChar* source, int32_t srcLen, 228 UChar** dest, int32_t destCapacity, 229 UErrorCode* status){ 230 UnicodeString notePatternString("^translate\\s*?(.*)"); 231 232 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); 233 return trim(*dest, destLen, status); 234 } 235 236 U_CFUNC int32_t 237 getNote(const UChar* source, int32_t srcLen, 238 UChar** dest, int32_t destCapacity, 239 UErrorCode* status){ 240 241 UnicodeString notePatternString("^note\\s*?(.*)"); 242 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); 243 return trim(*dest, destLen, status); 244 245 } 246 247 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ 248 249