1 #include "pseudolocalize.h" 2 3 using namespace std; 4 5 // String basis to generate expansion 6 static const String16 k_expansion_string = String16("one two three " 7 "four five six seven eight nine ten eleven twelve thirteen " 8 "fourteen fiveteen sixteen seventeen nineteen twenty"); 9 10 // Special unicode characters to override directionality of the words 11 static const String16 k_rlm = String16("\xe2\x80\x8f"); 12 static const String16 k_rlo = String16("\xE2\x80\xae"); 13 static const String16 k_pdf = String16("\xE2\x80\xac"); 14 15 // Placeholder marks 16 static const String16 k_placeholder_open = String16("\xc2\xbb"); 17 static const String16 k_placeholder_close = String16("\xc2\xab"); 18 19 static const char* 20 pseudolocalize_char(const char16_t c) 21 { 22 switch (c) { 23 case 'a': return "\xc3\xa5"; 24 case 'b': return "\xc9\x93"; 25 case 'c': return "\xc3\xa7"; 26 case 'd': return "\xc3\xb0"; 27 case 'e': return "\xc3\xa9"; 28 case 'f': return "\xc6\x92"; 29 case 'g': return "\xc4\x9d"; 30 case 'h': return "\xc4\xa5"; 31 case 'i': return "\xc3\xae"; 32 case 'j': return "\xc4\xb5"; 33 case 'k': return "\xc4\xb7"; 34 case 'l': return "\xc4\xbc"; 35 case 'm': return "\xe1\xb8\xbf"; 36 case 'n': return "\xc3\xb1"; 37 case 'o': return "\xc3\xb6"; 38 case 'p': return "\xc3\xbe"; 39 case 'q': return "\x51"; 40 case 'r': return "\xc5\x95"; 41 case 's': return "\xc5\xa1"; 42 case 't': return "\xc5\xa3"; 43 case 'u': return "\xc3\xbb"; 44 case 'v': return "\x56"; 45 case 'w': return "\xc5\xb5"; 46 case 'x': return "\xd1\x85"; 47 case 'y': return "\xc3\xbd"; 48 case 'z': return "\xc5\xbe"; 49 case 'A': return "\xc3\x85"; 50 case 'B': return "\xce\xb2"; 51 case 'C': return "\xc3\x87"; 52 case 'D': return "\xc3\x90"; 53 case 'E': return "\xc3\x89"; 54 case 'G': return "\xc4\x9c"; 55 case 'H': return "\xc4\xa4"; 56 case 'I': return "\xc3\x8e"; 57 case 'J': return "\xc4\xb4"; 58 case 'K': return "\xc4\xb6"; 59 case 'L': return "\xc4\xbb"; 60 case 'M': return "\xe1\xb8\xbe"; 61 case 'N': return "\xc3\x91"; 62 case 'O': return "\xc3\x96"; 63 case 'P': return "\xc3\x9e"; 64 case 'Q': return "\x71"; 65 case 'R': return "\xc5\x94"; 66 case 'S': return "\xc5\xa0"; 67 case 'T': return "\xc5\xa2"; 68 case 'U': return "\xc3\x9b"; 69 case 'V': return "\xce\xbd"; 70 case 'W': return "\xc5\xb4"; 71 case 'X': return "\xc3\x97"; 72 case 'Y': return "\xc3\x9d"; 73 case 'Z': return "\xc5\xbd"; 74 case '!': return "\xc2\xa1"; 75 case '?': return "\xc2\xbf"; 76 case '$': return "\xe2\x82\xac"; 77 default: return NULL; 78 } 79 } 80 81 static bool 82 is_possible_normal_placeholder_end(const char16_t c) { 83 switch (c) { 84 case 's': return true; 85 case 'S': return true; 86 case 'c': return true; 87 case 'C': return true; 88 case 'd': return true; 89 case 'o': return true; 90 case 'x': return true; 91 case 'X': return true; 92 case 'f': return true; 93 case 'e': return true; 94 case 'E': return true; 95 case 'g': return true; 96 case 'G': return true; 97 case 'a': return true; 98 case 'A': return true; 99 case 'b': return true; 100 case 'B': return true; 101 case 'h': return true; 102 case 'H': return true; 103 case '%': return true; 104 case 'n': return true; 105 default: return false; 106 } 107 } 108 109 String16 110 pseudo_generate_expansion(const unsigned int length) { 111 String16 result = k_expansion_string; 112 const char16_t* s = result.string(); 113 if (result.size() < length) { 114 result += String16(" "); 115 result += pseudo_generate_expansion(length - result.size()); 116 } else { 117 int ext = 0; 118 // Should contain only whole words, so looking for a space 119 for (unsigned int i = length + 1; i < result.size(); ++i) { 120 ++ext; 121 if (s[i] == ' ') { 122 break; 123 } 124 } 125 result.remove(length + ext, 0); 126 } 127 return result; 128 } 129 130 /** 131 * Converts characters so they look like they've been localized. 132 * 133 * Note: This leaves escape sequences untouched so they can later be 134 * processed by ResTable::collectString in the normal way. 135 */ 136 String16 137 pseudolocalize_string(const String16& source) 138 { 139 const char16_t* s = source.string(); 140 String16 result; 141 const size_t I = source.size(); 142 for (size_t i=0; i<I; i++) { 143 char16_t c = s[i]; 144 if (c == '\\') { 145 // Escape syntax, no need to pseudolocalize 146 if (i<I-1) { 147 result += String16("\\"); 148 i++; 149 c = s[i]; 150 switch (c) { 151 case 'u': 152 // this one takes up 5 chars 153 result += String16(s+i, 5); 154 i += 4; 155 break; 156 case 't': 157 case 'n': 158 case '#': 159 case '@': 160 case '?': 161 case '"': 162 case '\'': 163 case '\\': 164 default: 165 result.append(&c, 1); 166 break; 167 } 168 } else { 169 result.append(&c, 1); 170 } 171 } else if (c == '%') { 172 // Placeholder syntax, no need to pseudolocalize 173 result += k_placeholder_open; 174 bool end = false; 175 result.append(&c, 1); 176 while (!end && i < I) { 177 ++i; 178 c = s[i]; 179 result.append(&c, 1); 180 if (is_possible_normal_placeholder_end(c)) { 181 end = true; 182 } else if (c == 't') { 183 ++i; 184 c = s[i]; 185 result.append(&c, 1); 186 end = true; 187 } 188 } 189 result += k_placeholder_close; 190 } else if (c == '<' || c == '&') { 191 // html syntax, no need to pseudolocalize 192 bool tag_closed = false; 193 while (!tag_closed && i < I) { 194 if (c == '&') { 195 String16 escape_text; 196 escape_text.append(&c, 1); 197 bool end = false; 198 size_t htmlCodePos = i; 199 while (!end && htmlCodePos < I) { 200 ++htmlCodePos; 201 c = s[htmlCodePos]; 202 escape_text.append(&c, 1); 203 // Valid html code 204 if (c == ';') { 205 end = true; 206 i = htmlCodePos; 207 } 208 // Wrong html code 209 else if (!((c == '#' || 210 (c >= 'a' && c <= 'z') || 211 (c >= 'A' && c <= 'Z') || 212 (c >= '0' && c <= '9')))) { 213 end = true; 214 } 215 } 216 result += escape_text; 217 if (escape_text != String16("<")) { 218 tag_closed = true; 219 } 220 continue; 221 } 222 if (c == '>') { 223 tag_closed = true; 224 result.append(&c, 1); 225 continue; 226 } 227 result.append(&c, 1); 228 i++; 229 c = s[i]; 230 } 231 } else { 232 // This is a pure text that should be pseudolocalized 233 const char* p = pseudolocalize_char(c); 234 if (p != NULL) { 235 result += String16(p); 236 } else { 237 result.append(&c, 1); 238 } 239 } 240 } 241 return result; 242 } 243 244 String16 245 pseudobidi_string(const String16& source) 246 { 247 const char16_t* s = source.string(); 248 String16 result; 249 result += k_rlm; 250 result += k_rlo; 251 for (size_t i=0; i<source.size(); i++) { 252 char16_t c = s[i]; 253 switch(c) { 254 case ' ': result += k_pdf; 255 result += k_rlm; 256 result.append(&c, 1); 257 result += k_rlm; 258 result += k_rlo; 259 break; 260 default: result.append(&c, 1); 261 break; 262 } 263 } 264 result += k_pdf; 265 result += k_rlm; 266 return result; 267 } 268 269