Home | History | Annotate | Download | only in aapt
      1 #include "pseudolocalize.h"
      2 
      3 using namespace std;
      4 
      5 // String basis to generate expansion
      6 static const String16 k_expansion_string = String16("one two three "
      7     "four five six seven eight nine ten eleven twelve thirteen "
      8     "fourteen fiveteen sixteen seventeen nineteen twenty");
      9 
     10 // Special unicode characters to override directionality of the words
     11 static const String16 k_rlm = String16("\xe2\x80\x8f");
     12 static const String16 k_rlo = String16("\xE2\x80\xae");
     13 static const String16 k_pdf = String16("\xE2\x80\xac");
     14 
     15 // Placeholder marks
     16 static const String16 k_placeholder_open = String16("\xc2\xbb");
     17 static const String16 k_placeholder_close = String16("\xc2\xab");
     18 
     19 static const char*
     20 pseudolocalize_char(const char16_t c)
     21 {
     22     switch (c) {
     23         case 'a':   return "\xc3\xa5";
     24         case 'b':   return "\xc9\x93";
     25         case 'c':   return "\xc3\xa7";
     26         case 'd':   return "\xc3\xb0";
     27         case 'e':   return "\xc3\xa9";
     28         case 'f':   return "\xc6\x92";
     29         case 'g':   return "\xc4\x9d";
     30         case 'h':   return "\xc4\xa5";
     31         case 'i':   return "\xc3\xae";
     32         case 'j':   return "\xc4\xb5";
     33         case 'k':   return "\xc4\xb7";
     34         case 'l':   return "\xc4\xbc";
     35         case 'm':   return "\xe1\xb8\xbf";
     36         case 'n':   return "\xc3\xb1";
     37         case 'o':   return "\xc3\xb6";
     38         case 'p':   return "\xc3\xbe";
     39         case 'q':   return "\x51";
     40         case 'r':   return "\xc5\x95";
     41         case 's':   return "\xc5\xa1";
     42         case 't':   return "\xc5\xa3";
     43         case 'u':   return "\xc3\xbb";
     44         case 'v':   return "\x56";
     45         case 'w':   return "\xc5\xb5";
     46         case 'x':   return "\xd1\x85";
     47         case 'y':   return "\xc3\xbd";
     48         case 'z':   return "\xc5\xbe";
     49         case 'A':   return "\xc3\x85";
     50         case 'B':   return "\xce\xb2";
     51         case 'C':   return "\xc3\x87";
     52         case 'D':   return "\xc3\x90";
     53         case 'E':   return "\xc3\x89";
     54         case 'G':   return "\xc4\x9c";
     55         case 'H':   return "\xc4\xa4";
     56         case 'I':   return "\xc3\x8e";
     57         case 'J':   return "\xc4\xb4";
     58         case 'K':   return "\xc4\xb6";
     59         case 'L':   return "\xc4\xbb";
     60         case 'M':   return "\xe1\xb8\xbe";
     61         case 'N':   return "\xc3\x91";
     62         case 'O':   return "\xc3\x96";
     63         case 'P':   return "\xc3\x9e";
     64         case 'Q':   return "\x71";
     65         case 'R':   return "\xc5\x94";
     66         case 'S':   return "\xc5\xa0";
     67         case 'T':   return "\xc5\xa2";
     68         case 'U':   return "\xc3\x9b";
     69         case 'V':   return "\xce\xbd";
     70         case 'W':   return "\xc5\xb4";
     71         case 'X':   return "\xc3\x97";
     72         case 'Y':   return "\xc3\x9d";
     73         case 'Z':   return "\xc5\xbd";
     74         case '!':   return "\xc2\xa1";
     75         case '?':   return "\xc2\xbf";
     76         case '$':   return "\xe2\x82\xac";
     77         default:    return NULL;
     78     }
     79 }
     80 
     81 static bool
     82 is_possible_normal_placeholder_end(const char16_t c) {
     83     switch (c) {
     84         case 's': return true;
     85         case 'S': return true;
     86         case 'c': return true;
     87         case 'C': return true;
     88         case 'd': return true;
     89         case 'o': return true;
     90         case 'x': return true;
     91         case 'X': return true;
     92         case 'f': return true;
     93         case 'e': return true;
     94         case 'E': return true;
     95         case 'g': return true;
     96         case 'G': return true;
     97         case 'a': return true;
     98         case 'A': return true;
     99         case 'b': return true;
    100         case 'B': return true;
    101         case 'h': return true;
    102         case 'H': return true;
    103         case '%': return true;
    104         case 'n': return true;
    105         default:  return false;
    106     }
    107 }
    108 
    109 String16
    110 pseudo_generate_expansion(const unsigned int length) {
    111     String16 result = k_expansion_string;
    112     const char16_t* s = result.string();
    113     if (result.size() < length) {
    114         result += String16(" ");
    115         result += pseudo_generate_expansion(length - result.size());
    116     } else {
    117         int ext = 0;
    118         // Should contain only whole words, so looking for a space
    119         for (unsigned int i = length + 1; i < result.size(); ++i) {
    120           ++ext;
    121           if (s[i] == ' ') {
    122             break;
    123           }
    124         }
    125         result.remove(length + ext, 0);
    126     }
    127     return result;
    128 }
    129 
    130 /**
    131  * Converts characters so they look like they've been localized.
    132  *
    133  * Note: This leaves escape sequences untouched so they can later be
    134  * processed by ResTable::collectString in the normal way.
    135  */
    136 String16
    137 pseudolocalize_string(const String16& source)
    138 {
    139     const char16_t* s = source.string();
    140     String16 result;
    141     const size_t I = source.size();
    142     for (size_t i=0; i<I; i++) {
    143         char16_t c = s[i];
    144         if (c == '\\') {
    145             // Escape syntax, no need to pseudolocalize
    146             if (i<I-1) {
    147                 result += String16("\\");
    148                 i++;
    149                 c = s[i];
    150                 switch (c) {
    151                     case 'u':
    152                         // this one takes up 5 chars
    153                         result += String16(s+i, 5);
    154                         i += 4;
    155                         break;
    156                     case 't':
    157                     case 'n':
    158                     case '#':
    159                     case '@':
    160                     case '?':
    161                     case '"':
    162                     case '\'':
    163                     case '\\':
    164                     default:
    165                         result.append(&c, 1);
    166                         break;
    167                 }
    168             } else {
    169                 result.append(&c, 1);
    170             }
    171         } else if (c == '%') {
    172             // Placeholder syntax, no need to pseudolocalize
    173             result += k_placeholder_open;
    174             bool end = false;
    175             result.append(&c, 1);
    176             while (!end && i < I) {
    177                 ++i;
    178                 c = s[i];
    179                 result.append(&c, 1);
    180                 if (is_possible_normal_placeholder_end(c)) {
    181                     end = true;
    182                 } else if (c == 't') {
    183                     ++i;
    184                     c = s[i];
    185                     result.append(&c, 1);
    186                     end = true;
    187                 }
    188             }
    189             result += k_placeholder_close;
    190         } else if (c == '<' || c == '&') {
    191             // html syntax, no need to pseudolocalize
    192             bool tag_closed = false;
    193             while (!tag_closed && i < I) {
    194                 if (c == '&') {
    195                     String16 escape_text;
    196                     escape_text.append(&c, 1);
    197                     bool end = false;
    198                     size_t htmlCodePos = i;
    199                     while (!end && htmlCodePos < I) {
    200                         ++htmlCodePos;
    201                         c = s[htmlCodePos];
    202                         escape_text.append(&c, 1);
    203                         // Valid html code
    204                         if (c == ';') {
    205                             end = true;
    206                             i = htmlCodePos;
    207                         }
    208                         // Wrong html code
    209                         else if (!((c == '#' ||
    210                                  (c >= 'a' && c <= 'z') ||
    211                                  (c >= 'A' && c <= 'Z') ||
    212                                  (c >= '0' && c <= '9')))) {
    213                             end = true;
    214                         }
    215                     }
    216                     result += escape_text;
    217                     if (escape_text != String16("&lt;")) {
    218                         tag_closed = true;
    219                     }
    220                     continue;
    221                 }
    222                 if (c == '>') {
    223                     tag_closed = true;
    224                     result.append(&c, 1);
    225                     continue;
    226                 }
    227                 result.append(&c, 1);
    228                 i++;
    229                 c = s[i];
    230             }
    231         } else {
    232             // This is a pure text that should be pseudolocalized
    233             const char* p = pseudolocalize_char(c);
    234             if (p != NULL) {
    235                 result += String16(p);
    236             } else {
    237                 result.append(&c, 1);
    238             }
    239         }
    240     }
    241     return result;
    242 }
    243 
    244 String16
    245 pseudobidi_string(const String16& source)
    246 {
    247     const char16_t* s = source.string();
    248     String16 result;
    249     result += k_rlm;
    250     result += k_rlo;
    251     for (size_t i=0; i<source.size(); i++) {
    252         char16_t c = s[i];
    253         switch(c) {
    254             case ' ': result += k_pdf;
    255                       result += k_rlm;
    256                       result.append(&c, 1);
    257                       result += k_rlm;
    258                       result += k_rlo;
    259                       break;
    260             default: result.append(&c, 1);
    261                      break;
    262         }
    263     }
    264     result += k_pdf;
    265     result += k_rlm;
    266     return result;
    267 }
    268 
    269