Home | History | Annotate | Download | only in dec
      1 /* Copyright 2015 Google Inc. All Rights Reserved.
      2 
      3    Distributed under MIT license.
      4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      5 */
      6 
      7 package org.brotli.dec;
      8 
      9 import static org.brotli.dec.WordTransformType.IDENTITY;
     10 import static org.brotli.dec.WordTransformType.OMIT_FIRST_1;
     11 import static org.brotli.dec.WordTransformType.OMIT_FIRST_2;
     12 import static org.brotli.dec.WordTransformType.OMIT_FIRST_3;
     13 import static org.brotli.dec.WordTransformType.OMIT_FIRST_4;
     14 import static org.brotli.dec.WordTransformType.OMIT_FIRST_5;
     15 import static org.brotli.dec.WordTransformType.OMIT_FIRST_6;
     16 import static org.brotli.dec.WordTransformType.OMIT_FIRST_7;
     17 import static org.brotli.dec.WordTransformType.OMIT_FIRST_9;
     18 import static org.brotli.dec.WordTransformType.OMIT_LAST_1;
     19 import static org.brotli.dec.WordTransformType.OMIT_LAST_2;
     20 import static org.brotli.dec.WordTransformType.OMIT_LAST_3;
     21 import static org.brotli.dec.WordTransformType.OMIT_LAST_4;
     22 import static org.brotli.dec.WordTransformType.OMIT_LAST_5;
     23 import static org.brotli.dec.WordTransformType.OMIT_LAST_6;
     24 import static org.brotli.dec.WordTransformType.OMIT_LAST_7;
     25 import static org.brotli.dec.WordTransformType.OMIT_LAST_8;
     26 import static org.brotli.dec.WordTransformType.OMIT_LAST_9;
     27 import static org.brotli.dec.WordTransformType.UPPERCASE_ALL;
     28 import static org.brotli.dec.WordTransformType.UPPERCASE_FIRST;
     29 
     30 /**
     31  * Transformations on dictionary words.
     32  */
     33 final class Transform {
     34 
     35   private final byte[] prefix;
     36   private final int type;
     37   private final byte[] suffix;
     38 
     39   Transform(String prefix, int type, String suffix) {
     40     this.prefix = readUniBytes(prefix);
     41     this.type = type;
     42     this.suffix = readUniBytes(suffix);
     43   }
     44 
     45   static byte[] readUniBytes(String uniBytes) {
     46     byte[] result = new byte[uniBytes.length()];
     47     for (int i = 0; i < result.length; ++i) {
     48       result[i] = (byte) uniBytes.charAt(i);
     49     }
     50     return result;
     51   }
     52 
     53   static final Transform[] TRANSFORMS = {
     54       new Transform("", IDENTITY, ""),
     55       new Transform("", IDENTITY, " "),
     56       new Transform(" ", IDENTITY, " "),
     57       new Transform("", OMIT_FIRST_1, ""),
     58       new Transform("", UPPERCASE_FIRST, " "),
     59       new Transform("", IDENTITY, " the "),
     60       new Transform(" ", IDENTITY, ""),
     61       new Transform("s ", IDENTITY, " "),
     62       new Transform("", IDENTITY, " of "),
     63       new Transform("", UPPERCASE_FIRST, ""),
     64       new Transform("", IDENTITY, " and "),
     65       new Transform("", OMIT_FIRST_2, ""),
     66       new Transform("", OMIT_LAST_1, ""),
     67       new Transform(", ", IDENTITY, " "),
     68       new Transform("", IDENTITY, ", "),
     69       new Transform(" ", UPPERCASE_FIRST, " "),
     70       new Transform("", IDENTITY, " in "),
     71       new Transform("", IDENTITY, " to "),
     72       new Transform("e ", IDENTITY, " "),
     73       new Transform("", IDENTITY, "\""),
     74       new Transform("", IDENTITY, "."),
     75       new Transform("", IDENTITY, "\">"),
     76       new Transform("", IDENTITY, "\n"),
     77       new Transform("", OMIT_LAST_3, ""),
     78       new Transform("", IDENTITY, "]"),
     79       new Transform("", IDENTITY, " for "),
     80       new Transform("", OMIT_FIRST_3, ""),
     81       new Transform("", OMIT_LAST_2, ""),
     82       new Transform("", IDENTITY, " a "),
     83       new Transform("", IDENTITY, " that "),
     84       new Transform(" ", UPPERCASE_FIRST, ""),
     85       new Transform("", IDENTITY, ". "),
     86       new Transform(".", IDENTITY, ""),
     87       new Transform(" ", IDENTITY, ", "),
     88       new Transform("", OMIT_FIRST_4, ""),
     89       new Transform("", IDENTITY, " with "),
     90       new Transform("", IDENTITY, "'"),
     91       new Transform("", IDENTITY, " from "),
     92       new Transform("", IDENTITY, " by "),
     93       new Transform("", OMIT_FIRST_5, ""),
     94       new Transform("", OMIT_FIRST_6, ""),
     95       new Transform(" the ", IDENTITY, ""),
     96       new Transform("", OMIT_LAST_4, ""),
     97       new Transform("", IDENTITY, ". The "),
     98       new Transform("", UPPERCASE_ALL, ""),
     99       new Transform("", IDENTITY, " on "),
    100       new Transform("", IDENTITY, " as "),
    101       new Transform("", IDENTITY, " is "),
    102       new Transform("", OMIT_LAST_7, ""),
    103       new Transform("", OMIT_LAST_1, "ing "),
    104       new Transform("", IDENTITY, "\n\t"),
    105       new Transform("", IDENTITY, ":"),
    106       new Transform(" ", IDENTITY, ". "),
    107       new Transform("", IDENTITY, "ed "),
    108       new Transform("", OMIT_FIRST_9, ""),
    109       new Transform("", OMIT_FIRST_7, ""),
    110       new Transform("", OMIT_LAST_6, ""),
    111       new Transform("", IDENTITY, "("),
    112       new Transform("", UPPERCASE_FIRST, ", "),
    113       new Transform("", OMIT_LAST_8, ""),
    114       new Transform("", IDENTITY, " at "),
    115       new Transform("", IDENTITY, "ly "),
    116       new Transform(" the ", IDENTITY, " of "),
    117       new Transform("", OMIT_LAST_5, ""),
    118       new Transform("", OMIT_LAST_9, ""),
    119       new Transform(" ", UPPERCASE_FIRST, ", "),
    120       new Transform("", UPPERCASE_FIRST, "\""),
    121       new Transform(".", IDENTITY, "("),
    122       new Transform("", UPPERCASE_ALL, " "),
    123       new Transform("", UPPERCASE_FIRST, "\">"),
    124       new Transform("", IDENTITY, "=\""),
    125       new Transform(" ", IDENTITY, "."),
    126       new Transform(".com/", IDENTITY, ""),
    127       new Transform(" the ", IDENTITY, " of the "),
    128       new Transform("", UPPERCASE_FIRST, "'"),
    129       new Transform("", IDENTITY, ". This "),
    130       new Transform("", IDENTITY, ","),
    131       new Transform(".", IDENTITY, " "),
    132       new Transform("", UPPERCASE_FIRST, "("),
    133       new Transform("", UPPERCASE_FIRST, "."),
    134       new Transform("", IDENTITY, " not "),
    135       new Transform(" ", IDENTITY, "=\""),
    136       new Transform("", IDENTITY, "er "),
    137       new Transform(" ", UPPERCASE_ALL, " "),
    138       new Transform("", IDENTITY, "al "),
    139       new Transform(" ", UPPERCASE_ALL, ""),
    140       new Transform("", IDENTITY, "='"),
    141       new Transform("", UPPERCASE_ALL, "\""),
    142       new Transform("", UPPERCASE_FIRST, ". "),
    143       new Transform(" ", IDENTITY, "("),
    144       new Transform("", IDENTITY, "ful "),
    145       new Transform(" ", UPPERCASE_FIRST, ". "),
    146       new Transform("", IDENTITY, "ive "),
    147       new Transform("", IDENTITY, "less "),
    148       new Transform("", UPPERCASE_ALL, "'"),
    149       new Transform("", IDENTITY, "est "),
    150       new Transform(" ", UPPERCASE_FIRST, "."),
    151       new Transform("", UPPERCASE_ALL, "\">"),
    152       new Transform(" ", IDENTITY, "='"),
    153       new Transform("", UPPERCASE_FIRST, ","),
    154       new Transform("", IDENTITY, "ize "),
    155       new Transform("", UPPERCASE_ALL, "."),
    156       new Transform("\u00c2\u00a0", IDENTITY, ""),
    157       new Transform(" ", IDENTITY, ","),
    158       new Transform("", UPPERCASE_FIRST, "=\""),
    159       new Transform("", UPPERCASE_ALL, "=\""),
    160       new Transform("", IDENTITY, "ous "),
    161       new Transform("", UPPERCASE_ALL, ", "),
    162       new Transform("", UPPERCASE_FIRST, "='"),
    163       new Transform(" ", UPPERCASE_FIRST, ","),
    164       new Transform(" ", UPPERCASE_ALL, "=\""),
    165       new Transform(" ", UPPERCASE_ALL, ", "),
    166       new Transform("", UPPERCASE_ALL, ","),
    167       new Transform("", UPPERCASE_ALL, "("),
    168       new Transform("", UPPERCASE_ALL, ". "),
    169       new Transform(" ", UPPERCASE_ALL, "."),
    170       new Transform("", UPPERCASE_ALL, "='"),
    171       new Transform(" ", UPPERCASE_ALL, ". "),
    172       new Transform(" ", UPPERCASE_FIRST, "=\""),
    173       new Transform(" ", UPPERCASE_ALL, "='"),
    174       new Transform(" ", UPPERCASE_FIRST, "='")
    175   };
    176 
    177   static int transformDictionaryWord(byte[] dst, int dstOffset, byte[] word, int wordOffset,
    178       int len, Transform transform) {
    179     int offset = dstOffset;
    180 
    181     // Copy prefix.
    182     byte[] string = transform.prefix;
    183     int tmp = string.length;
    184     int i = 0;
    185     // In most cases tmp < 10 -> no benefits from System.arrayCopy
    186     while (i < tmp) {
    187       dst[offset++] = string[i++];
    188     }
    189 
    190     // Copy trimmed word.
    191     int op = transform.type;
    192     tmp = WordTransformType.getOmitFirst(op);
    193     if (tmp > len) {
    194       tmp = len;
    195     }
    196     wordOffset += tmp;
    197     len -= tmp;
    198     len -= WordTransformType.getOmitLast(op);
    199     i = len;
    200     while (i > 0) {
    201       dst[offset++] = word[wordOffset++];
    202       i--;
    203     }
    204 
    205     if (op == UPPERCASE_ALL || op == UPPERCASE_FIRST) {
    206       int uppercaseOffset = offset - len;
    207       if (op == UPPERCASE_FIRST) {
    208         len = 1;
    209       }
    210       while (len > 0) {
    211         tmp = dst[uppercaseOffset] & 0xFF;
    212         if (tmp < 0xc0) {
    213           if (tmp >= 'a' && tmp <= 'z') {
    214             dst[uppercaseOffset] ^= (byte) 32;
    215           }
    216           uppercaseOffset += 1;
    217           len -= 1;
    218         } else if (tmp < 0xe0) {
    219           dst[uppercaseOffset + 1] ^= (byte) 32;
    220           uppercaseOffset += 2;
    221           len -= 2;
    222         } else {
    223           dst[uppercaseOffset + 2] ^= (byte) 5;
    224           uppercaseOffset += 3;
    225           len -= 3;
    226         }
    227       }
    228     }
    229 
    230     // Copy suffix.
    231     string = transform.suffix;
    232     tmp = string.length;
    233     i = 0;
    234     while (i < tmp) {
    235       dst[offset++] = string[i++];
    236     }
    237 
    238     return offset - dstOffset;
    239   }
    240 }
    241