Home | History | Annotate | Download | only in spellcheck
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.spellcheck;
     18 
     19 import com.android.inputmethod.keyboard.KeyDetector;
     20 import com.android.inputmethod.keyboard.ProximityInfo;
     21 
     22 import java.util.TreeMap;
     23 
     24 public class SpellCheckerProximityInfo {
     25     /* public for test */
     26     final public static int NUL = KeyDetector.NOT_A_CODE;
     27 
     28     // This must be the same as MAX_PROXIMITY_CHARS_SIZE else it will not work inside
     29     // native code - this value is passed at creation of the binary object and reused
     30     // as the size of the passed array afterwards so they can't be different.
     31     final public static int ROW_SIZE = ProximityInfo.MAX_PROXIMITY_CHARS_SIZE;
     32 
     33     // The number of keys in a row of the grid used by the spell checker.
     34     final public static int PROXIMITY_GRID_WIDTH = 11;
     35     // The number of rows in the grid used by the spell checker.
     36     final public static int PROXIMITY_GRID_HEIGHT = 3;
     37 
     38     final private static int NOT_AN_INDEX = -1;
     39     final public static int NOT_A_COORDINATE_PAIR = -1;
     40 
     41     // Helper methods
     42     final protected static void buildProximityIndices(final int[] proximity,
     43             final TreeMap<Integer, Integer> indices) {
     44         for (int i = 0; i < proximity.length; i += ROW_SIZE) {
     45             if (NUL != proximity[i]) indices.put(proximity[i], i / ROW_SIZE);
     46         }
     47     }
     48     final protected static int computeIndex(final int characterCode,
     49             final TreeMap<Integer, Integer> indices) {
     50         final Integer result = indices.get(characterCode);
     51         if (null == result) return NOT_AN_INDEX;
     52         return result;
     53     }
     54 
     55     private static class Latin {
     56         // This is a map from the code point to the index in the PROXIMITY array.
     57         // At the time the native code to read the binary dictionary needs the proximity info be
     58         // passed as a flat array spaced by MAX_PROXIMITY_CHARS_SIZE columns, one for each input
     59         // character.
     60         // Since we need to build such an array, we want to be able to search in our big proximity
     61         // data quickly by character, and a map is probably the best way to do this.
     62         final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();
     63 
     64         // The proximity here is the union of
     65         // - the proximity for a QWERTY keyboard.
     66         // - the proximity for an AZERTY keyboard.
     67         // - the proximity for a QWERTZ keyboard.
     68         // ...plus, add all characters in the ('a', 'e', 'i', 'o', 'u') set to each other.
     69         //
     70         // The reasoning behind this construction is, almost any alphabetic text we may want
     71         // to spell check has been entered with one of the keyboards above. Also, specifically
     72         // to English, many spelling errors consist of the last vowel of the word being wrong
     73         // because in English vowels tend to merge with each other in pronunciation.
     74         final static int[] PROXIMITY = {
     75             // Proximity for row 1. This must have exactly ROW_SIZE entries for each letter,
     76             // and exactly PROXIMITY_GRID_WIDTH letters for a row. Pad with NUL's.
     77             // The number of rows must be exactly PROXIMITY_GRID_HEIGHT.
     78             'q', 'w', 's', 'a', 'z', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     79             'w', 'q', 'a', 's', 'd', 'e', 'x', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     80             'e', 'w', 's', 'd', 'f', 'r', 'a', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
     81             'r', 'e', 'd', 'f', 'g', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     82             't', 'r', 'f', 'g', 'h', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     83             'y', 't', 'g', 'h', 'j', 'u', 'a', 's', 'd', 'x', NUL, NUL, NUL, NUL, NUL, NUL,
     84             'u', 'y', 'h', 'j', 'k', 'i', 'a', 'e', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     85             'i', 'u', 'j', 'k', 'l', 'o', 'a', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     86             'o', 'i', 'k', 'l', 'p', 'a', 'e', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     87             'p', 'o', 'l', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     88             NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     89 
     90             // Proximity for row 2. See comment above about size.
     91             'a', 'z', 'x', 's', 'w', 'q', 'e', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
     92             's', 'q', 'a', 'z', 'x', 'c', 'd', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     93             'd', 'w', 's', 'x', 'c', 'v', 'f', 'r', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     94             'f', 'e', 'd', 'c', 'v', 'b', 'g', 't', 'r', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     95             'g', 'r', 'f', 'v', 'b', 'n', 'h', 'y', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     96             'h', 't', 'g', 'b', 'n', 'm', 'j', 'u', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     97             'j', 'y', 'h', 'n', 'm', 'k', 'i', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     98             'k', 'u', 'j', 'm', 'l', 'o', 'i', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
     99             'l', 'i', 'k', 'p', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
    100             NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
    101             NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
    102 
    103             // Proximity for row 3. See comment above about size.
    104             'z', 'a', 's', 'd', 'x', 't', 'g', 'h', 'j', 'u', 'q', 'e', NUL, NUL, NUL, NUL,
    105             'x', 'z', 'a', 's', 'd', 'c', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
    106             'c', 'x', 's', 'd', 'f', 'v', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
    107             'v', 'c', 'd', 'f', 'g', 'b', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
    108             'b', 'v', 'f', 'g', 'h', 'n', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
    109             'n', 'b', 'g', 'h', 'j', 'm', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
    110             'm', 'n', 'h', 'j', 'k', 'l', 'o', 'p', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
    111             NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
    112             NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
    113             NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
    114         };
    115         static {
    116             buildProximityIndices(PROXIMITY, INDICES);
    117         }
    118         static int getIndexOf(int characterCode) {
    119             return computeIndex(characterCode, INDICES);
    120         }
    121     }
    122 
    // Proximity data for the Cyrillic script. PROXIMITY is a flat table with one entry of
    // ROW_SIZE codes per grid cell, and INDICES maps the first code of each entry to the
    // number of that entry.
    // NOTE(review): every character literal in the table below is empty ('') in this copy of
    // the file. Presumably the Cyrillic letters were lost to an encoding problem somewhere
    // along the way - restore them from a known good copy before compiling; confirm.
    private static class Cyrillic {
        // Map from the code point to the number of its entry in the PROXIMITY array.
        final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();
        // TODO: The following table is solely based on the keyboard layout. Consult with Russian
        // speakers on commonly misspelled words/letters.
        final static int[] PROXIMITY = {
            // Proximity for row 1. This must have exactly ROW_SIZE entries for each letter,
            // and exactly PROXIMITY_GRID_WIDTH letters for a row. Pad with NUL's.
            // The number of rows must be exactly PROXIMITY_GRID_HEIGHT.
            '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,

            // Proximity for row 2. See comment above about size.
            '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,

            // Proximity for row 3. See comment above about size.
            '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            '', '', '', '', '', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
        };
        static {
            // Populate the character -> entry number map once, when the class is loaded.
            buildProximityIndices(PROXIMITY, INDICES);
        }
        // Returns the number of the table entry for the given character, as computed by
        // computeIndex for this table's index map.
        static int getIndexOf(int characterCode) {
            return computeIndex(characterCode, INDICES);
        }
    }
    176 
    177     public static int[] getProximityForScript(final int script) {
    178         switch (script) {
    179             case AndroidSpellCheckerService.SCRIPT_LATIN:
    180                 return Latin.PROXIMITY;
    181             case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
    182                 return Cyrillic.PROXIMITY;
    183             default:
    184                 throw new RuntimeException("Wrong script supplied: " + script);
    185         }
    186     }
    187 
    188     private static int getIndexOfCodeForScript(final int codePoint, final int script) {
    189         switch (script) {
    190             case AndroidSpellCheckerService.SCRIPT_LATIN:
    191                 return Latin.getIndexOf(codePoint);
    192             case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
    193                 return Cyrillic.getIndexOf(codePoint);
    194             default:
    195                 throw new RuntimeException("Wrong script supplied: " + script);
    196         }
    197     }
    198 
    199     // Returns (Y << 16) + X to avoid creating a temporary object. This is okay because
    200     // X and Y are limited to PROXIMITY_GRID_WIDTH resp. PROXIMITY_GRID_HEIGHT which is very
    201     // inferior to 1 << 16
    202     // As an exception, this returns NOT_A_COORDINATE_PAIR if the key is not on the grid
    203     public static int getXYForCodePointAndScript(final int codePoint, final int script) {
    204         final int index = getIndexOfCodeForScript(codePoint, script);
    205         if (NOT_AN_INDEX == index) return NOT_A_COORDINATE_PAIR;
    206         final int y = index / PROXIMITY_GRID_WIDTH;
    207         final int x = index % PROXIMITY_GRID_WIDTH;
    208         if (y > PROXIMITY_GRID_HEIGHT) {
    209             // Safety check, should be entirely useless
    210             throw new RuntimeException("Wrong y coordinate in spell checker proximity");
    211         }
    212         return (y << 16) + x;
    213     }
    214 }
    215