/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin.spellcheck;

import com.android.inputmethod.keyboard.KeyDetector;
import com.android.inputmethod.keyboard.ProximityInfo;

import java.util.TreeMap;

/**
 * Static proximity tables used by the spell checker, one per supported script.
 *
 * <p>Each table is a flat {@code int[]} made of consecutive entries of {@link #ROW_SIZE} code
 * points. The first code point of an entry is the key itself and the following ones are the
 * characters considered close to it; unused slots hold {@link #NUL}. Entries are laid out as a
 * grid of {@link #PROXIMITY_GRID_WIDTH} columns by {@link #PROXIMITY_GRID_HEIGHT} rows, so every
 * table must contain exactly
 * {@code ROW_SIZE * PROXIMITY_GRID_WIDTH * PROXIMITY_GRID_HEIGHT} ints.
 */
public class SpellCheckerProximityInfo {
    /* public for test */
    public static final int NUL = KeyDetector.NOT_A_CODE;

    // This must be the same as MAX_PROXIMITY_CHARS_SIZE, otherwise it will not work inside
    // native code: this value is passed at creation of the binary object and reused as the
    // size of the passed array afterwards, so the two cannot differ.
    public static final int ROW_SIZE = ProximityInfo.MAX_PROXIMITY_CHARS_SIZE;

    // The number of keys in a row of the grid used by the spell checker.
    public static final int PROXIMITY_GRID_WIDTH = 11;
    // The number of rows in the grid used by the spell checker.
    public static final int PROXIMITY_GRID_HEIGHT = 3;

    private static final int NOT_AN_INDEX = -1;
    public static final int NOT_A_COORDINATE_PAIR = -1;

    // Helper methods

    /**
     * Fills {@code indices} with a map from each key's code point to the position of its entry
     * in {@code proximity}, counted in entries of {@link #ROW_SIZE} ints.
     *
     * <p>Only the first code point of each entry is looked at; entries starting with
     * {@link #NUL} are padding and are skipped.
     *
     * @param proximity the flat proximity table to index
     * @param indices the map to populate
     */
    protected static final void buildProximityIndices(final int[] proximity,
            final TreeMap<Integer, Integer> indices) {
        for (int i = 0; i < proximity.length; i += ROW_SIZE) {
            if (NUL != proximity[i]) {
                indices.put(proximity[i], i / ROW_SIZE);
            }
        }
    }

    /**
     * Looks up the entry index of a code point in a map built by
     * {@link #buildProximityIndices(int[], TreeMap)}.
     *
     * @param characterCode the code point to look up
     * @param indices the code point to entry index map
     * @return the entry index, or {@code NOT_AN_INDEX} when the code point is not in the map
     */
    protected static final int computeIndex(final int characterCode,
            final TreeMap<Integer, Integer> indices) {
        final Integer result = indices.get(characterCode);
        if (null == result) {
            return NOT_AN_INDEX;
        }
        return result;
    }

    private static class Latin {
        // This is a map from the code point to the index in the PROXIMITY array.
        // At the moment, the native code that reads the binary dictionary needs the proximity
        // info to be passed as a flat array spaced by MAX_PROXIMITY_CHARS_SIZE columns, one for
        // each input character.
        // Since we need to build such an array, we want to be able to search in our big proximity
        // data quickly by character, and a map is probably the best way to do this.
        private static final TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();

        // The proximity here is the union of
        // - the proximity for a QWERTY keyboard.
        // - the proximity for an AZERTY keyboard.
        // - the proximity for a QWERTZ keyboard.
        // ...plus, add all characters in the ('a', 'e', 'i', 'o', 'u') set to each other.
        //
        // The reasoning behind this construction is that almost any alphabetic text we may want
        // to spell check has been entered with one of the keyboards above. Also, specifically
        // to English, many spelling errors consist of the last vowel of the word being wrong
        // because in English vowels tend to merge with each other in pronunciation.
        static final int[] PROXIMITY = {
            // Proximity for row 1. This must have exactly ROW_SIZE entries for each letter,
            // and exactly PROXIMITY_GRID_WIDTH letters for a row. Pad with NUL's.
            // The number of rows must be exactly PROXIMITY_GRID_HEIGHT.
            'q', 'w', 's', 'a', 'z', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'w', 'q', 'a', 's', 'd', 'e', 'x', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'e', 'w', 's', 'd', 'f', 'r', 'a', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
            'r', 'e', 'd', 'f', 'g', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            't', 'r', 'f', 'g', 'h', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'y', 't', 'g', 'h', 'j', 'u', 'a', 's', 'd', 'x', NUL, NUL, NUL, NUL, NUL, NUL,
            'u', 'y', 'h', 'j', 'k', 'i', 'a', 'e', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'i', 'u', 'j', 'k', 'l', 'o', 'a', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'o', 'i', 'k', 'l', 'p', 'a', 'e', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'p', 'o', 'l', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,

            // Proximity for row 2. See comment above about size.
            'a', 'z', 'x', 's', 'w', 'q', 'e', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
            's', 'q', 'a', 'z', 'x', 'c', 'd', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'd', 'w', 's', 'x', 'c', 'v', 'f', 'r', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'f', 'e', 'd', 'c', 'v', 'b', 'g', 't', 'r', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'g', 'r', 'f', 'v', 'b', 'n', 'h', 'y', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'h', 't', 'g', 'b', 'n', 'm', 'j', 'u', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'j', 'y', 'h', 'n', 'm', 'k', 'i', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'k', 'u', 'j', 'm', 'l', 'o', 'i', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'l', 'i', 'k', 'p', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,

            // Proximity for row 3. See comment above about size.
            // Seven letters followed by four padding entries, so that this row is
            // PROXIMITY_GRID_WIDTH entries wide like the other two and the whole array holds
            // ROW_SIZE * PROXIMITY_GRID_WIDTH * PROXIMITY_GRID_HEIGHT ints.
            'z', 'a', 's', 'd', 'x', 't', 'g', 'h', 'j', 'u', 'q', 'e', NUL, NUL, NUL, NUL,
            'x', 'z', 'a', 's', 'd', 'c', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'c', 'x', 's', 'd', 'f', 'v', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'v', 'c', 'd', 'f', 'g', 'b', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'b', 'v', 'f', 'g', 'h', 'n', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'n', 'b', 'g', 'h', 'j', 'm', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'm', 'n', 'h', 'j', 'k', 'l', 'o', 'p', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
        };

        static {
            buildProximityIndices(PROXIMITY, INDICES);
        }

        // Returns the entry index of the given code point in PROXIMITY, or NOT_AN_INDEX.
        static int getIndexOf(int characterCode) {
            return computeIndex(characterCode, INDICES);
        }
    }

    private static class Cyrillic {
        private static final TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();

        // Lowercase Cyrillic letters, written as Unicode escapes so that this file stays pure
        // ASCII and the table cannot be damaged by a wrong source encoding.
        // NOTE(review): the character literals of this table were empty in the text under
        // review. The letters below were reconstructed from the standard Russian keyboard
        // layout and the number of non-NUL slots of each entry - confirm against history.
        private static final int CY_A = '\u0430';
        private static final int CY_BE = '\u0431';
        private static final int CY_VE = '\u0432';
        private static final int CY_GHE = '\u0433';
        private static final int CY_DE = '\u0434';
        private static final int CY_IE = '\u0435';
        private static final int CY_ZHE = '\u0436';
        private static final int CY_ZE = '\u0437';
        private static final int CY_I = '\u0438';
        private static final int CY_SHORT_I = '\u0439';
        private static final int CY_KA = '\u043A';
        private static final int CY_EL = '\u043B';
        private static final int CY_EM = '\u043C';
        private static final int CY_EN = '\u043D';
        private static final int CY_O = '\u043E';
        private static final int CY_PE = '\u043F';
        private static final int CY_ER = '\u0440';
        private static final int CY_ES = '\u0441';
        private static final int CY_TE = '\u0442';
        private static final int CY_U = '\u0443';
        private static final int CY_EF = '\u0444';
        private static final int CY_HA = '\u0445';
        private static final int CY_TSE = '\u0446';
        private static final int CY_CHE = '\u0447';
        private static final int CY_SHA = '\u0448';
        private static final int CY_SHCHA = '\u0449';
        private static final int CY_YERU = '\u044B';
        private static final int CY_SOFT_SIGN = '\u044C';
        private static final int CY_E = '\u044D';
        private static final int CY_YU = '\u044E';
        private static final int CY_YA = '\u044F';

        // TODO: The following table is solely based on the keyboard layout. Consult with Russian
        // speakers on commonly misspelled words/letters.
        static final int[] PROXIMITY = {
            // Proximity for row 1. This must have exactly ROW_SIZE entries for each letter,
            // and exactly PROXIMITY_GRID_WIDTH letters for a row. Pad with NUL's.
            // The number of rows must be exactly PROXIMITY_GRID_HEIGHT.
            // Each entry is written on two lines of eight code points.
            CY_SHORT_I, CY_TSE, CY_EF, CY_YERU, NUL, NUL, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_TSE, CY_SHORT_I, CY_EF, CY_YERU, CY_VE, CY_U, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_U, CY_TSE, CY_YERU, CY_VE, CY_A, CY_KA, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_KA, CY_U, CY_VE, CY_A, CY_PE, CY_IE, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_IE, CY_KA, CY_A, CY_PE, CY_ER, CY_EN, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_EN, CY_IE, CY_PE, CY_ER, CY_O, CY_GHE, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_GHE, CY_EN, CY_ER, CY_O, CY_EL, CY_SHA, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_SHA, CY_GHE, CY_O, CY_EL, CY_DE, CY_SHCHA, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_SHCHA, CY_SHA, CY_EL, CY_DE, CY_ZHE, CY_ZE, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_ZE, CY_SHCHA, CY_DE, CY_ZHE, CY_E, CY_HA, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_HA, CY_ZE, CY_ZHE, CY_E, NUL, NUL, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,

            // Proximity for row 2. See comment above about size.
            CY_EF, CY_SHORT_I, CY_TSE, CY_YERU, CY_YA, NUL, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_YERU, CY_SHORT_I, CY_TSE, CY_U, CY_EF, CY_VE, CY_YA, CY_CHE,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_VE, CY_TSE, CY_U, CY_KA, CY_YERU, CY_A, CY_YA, CY_CHE,
                    CY_ES, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_A, CY_U, CY_KA, CY_IE, CY_VE, CY_PE, CY_CHE, CY_ES,
                    CY_EM, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_PE, CY_KA, CY_IE, CY_EN, CY_A, CY_ER, CY_ES, CY_EM,
                    CY_I, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_ER, CY_IE, CY_EN, CY_GHE, CY_PE, CY_O, CY_EM, CY_I,
                    CY_TE, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_O, CY_EN, CY_GHE, CY_SHA, CY_ER, CY_EL, CY_I, CY_TE,
                    CY_SOFT_SIGN, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_EL, CY_GHE, CY_SHA, CY_SHCHA, CY_O, CY_DE, CY_TE, CY_SOFT_SIGN,
                    CY_BE, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_DE, CY_SHA, CY_SHCHA, CY_ZE, CY_EL, CY_ZHE, CY_SOFT_SIGN, CY_BE,
                    CY_YU, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_ZHE, CY_SHCHA, CY_ZE, CY_HA, CY_DE, CY_E, CY_BE, CY_YU,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_E, CY_ZE, CY_HA, CY_ZHE, NUL, NUL, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,

            // Proximity for row 3. See comment above about size.
            CY_YA, CY_EF, CY_YERU, CY_VE, CY_CHE, NUL, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_CHE, CY_YERU, CY_VE, CY_A, CY_YA, CY_ES, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_ES, CY_VE, CY_A, CY_PE, CY_CHE, CY_EM, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_EM, CY_A, CY_PE, CY_ER, CY_ES, CY_I, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_I, CY_PE, CY_ER, CY_O, CY_EM, CY_TE, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_TE, CY_ER, CY_O, CY_EL, CY_I, CY_SOFT_SIGN, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_SOFT_SIGN, CY_O, CY_EL, CY_DE, CY_TE, CY_BE, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_BE, CY_EL, CY_DE, CY_ZHE, CY_SOFT_SIGN, CY_YU, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            CY_YU, CY_DE, CY_ZHE, CY_E, CY_BE, NUL, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
                    NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
        };

        static {
            buildProximityIndices(PROXIMITY, INDICES);
        }

        // Returns the entry index of the given code point in PROXIMITY, or NOT_AN_INDEX.
        static int getIndexOf(int characterCode) {
            return computeIndex(characterCode, INDICES);
        }
    }

    /**
     * Returns the flat proximity table for a script.
     *
     * <p>The returned array is the shared table itself, not a copy.
     *
     * @param script one of the {@code AndroidSpellCheckerService.SCRIPT_*} constants
     * @return the proximity table of that script
     * @throws RuntimeException if the script is neither Latin nor Cyrillic
     */
    public static int[] getProximityForScript(final int script) {
        switch (script) {
            case AndroidSpellCheckerService.SCRIPT_LATIN:
                return Latin.PROXIMITY;
            case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
                return Cyrillic.PROXIMITY;
            default:
                throw new RuntimeException("Wrong script supplied: " + script);
        }
    }

    /**
     * Returns the entry index of a code point in the proximity table of a script.
     *
     * @param codePoint the code point to look up
     * @param script one of the {@code AndroidSpellCheckerService.SCRIPT_*} constants
     * @return the entry index, or {@code NOT_AN_INDEX} if the code point is not a key of the table
     * @throws RuntimeException if the script is neither Latin nor Cyrillic
     */
    private static int getIndexOfCodeForScript(final int codePoint, final int script) {
        switch (script) {
            case AndroidSpellCheckerService.SCRIPT_LATIN:
                return Latin.getIndexOf(codePoint);
            case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
                return Cyrillic.getIndexOf(codePoint);
            default:
                throw new RuntimeException("Wrong script supplied: " + script);
        }
    }

    /**
     * Returns the grid coordinates of a key, packed in a single int.
     *
     * <p>The result is {@code (y << 16) + x}, which avoids creating a temporary object. This is
     * safe because x and y are bounded by {@link #PROXIMITY_GRID_WIDTH} and
     * {@link #PROXIMITY_GRID_HEIGHT} respectively, both far smaller than {@code 1 << 16}.
     *
     * @param codePoint the code point of the key
     * @param script one of the {@code AndroidSpellCheckerService.SCRIPT_*} constants
     * @return the packed coordinates, or {@link #NOT_A_COORDINATE_PAIR} if the key is not on
     *         the grid
     * @throws RuntimeException if the script is unknown, or if the key's entry lies beyond the
     *         last row of the grid
     */
    public static int getXYForCodePointAndScript(final int codePoint, final int script) {
        final int index = getIndexOfCodeForScript(codePoint, script);
        if (NOT_AN_INDEX == index) {
            return NOT_A_COORDINATE_PAIR;
        }
        final int y = index / PROXIMITY_GRID_WIDTH;
        final int x = index % PROXIMITY_GRID_WIDTH;
        // Valid rows are 0 .. PROXIMITY_GRID_HEIGHT - 1, so y == PROXIMITY_GRID_HEIGHT is
        // already off the grid.
        if (y >= PROXIMITY_GRID_HEIGHT) {
            // Safety check, should be entirely useless
            throw new RuntimeException("Wrong y coordinate in spell checker proximity");
        }
        return (y << 16) + x;
    }
}