/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.text.method;

import android.text.Selection;
import android.text.SpannableStringBuilder;

import java.text.BreakIterator;
import java.util.Locale;

/**
 * Walks through cursor positions at word boundaries. Internally uses
 * {@link BreakIterator#getWordInstance(Locale)}, and caches a window of the
 * {@link CharSequence} as a String for performance reasons.
 *
 * Also provides methods to determine word boundaries.
 *
 * All public offsets are expressed in the coordinates of the CharSequence given to
 * {@link #setCharSequence(CharSequence, int, int)}; internally they are shifted so that they
 * index into the cached window.
 * {@hide}
 */
public class WordIterator implements Selection.PositionIterator {
    // Size of the window for the word iterator, should be greater than the longest word's length
    private static final int WINDOW_WIDTH = 50;

    // Cached window of the analysed text. Starts empty so that queries made before
    // setCharSequence() fail with a regular range error instead of a NullPointerException.
    private String mString = "";

    // Index, in the original CharSequence, of the first character of mString.
    private int mOffsetShift;

    private final BreakIterator mIterator;

    /**
     * Constructs a WordIterator using the default locale.
     */
    public WordIterator() {
        this(Locale.getDefault());
    }

    /**
     * Constructs a new WordIterator for the specified locale.
     * @param locale The locale to be used when analysing the text.
     */
    public WordIterator(Locale locale) {
        mIterator = BreakIterator.getWordInstance(locale);
    }

    /**
     * Sets the text to analyse. Only a window of the text is cached: it starts WINDOW_WIDTH
     * characters before <code>start</code> and stops WINDOW_WIDTH characters after
     * <code>end</code>, clamped to the start and the length of <code>charSequence</code>.
     * Offsets later passed to this iterator must fall inside that window.
     *
     * If the window cannot be extracted (the underlying substring call throws), the previously
     * set text and window are left untouched.
     *
     * @param charSequence The text to iterate over.
     * @param start Start of the region of interest, as an index in charSequence.
     * @param end End of the region of interest, as an index in charSequence.
     */
    public void setCharSequence(CharSequence charSequence, int start, int end) {
        // Computed with long arithmetic so that extreme start/end values cannot wrap around.
        final int windowStart = (int) Math.max(0L, (long) start - WINDOW_WIDTH);
        final int windowEnd =
                (int) Math.min((long) charSequence.length(), (long) end + WINDOW_WIDTH);

        final String window;
        if (charSequence instanceof SpannableStringBuilder) {
            // Dedicated substring method of SpannableStringBuilder; presumably cheaper than
            // subSequence().toString() for that type.
            window = ((SpannableStringBuilder) charSequence).substring(windowStart, windowEnd);
        } else {
            window = charSequence.subSequence(windowStart, windowEnd).toString();
        }

        // Commit the new state only once the window has been extracted, so that the cached
        // string, the iterator text and the shift always describe the same window.
        mIterator.setText(window);
        mString = window;
        mOffsetShift = windowStart;
    }

    /**
     * {@inheritDoc}
     *
     * Word boundaries that are not located on a letter or digit are skipped.
     *
     * @return the closest boundary before <code>offset</code> that is on a letter or digit, or
     * BreakIterator.DONE if there is none in the cached window.
     * @throws IllegalArgumentException if offset is outside of the cached window.
     */
    public int preceding(int offset) {
        int shiftedOffset = offset - mOffsetShift;
        checkOffsetIsValid(shiftedOffset);
        do {
            shiftedOffset = mIterator.preceding(shiftedOffset);
            if (shiftedOffset == BreakIterator.DONE || isOnLetterOrDigit(shiftedOffset)) {
                return unshift(shiftedOffset);
            }
        } while (true);
    }

    /**
     * {@inheritDoc}
     *
     * Word boundaries that are not located right after a letter or digit are skipped.
     *
     * @return the closest boundary after <code>offset</code> that follows a letter or digit, or
     * BreakIterator.DONE if there is none in the cached window.
     * @throws IllegalArgumentException if offset is outside of the cached window.
     */
    public int following(int offset) {
        int shiftedOffset = offset - mOffsetShift;
        checkOffsetIsValid(shiftedOffset);
        do {
            shiftedOffset = mIterator.following(shiftedOffset);
            if (shiftedOffset == BreakIterator.DONE || isAfterLetterOrDigit(shiftedOffset)) {
                return unshift(shiftedOffset);
            }
        } while (true);
    }

    /** If <code>offset</code> is within a word, returns the index of the first character of that
     * word, otherwise returns BreakIterator.DONE.
     *
     * The offsets that are considered to be part of a word are the indexes of its characters,
     * <i>as well as</i> the index of its last character plus one.
     * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
     *
     * Valid range for offset is the window cached by the last call to
     * {@link #setCharSequence(CharSequence, int, int)} (note the inclusive upper bound).
     * The returned value is lower than or equal to offset, or BreakIterator.DONE.
     *
     * @throws IllegalArgumentException if offset is not valid.
     */
    public int getBeginning(int offset) {
        final int shiftedOffset = offset - mOffsetShift;
        checkOffsetIsValid(shiftedOffset);

        if (isOnLetterOrDigit(shiftedOffset)) {
            // On the first character of a word: the offset itself is the beginning.
            if (mIterator.isBoundary(shiftedOffset)) {
                return shiftedOffset + mOffsetShift;
            }
            return unshift(mIterator.preceding(shiftedOffset));
        }
        // Not on a word character, but right after one: offset is the end of a word.
        if (isAfterLetterOrDigit(shiftedOffset)) {
            return unshift(mIterator.preceding(shiftedOffset));
        }
        return BreakIterator.DONE;
    }

    /** If <code>offset</code> is within a word, returns the index of the last character of that
     * word plus one, otherwise returns BreakIterator.DONE.
     *
     * The offsets that are considered to be part of a word are the indexes of its characters,
     * <i>as well as</i> the index of its last character plus one.
     * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
     *
     * Valid range for offset is the window cached by the last call to
     * {@link #setCharSequence(CharSequence, int, int)} (note the inclusive upper bound).
     * The returned value is greater than or equal to offset, or BreakIterator.DONE.
     *
     * @throws IllegalArgumentException if offset is not valid.
     */
    public int getEnd(int offset) {
        final int shiftedOffset = offset - mOffsetShift;
        checkOffsetIsValid(shiftedOffset);

        if (isAfterLetterOrDigit(shiftedOffset)) {
            // Right after the last character of a word: the offset itself is the end.
            if (mIterator.isBoundary(shiftedOffset)) {
                return shiftedOffset + mOffsetShift;
            }
            return unshift(mIterator.following(shiftedOffset));
        }
        // Not after a word character, but on one: offset is the beginning of a word.
        if (isOnLetterOrDigit(shiftedOffset)) {
            return unshift(mIterator.following(shiftedOffset));
        }
        return BreakIterator.DONE;
    }

    /**
     * Converts an offset in the cached window back to an offset in the original text, keeping
     * BreakIterator.DONE as is so that the "no boundary" marker is never shifted.
     */
    private int unshift(int shiftedOffset) {
        if (shiftedOffset == BreakIterator.DONE) {
            return BreakIterator.DONE;
        }
        return shiftedOffset + mOffsetShift;
    }

    /**
     * Returns true if the code point that ends right before <code>shiftedOffset</code> in the
     * cached window is a letter or a digit.
     */
    private boolean isAfterLetterOrDigit(int shiftedOffset) {
        if (shiftedOffset < 1 || shiftedOffset > mString.length()) {
            return false;
        }
        return Character.isLetterOrDigit(mString.codePointBefore(shiftedOffset));
    }

    /**
     * Returns true if the code point that starts at <code>shiftedOffset</code> in the cached
     * window is a letter or a digit.
     */
    private boolean isOnLetterOrDigit(int shiftedOffset) {
        if (shiftedOffset < 0 || shiftedOffset >= mString.length()) {
            return false;
        }
        return Character.isLetterOrDigit(mString.codePointAt(shiftedOffset));
    }

    /**
     * Checks that <code>shiftedOffset</code> lies within the cached window, both ends included.
     * The error message reports the offset and the valid range in original text coordinates.
     *
     * @throws IllegalArgumentException if the offset is outside of the cached window.
     */
    private void checkOffsetIsValid(int shiftedOffset) {
        if (shiftedOffset < 0 || shiftedOffset > mString.length()) {
            throw new IllegalArgumentException("Invalid offset: " + (shiftedOffset + mOffsetShift) +
                    ". Valid range is [" + mOffsetShift + ", " + (mString.length() + mOffsetShift) +
                    "]");
        }
    }
}