1 /* 2 * Copyright (C) 2006 Lars Knoll <lars (at) trolltech.com> 3 * Copyright (C) 2007-2009 Torch Mobile, Inc. 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 * Boston, MA 02111-1307, USA. 19 * 20 */ 21 22 #include "config.h" 23 #include "TextBreakIterator.h" 24 25 #include "PlatformString.h" 26 #include <wtf/StdLibExtras.h> 27 #include <wtf/unicode/Unicode.h> 28 29 using namespace WTF::Unicode; 30 31 namespace WebCore { 32 33 // Hack, not entirely correct 34 static inline bool isCharStop(UChar c) 35 { 36 CharCategory charCategory = category(c); 37 return charCategory != Mark_NonSpacing && (charCategory != Other_Surrogate || (c < 0xd800 || c >= 0xdc00)); 38 } 39 40 static inline bool isLineStop(UChar c) 41 { 42 return category(c) != Separator_Line; 43 } 44 45 static inline bool isSentenceStop(UChar c) 46 { 47 return isPunct(c); 48 } 49 50 class TextBreakIterator { 51 public: 52 void reset(const UChar* str, int len) 53 { 54 string = str; 55 length = len; 56 currentPos = 0; 57 } 58 virtual int first() = 0; 59 virtual int next() = 0; 60 virtual int previous() = 0; 61 int following(int position) 62 { 63 currentPos = position; 64 return next(); 65 } 66 int preceding(int position) 67 { 68 currentPos = position; 69 return previous(); 70 } 71 72 int currentPos; 73 const UChar* string; 74 int length; 75 }; 76 77 struct WordBreakIterator: TextBreakIterator { 78 virtual int first(); 79 virtual int next(); 80 virtual int previous(); 81 }; 82 83 struct CharBreakIterator: TextBreakIterator { 84 virtual int first(); 85 virtual int next(); 86 virtual int previous(); 87 }; 88 89 struct LineBreakIterator: TextBreakIterator { 90 virtual int first(); 91 virtual int next(); 92 virtual int previous(); 93 }; 94 95 struct SentenceBreakIterator : TextBreakIterator { 96 virtual int first(); 97 virtual int next(); 98 virtual int previous(); 99 }; 100 101 int WordBreakIterator::first() 102 { 103 currentPos = 0; 104 return currentPos; 105 } 106 107 int WordBreakIterator::next() 108 { 109 if (currentPos == length) { 110 currentPos = -1; 111 return currentPos; 112 } 113 bool haveSpace = false; 114 while (currentPos < length) { 115 if (haveSpace && !isSpace(string[currentPos])) 116 break; 117 if (isSpace(string[currentPos])) 118 haveSpace = true; 119 ++currentPos; 120 } 121 return currentPos; 122 } 123 124 int WordBreakIterator::previous() 125 { 126 if (!currentPos) { 127 currentPos = -1; 128 return currentPos; 129 } 130 bool haveSpace = false; 131 while (currentPos > 0) { 132 if (haveSpace && !isSpace(string[currentPos])) 133 break; 134 if (isSpace(string[currentPos])) 135 haveSpace = true; 136 --currentPos; 137 } 138 return currentPos; 139 } 140 141 int CharBreakIterator::first() 142 { 143 currentPos = 0; 144 return currentPos; 145 } 146 147 int CharBreakIterator::next() 148 { 149 if (currentPos >= length) 150 return -1; 151 ++currentPos; 152 while (currentPos < length && !isCharStop(string[currentPos])) 153 ++currentPos; 154 return currentPos; 155 } 156 157 int CharBreakIterator::previous() 158 { 159 if (currentPos <= 0) 160 return -1; 161 if (currentPos > length) 162 currentPos = length; 163 --currentPos; 164 while (currentPos > 0 && !isCharStop(string[currentPos])) 165 --currentPos; 166 return currentPos; 167 } 168 169 int LineBreakIterator::first() 170 { 171 currentPos = 0; 172 return currentPos; 173 } 174 175 int LineBreakIterator::next() 176 { 177 if (currentPos == length) { 178 currentPos = -1; 179 return currentPos; 180 } 181 bool haveSpace = false; 182 while (currentPos < length) { 183 if (haveSpace && !isLineStop(string[currentPos])) 184 break; 185 if (isLineStop(string[currentPos])) 186 haveSpace = true; 187 ++currentPos; 188 } 189 return currentPos; 190 } 191 192 int LineBreakIterator::previous() 193 { 194 if (!currentPos) { 195 currentPos = -1; 196 return currentPos; 197 } 198 bool haveSpace = false; 199 while (currentPos > 0) { 200 if (haveSpace && !isLineStop(string[currentPos])) 201 break; 202 if (isLineStop(string[currentPos])) 203 haveSpace = true; 204 --currentPos; 205 } 206 return currentPos; 207 } 208 209 int SentenceBreakIterator::first() 210 { 211 currentPos = 0; 212 return currentPos; 213 } 214 215 int SentenceBreakIterator::next() 216 { 217 if (currentPos == length) { 218 currentPos = -1; 219 return currentPos; 220 } 221 bool haveSpace = false; 222 while (currentPos < length) { 223 if (haveSpace && !isSentenceStop(string[currentPos])) 224 break; 225 if (isSentenceStop(string[currentPos])) 226 haveSpace = true; 227 ++currentPos; 228 } 229 return currentPos; 230 } 231 232 int SentenceBreakIterator::previous() 233 { 234 if (!currentPos) { 235 currentPos = -1; 236 return currentPos; 237 } 238 bool haveSpace = false; 239 while (currentPos > 0) { 240 if (haveSpace && !isSentenceStop(string[currentPos])) 241 break; 242 if (isSentenceStop(string[currentPos])) 243 haveSpace = true; 244 --currentPos; 245 } 246 return currentPos; 247 } 248 249 TextBreakIterator* wordBreakIterator(const UChar* string, int length) 250 { 251 DEFINE_STATIC_LOCAL(WordBreakIterator, iterator, ()); 252 iterator.reset(string, length); 253 return &iterator; 254 } 255 256 TextBreakIterator* characterBreakIterator(const UChar* string, int length) 257 { 258 DEFINE_STATIC_LOCAL(CharBreakIterator, iterator, ()); 259 iterator.reset(string, length); 260 return &iterator; 261 } 262 263 TextBreakIterator* lineBreakIterator(const UChar* string, int length) 264 { 265 DEFINE_STATIC_LOCAL(LineBreakIterator , iterator, ()); 266 iterator.reset(string, length); 267 return &iterator; 268 } 269 270 TextBreakIterator* sentenceBreakIterator(const UChar* string, int length) 271 { 272 DEFINE_STATIC_LOCAL(SentenceBreakIterator, iterator, ()); 273 iterator.reset(string, length); 274 return &iterator; 275 } 276 277 int textBreakFirst(TextBreakIterator* breakIterator) 278 { 279 return breakIterator->first(); 280 } 281 282 int textBreakNext(TextBreakIterator* breakIterator) 283 { 284 return breakIterator->next(); 285 } 286 287 int textBreakPreceding(TextBreakIterator* breakIterator, int position) 288 { 289 return breakIterator->preceding(position); 290 } 291 292 int textBreakFollowing(TextBreakIterator* breakIterator, int position) 293 { 294 return breakIterator->following(position); 295 } 296 297 int textBreakCurrent(TextBreakIterator* breakIterator) 298 { 299 return breakIterator->currentPos; 300 } 301 302 bool isTextBreak(TextBreakIterator*, int) 303 { 304 return true; 305 } 306 307 TextBreakIterator* cursorMovementIterator(const UChar* string, int length) 308 { 309 return characterBreakIterator(string, length); 310 } 311 312 } // namespace WebCore 313