1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "ui/base/ime/chromeos/character_composer.h" 6 7 #include <algorithm> 8 #include <iterator> 9 10 #include "base/strings/utf_string_conversions.h" 11 #include "base/third_party/icu/icu_utf.h" 12 // Note for Gtk removal: gdkkeysyms.h only contains a set of 13 // '#define GDK_KeyName 0xNNNN' macros and does not #include any Gtk headers. 14 #include "third_party/gtk+/gdk/gdkkeysyms.h" 15 16 #include "ui/base/glib/glib_integers.h" 17 #include "ui/events/event.h" 18 #include "ui/events/keycodes/keyboard_codes.h" 19 20 // Note for Gtk removal: gtkimcontextsimpleseqs.h does not #include any Gtk 21 // headers and only contains one big guint16 array |gtk_compose_seqs_compact| 22 // which defines the main compose table. The table has internal linkage. 23 // The order of header inclusion is out of order because 24 // gtkimcontextsimpleseqs.h depends on guint16, which is defined in 25 // "ui/base/glib/glib_integers.h". 26 #include "third_party/gtk+/gtk/gtkimcontextsimpleseqs.h" 27 28 namespace { 29 30 // A black list for not composing dead keys. Once the key combination is listed 31 // below, the dead key won't work even when this is listed in 32 // gtkimcontextsimpleseqs.h. This only supports two keyevent sequenses. 33 // TODO(nona): Remove this hack. 34 const struct BlackListedDeadKey { 35 uint32 first_key; // target first key event. 36 uint32 second_key; // target second key event. 37 uint32 output_char; // the character to be inserted if the filter is matched. 38 bool consume; // true if the original key event will be consumed. 39 } kBlackListedDeadKeys[] = { 40 { GDK_KEY_dead_acute, GDK_KEY_m, GDK_KEY_apostrophe, false }, 41 { GDK_KEY_dead_acute, GDK_KEY_s, GDK_KEY_apostrophe, false }, 42 { GDK_KEY_dead_acute, GDK_KEY_t, GDK_KEY_apostrophe, false }, 43 { GDK_KEY_dead_acute, GDK_KEY_v, GDK_KEY_apostrophe, false }, 44 { GDK_KEY_dead_acute, GDK_KEY_dead_acute, GDK_KEY_apostrophe, true }, 45 }; 46 47 typedef std::vector<unsigned int> ComposeBufferType; 48 49 // An iterator class to apply std::lower_bound for composition table. 50 class SequenceIterator 51 : public std::iterator<std::random_access_iterator_tag, const uint16*> { 52 public: 53 SequenceIterator() : ptr_(NULL), stride_(0) {} 54 SequenceIterator(const uint16* ptr, int stride) 55 : ptr_(ptr), stride_(stride) {} 56 57 const uint16* ptr() const {return ptr_;} 58 int stride() const {return stride_;} 59 60 SequenceIterator& operator++() { 61 ptr_ += stride_; 62 return *this; 63 } 64 SequenceIterator& operator+=(int n) { 65 ptr_ += stride_*n; 66 return *this; 67 } 68 69 const uint16* operator*() const {return ptr_;} 70 71 private: 72 const uint16* ptr_; 73 int stride_; 74 }; 75 76 inline SequenceIterator operator+(const SequenceIterator& l, int r) { 77 return SequenceIterator(l) += r; 78 } 79 80 inline int operator-(const SequenceIterator& l, const SequenceIterator& r) { 81 const int d = l.ptr() - r.ptr(); 82 DCHECK(l.stride() == r.stride() && l.stride() > 0 && d%l.stride() == 0); 83 return d/l.stride(); 84 } 85 86 inline bool operator==(const SequenceIterator& l, const SequenceIterator& r) { 87 DCHECK(l.stride() == r.stride()); 88 return l.ptr() == r.ptr(); 89 } 90 91 inline bool operator!=(const SequenceIterator& l, const SequenceIterator& r) { 92 return !(l == r); 93 } 94 95 // A function to compare key value. 96 inline int CompareSequenceValue(unsigned int l, unsigned int r) { 97 return (l > r) ? 1 : ((l < r) ? -1 : 0); 98 } 99 100 // A template to make |CompareFunc| work like operator<. 101 // |CompareFunc| is required to implement a member function, 102 // int operator()(const ComposeBufferType& l, const uint16* r) const. 103 template<typename CompareFunc> 104 struct ComparatorAdoptor { 105 bool operator()(const ComposeBufferType& l, const uint16* r) const { 106 return CompareFunc()(l, r) == -1; 107 } 108 bool operator()(const uint16* l, const ComposeBufferType& r) const { 109 return CompareFunc()(r, l) == 1; 110 } 111 }; 112 113 class ComposeChecker { 114 public: 115 // This class does not take the ownership of |data|, |data| should be alive 116 // for the lifetime of the object. 117 // |data| is a pointer to the head of an array of 118 // length (|max_sequence_length| + 2)*|n_sequences|. 119 // Every (|max_sequence_length| + 2) elements of |data| represent an entry. 120 // First |max_sequence_length| elements of an entry is the sequecne which 121 // composes the character represented by the last two elements of the entry. 122 ComposeChecker(const uint16* data, int max_sequence_length, int n_sequences); 123 bool CheckSequence(const ComposeBufferType& sequence, 124 uint32* composed_character) const; 125 126 private: 127 struct CompareSequence { 128 int operator()(const ComposeBufferType& l, const uint16* r) const; 129 }; 130 131 // This class does not take the ownership of |data_|, 132 // the dtor does not delete |data_|. 133 const uint16* data_; 134 int max_sequence_length_; 135 int n_sequences_; 136 int row_stride_; 137 138 DISALLOW_COPY_AND_ASSIGN(ComposeChecker); 139 }; 140 141 ComposeChecker::ComposeChecker(const uint16* data, 142 int max_sequence_length, 143 int n_sequences) 144 : data_(data), 145 max_sequence_length_(max_sequence_length), 146 n_sequences_(n_sequences), 147 row_stride_(max_sequence_length + 2) { 148 } 149 150 bool ComposeChecker::CheckSequence(const ComposeBufferType& sequence, 151 uint32* composed_character) const { 152 const int sequence_length = sequence.size(); 153 if (sequence_length > max_sequence_length_) 154 return false; 155 // Find sequence in the table. 156 const SequenceIterator begin(data_, row_stride_); 157 const SequenceIterator end = begin + n_sequences_; 158 const SequenceIterator found = std::lower_bound( 159 begin, end, sequence, ComparatorAdoptor<CompareSequence>()); 160 if (found == end || CompareSequence()(sequence, *found) != 0) 161 return false; 162 163 if (sequence_length == max_sequence_length_ || 164 (*found)[sequence_length] == 0) { 165 // |found| is not partially matching. It's fully matching. 166 if (found + 1 == end || 167 CompareSequence()(sequence, *(found + 1)) != 0) { 168 // There is no composition longer than |found| which matches to 169 // |sequence|. 170 const uint32 value = ((*found)[max_sequence_length_] << 16) | 171 (*found)[max_sequence_length_ + 1]; 172 *composed_character = value; 173 } 174 } 175 return true; 176 } 177 178 int ComposeChecker::CompareSequence::operator()(const ComposeBufferType& l, 179 const uint16* r) const { 180 for(size_t i = 0; i < l.size(); ++i) { 181 const int compare_result = CompareSequenceValue(l[i], r[i]); 182 if(compare_result) 183 return compare_result; 184 } 185 return 0; 186 } 187 188 189 class ComposeCheckerWithCompactTable { 190 public: 191 // This class does not take the ownership of |data|, |data| should be alive 192 // for the lifetime of the object. 193 // First |index_size|*|index_stride| elements of |data| are an index table. 194 // Every |index_stride| elements of an index table are an index entry. 195 // If you are checking with a sequence of length N beginning with character C, 196 // you have to find an index entry whose first element is C, then get the N-th 197 // element of the index entry as the index. 198 // The index is pointing the element of |data| where the composition table for 199 // sequences of length N beginning with C is placed. 200 201 ComposeCheckerWithCompactTable(const uint16* data, 202 int max_sequence_length, 203 int index_size, 204 int index_stride); 205 bool CheckSequence(const ComposeBufferType& sequence, 206 uint32* composed_character) const; 207 208 private: 209 struct CompareSequenceFront { 210 int operator()(const ComposeBufferType& l, const uint16* r) const; 211 }; 212 struct CompareSequenceSkipFront { 213 int operator()(const ComposeBufferType& l, const uint16* r) const; 214 }; 215 216 // This class does not take the ownership of |data_|, 217 // the dtor does not delete |data_|. 218 const uint16* data_; 219 int max_sequence_length_; 220 int index_size_; 221 int index_stride_; 222 }; 223 224 ComposeCheckerWithCompactTable::ComposeCheckerWithCompactTable( 225 const uint16* data, 226 int max_sequence_length, 227 int index_size, 228 int index_stride) 229 : data_(data), 230 max_sequence_length_(max_sequence_length), 231 index_size_(index_size), 232 index_stride_(index_stride) { 233 } 234 235 bool ComposeCheckerWithCompactTable::CheckSequence( 236 const ComposeBufferType& sequence, 237 uint32* composed_character) const { 238 const int compose_length = sequence.size(); 239 if (compose_length > max_sequence_length_) 240 return false; 241 // Find corresponding index for the first keypress. 242 const SequenceIterator index_begin(data_, index_stride_); 243 const SequenceIterator index_end = index_begin + index_size_; 244 const SequenceIterator index = 245 std::lower_bound(index_begin, index_end, sequence, 246 ComparatorAdoptor<CompareSequenceFront>()); 247 if (index == index_end || CompareSequenceFront()(sequence, *index) != 0) 248 return false; 249 if (compose_length == 1) 250 return true; 251 // Check for composition sequences. 252 for (int length = compose_length - 1; length < max_sequence_length_; 253 ++length) { 254 const uint16* table = data_ + (*index)[length]; 255 const uint16* table_next = data_ + (*index)[length + 1]; 256 if (table_next > table) { 257 // There are composition sequences for this |length|. 258 const int row_stride = length + 1; 259 const int n_sequences = (table_next - table)/row_stride; 260 const SequenceIterator table_begin(table, row_stride); 261 const SequenceIterator table_end = table_begin + n_sequences; 262 const SequenceIterator found = 263 std::lower_bound(table_begin, table_end, sequence, 264 ComparatorAdoptor<CompareSequenceSkipFront>()); 265 if (found != table_end && 266 CompareSequenceSkipFront()(sequence, *found) == 0) { 267 if (length == compose_length - 1) // Exact match. 268 *composed_character = (*found)[length]; 269 return true; 270 } 271 } 272 } 273 return false; 274 } 275 276 int ComposeCheckerWithCompactTable::CompareSequenceFront::operator()( 277 const ComposeBufferType& l, const uint16* r) const { 278 return CompareSequenceValue(l[0], r[0]); 279 } 280 281 int ComposeCheckerWithCompactTable::CompareSequenceSkipFront::operator()( 282 const ComposeBufferType& l, const uint16* r) const { 283 for(size_t i = 1; i < l.size(); ++i) { 284 const int compare_result = CompareSequenceValue(l[i], r[i - 1]); 285 if(compare_result) 286 return compare_result; 287 } 288 return 0; 289 } 290 291 292 // Additional table. 293 294 // The difference between this and the default input method is the handling 295 // of C+acute - this method produces C WITH CEDILLA rather than C WITH ACUTE. 296 // For languages that use CCedilla and not acute, this is the preferred mapping, 297 // and is particularly important for pt_BR, where the us-intl keyboard is 298 // used extensively. 299 300 const uint16 cedilla_compose_seqs[] = { 301 // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA 302 GDK_KEY_dead_acute, GDK_KEY_C, 0, 0, 0, 0x00C7, 303 // LATIN_SMALL_LETTER_C_WITH_CEDILLA 304 GDK_KEY_dead_acute, GDK_KEY_c, 0, 0, 0, 0x00E7, 305 // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA 306 GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_C, 0, 0, 0x00C7, 307 // LATIN_SMALL_LETTER_C_WITH_CEDILLA 308 GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_c, 0, 0, 0x00E7, 309 // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA 310 GDK_KEY_Multi_key, GDK_KEY_C, GDK_KEY_apostrophe, 0, 0, 0x00C7, 311 // LATIN_SMALL_LETTER_C_WITH_CEDILLA 312 GDK_KEY_Multi_key, GDK_KEY_c, GDK_KEY_apostrophe, 0, 0, 0x00E7, 313 }; 314 315 bool KeypressShouldBeIgnored(unsigned int keyval) { 316 switch(keyval) { 317 case GDK_KEY_Shift_L: 318 case GDK_KEY_Shift_R: 319 case GDK_KEY_Control_L: 320 case GDK_KEY_Control_R: 321 case GDK_KEY_Caps_Lock: 322 case GDK_KEY_Shift_Lock: 323 case GDK_KEY_Meta_L: 324 case GDK_KEY_Meta_R: 325 case GDK_KEY_Alt_L: 326 case GDK_KEY_Alt_R: 327 case GDK_KEY_Super_L: 328 case GDK_KEY_Super_R: 329 case GDK_KEY_Hyper_L: 330 case GDK_KEY_Hyper_R: 331 case GDK_KEY_Mode_switch: 332 case GDK_KEY_ISO_Level3_Shift: 333 return true; 334 default: 335 return false; 336 } 337 } 338 339 bool CheckCharacterComposeTable(const ComposeBufferType& sequence, 340 uint32* composed_character) { 341 // Check cedilla compose table. 342 const ComposeChecker kCedillaComposeChecker( 343 cedilla_compose_seqs, 4, arraysize(cedilla_compose_seqs)/(4 + 2)); 344 if (kCedillaComposeChecker.CheckSequence(sequence, composed_character)) 345 return true; 346 347 // Check main compose table. 348 const ComposeCheckerWithCompactTable kMainComposeChecker( 349 gtk_compose_seqs_compact, 5, 24, 6); 350 if (kMainComposeChecker.CheckSequence(sequence, composed_character)) 351 return true; 352 353 return false; 354 } 355 356 // Converts |character| to UTF16 string. 357 // Returns false when |character| is not a valid character. 358 bool UTF32CharacterToUTF16(uint32 character, base::string16* output) { 359 output->clear(); 360 // Reject invalid character. (e.g. codepoint greater than 0x10ffff) 361 if (!CBU_IS_UNICODE_CHAR(character)) 362 return false; 363 if (character) { 364 output->resize(CBU16_LENGTH(character)); 365 size_t i = 0; 366 CBU16_APPEND_UNSAFE(&(*output)[0], i, character); 367 } 368 return true; 369 } 370 371 // Returns an hexadecimal digit integer (0 to 15) corresponding to |keyval|. 372 // -1 is returned when |keyval| cannot be a hexadecimal digit. 373 int KeyvalToHexDigit(unsigned int keyval) { 374 if (GDK_KEY_0 <= keyval && keyval <= GDK_KEY_9) 375 return keyval - GDK_KEY_0; 376 if (GDK_KEY_a <= keyval && keyval <= GDK_KEY_f) 377 return keyval - GDK_KEY_a + 10; 378 if (GDK_KEY_A <= keyval && keyval <= GDK_KEY_F) 379 return keyval - GDK_KEY_A + 10; 380 return -1; // |keyval| cannot be a hexadecimal digit. 381 } 382 383 // Returns an hexadecimal digit integer (0 to 15) corresponding to |keycode|. 384 // -1 is returned when |keycode| cannot be a hexadecimal digit. 385 int KeycodeToHexDigit(unsigned int keycode) { 386 if (ui::VKEY_0 <= keycode && keycode <= ui::VKEY_9) 387 return keycode - ui::VKEY_0; 388 if (ui::VKEY_A <= keycode && keycode <= ui::VKEY_F) 389 return keycode - ui::VKEY_A + 10; 390 return -1; // |keycode| cannot be a hexadecimal digit. 391 } 392 393 } // namespace 394 395 namespace ui { 396 397 CharacterComposer::CharacterComposer() : composition_mode_(KEY_SEQUENCE_MODE) {} 398 399 CharacterComposer::~CharacterComposer() {} 400 401 void CharacterComposer::Reset() { 402 compose_buffer_.clear(); 403 composed_character_.clear(); 404 preedit_string_.clear(); 405 composition_mode_ = KEY_SEQUENCE_MODE; 406 } 407 408 bool CharacterComposer::FilterKeyPress(const ui::KeyEvent& event) { 409 uint32 keyval = event.platform_keycode(); 410 if (!keyval || 411 (event.type() != ET_KEY_PRESSED && event.type() != ET_KEY_RELEASED)) 412 return false; 413 414 return FilterKeyPressInternal(keyval, event.key_code(), event.flags()); 415 } 416 417 418 bool CharacterComposer::FilterKeyPressInternal(unsigned int keyval, 419 unsigned int keycode, 420 int flags) { 421 composed_character_.clear(); 422 preedit_string_.clear(); 423 424 // We don't care about modifier key presses. 425 if(KeypressShouldBeIgnored(keyval)) 426 return false; 427 428 // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE. 429 // We don't care about other modifiers like Alt. When CapsLock is down, we 430 // do nothing because what we receive is Ctrl+Shift+u (not U). 431 if (keyval == GDK_KEY_U && (flags & EF_SHIFT_DOWN) && 432 (flags & EF_CONTROL_DOWN)) { 433 if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) { 434 // There is no ongoing composition. Let's switch to HEX_MODE. 435 composition_mode_ = HEX_MODE; 436 UpdatePreeditStringHexMode(); 437 return true; 438 } 439 } 440 441 // Filter key press in an appropriate manner. 442 switch (composition_mode_) { 443 case KEY_SEQUENCE_MODE: 444 return FilterKeyPressSequenceMode(keyval, flags); 445 case HEX_MODE: 446 return FilterKeyPressHexMode(keyval, keycode, flags); 447 default: 448 NOTREACHED(); 449 return false; 450 } 451 } 452 453 bool CharacterComposer::FilterKeyPressSequenceMode(unsigned int keyval, 454 int flags) { 455 DCHECK(composition_mode_ == KEY_SEQUENCE_MODE); 456 compose_buffer_.push_back(keyval); 457 458 if (compose_buffer_.size() == 2U) { 459 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kBlackListedDeadKeys); ++i) { 460 if (compose_buffer_[0] == kBlackListedDeadKeys[i].first_key && 461 compose_buffer_[1] == kBlackListedDeadKeys[i].second_key ) { 462 Reset(); 463 composed_character_.push_back(kBlackListedDeadKeys[i].output_char); 464 return kBlackListedDeadKeys[i].consume; 465 } 466 } 467 } 468 469 // Check compose table. 470 uint32 composed_character_utf32 = 0; 471 if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) { 472 // Key press is recognized as a part of composition. 473 if (composed_character_utf32 != 0) { 474 // We get a composed character. 475 compose_buffer_.clear(); 476 UTF32CharacterToUTF16(composed_character_utf32, &composed_character_); 477 } 478 return true; 479 } 480 // Key press is not a part of composition. 481 compose_buffer_.pop_back(); // Remove the keypress added this time. 482 if (!compose_buffer_.empty()) { 483 compose_buffer_.clear(); 484 return true; 485 } 486 return false; 487 } 488 489 bool CharacterComposer::FilterKeyPressHexMode(unsigned int keyval, 490 unsigned int keycode, 491 int flags) { 492 DCHECK(composition_mode_ == HEX_MODE); 493 const size_t kMaxHexSequenceLength = 8; 494 int hex_digit = KeyvalToHexDigit(keyval); 495 if (hex_digit < 0) { 496 // With 101 keyboard, control + shift + 3 produces '#', but a user may 497 // have intended to type '3'. So, if a hexadecimal character was not found, 498 // suppose a user is holding shift key (and possibly control key, too) and 499 // try a character with modifier keys removed. 500 hex_digit = KeycodeToHexDigit(keycode); 501 } 502 503 if (keyval == GDK_KEY_Escape) { 504 // Cancel composition when ESC is pressed. 505 Reset(); 506 } else if (keyval == GDK_KEY_Return || keyval == GDK_KEY_KP_Enter || 507 keyval == GDK_KEY_ISO_Enter || 508 keyval == GDK_KEY_space || keyval == GDK_KEY_KP_Space) { 509 // Commit the composed character when Enter or space is pressed. 510 CommitHex(); 511 } else if (keyval == GDK_KEY_BackSpace) { 512 // Pop back the buffer when Backspace is pressed. 513 if (!compose_buffer_.empty()) { 514 compose_buffer_.pop_back(); 515 } else { 516 // If there is no character in |compose_buffer_|, cancel composition. 517 Reset(); 518 } 519 } else if (hex_digit >= 0 && 520 compose_buffer_.size() < kMaxHexSequenceLength) { 521 // Add the key to the buffer if it is a hex digit. 522 compose_buffer_.push_back(hex_digit); 523 } 524 525 UpdatePreeditStringHexMode(); 526 527 return true; 528 } 529 530 void CharacterComposer::CommitHex() { 531 DCHECK(composition_mode_ == HEX_MODE); 532 uint32 composed_character_utf32 = 0; 533 for (size_t i = 0; i != compose_buffer_.size(); ++i) { 534 const uint32 digit = compose_buffer_[i]; 535 DCHECK(0 <= digit && digit < 16); 536 composed_character_utf32 <<= 4; 537 composed_character_utf32 |= digit; 538 } 539 Reset(); 540 UTF32CharacterToUTF16(composed_character_utf32, &composed_character_); 541 } 542 543 void CharacterComposer::UpdatePreeditStringHexMode() { 544 if (composition_mode_ != HEX_MODE) { 545 preedit_string_.clear(); 546 return; 547 } 548 std::string preedit_string_ascii("u"); 549 for (size_t i = 0; i != compose_buffer_.size(); ++i) { 550 const int digit = compose_buffer_[i]; 551 DCHECK(0 <= digit && digit < 16); 552 preedit_string_ascii += digit <= 9 ? ('0' + digit) : ('a' + (digit - 10)); 553 } 554 preedit_string_ = base::ASCIIToUTF16(preedit_string_ascii); 555 } 556 557 } // namespace ui 558