1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "ui/base/ime/chromeos/character_composer.h" 6 7 #include <X11/Xlib.h> 8 #include <X11/Xutil.h> 9 10 #include <algorithm> 11 #include <iterator> 12 13 #include "base/strings/utf_string_conversions.h" 14 #include "base/third_party/icu/icu_utf.h" 15 // Note for Gtk removal: gdkkeysyms.h only contains a set of 16 // '#define GDK_KeyName 0xNNNN' macros and does not #include any Gtk headers. 17 #include "third_party/gtk+/gdk/gdkkeysyms.h" 18 #include "ui/base/glib/glib_integers.h" 19 #include "ui/events/event.h" 20 #include "ui/events/event_constants.h" 21 #include "ui/gfx/x/x11_types.h" 22 23 // Note for Gtk removal: gtkimcontextsimpleseqs.h does not #include any Gtk 24 // headers and only contains one big guint16 array |gtk_compose_seqs_compact| 25 // which defines the main compose table. The table has internal linkage. 26 // The order of header inclusion is out of order because 27 // gtkimcontextsimpleseqs.h depends on guint16, which is defined in 28 // "ui/base/glib/glib_integers.h". 29 #include "third_party/gtk+/gtk/gtkimcontextsimpleseqs.h" 30 31 namespace { 32 33 // A black list for not composing dead keys. Once the key combination is listed 34 // below, the dead key won't work even when this is listed in 35 // gtkimcontextsimpleseqs.h. This only supports two keyevent sequenses. 36 // TODO(nona): Remove this hack. 37 const struct BlackListedDeadKey { 38 uint32 first_key; // target first key event. 39 uint32 second_key; // target second key event. 40 uint32 output_char; // the character to be inserted if the filter is matched. 41 bool consume; // true if the original key event will be consumed. 42 } kBlackListedDeadKeys[] = { 43 { GDK_KEY_dead_acute, GDK_KEY_m, GDK_KEY_apostrophe, false }, 44 { GDK_KEY_dead_acute, GDK_KEY_s, GDK_KEY_apostrophe, false }, 45 { GDK_KEY_dead_acute, GDK_KEY_t, GDK_KEY_apostrophe, false }, 46 { GDK_KEY_dead_acute, GDK_KEY_v, GDK_KEY_apostrophe, false }, 47 { GDK_KEY_dead_acute, GDK_KEY_dead_acute, GDK_KEY_apostrophe, true }, 48 }; 49 50 typedef std::vector<unsigned int> ComposeBufferType; 51 52 // An iterator class to apply std::lower_bound for composition table. 53 class SequenceIterator 54 : public std::iterator<std::random_access_iterator_tag, const uint16*> { 55 public: 56 SequenceIterator() : ptr_(NULL), stride_(0) {} 57 SequenceIterator(const uint16* ptr, int stride) 58 : ptr_(ptr), stride_(stride) {} 59 60 const uint16* ptr() const {return ptr_;} 61 int stride() const {return stride_;} 62 63 SequenceIterator& operator++() { 64 ptr_ += stride_; 65 return *this; 66 } 67 SequenceIterator& operator+=(int n) { 68 ptr_ += stride_*n; 69 return *this; 70 } 71 72 const uint16* operator*() const {return ptr_;} 73 74 private: 75 const uint16* ptr_; 76 int stride_; 77 }; 78 79 inline SequenceIterator operator+(const SequenceIterator& l, int r) { 80 return SequenceIterator(l) += r; 81 } 82 83 inline int operator-(const SequenceIterator& l, const SequenceIterator& r) { 84 const int d = l.ptr() - r.ptr(); 85 DCHECK(l.stride() == r.stride() && l.stride() > 0 && d%l.stride() == 0); 86 return d/l.stride(); 87 } 88 89 inline bool operator==(const SequenceIterator& l, const SequenceIterator& r) { 90 DCHECK(l.stride() == r.stride()); 91 return l.ptr() == r.ptr(); 92 } 93 94 inline bool operator!=(const SequenceIterator& l, const SequenceIterator& r) { 95 return !(l == r); 96 } 97 98 // A function to compare key value. 99 inline int CompareSequenceValue(unsigned int l, unsigned int r) { 100 return (l > r) ? 1 : ((l < r) ? -1 : 0); 101 } 102 103 // A template to make |CompareFunc| work like operator<. 104 // |CompareFunc| is required to implement a member function, 105 // int operator()(const ComposeBufferType& l, const uint16* r) const. 106 template<typename CompareFunc> 107 struct ComparatorAdoptor { 108 bool operator()(const ComposeBufferType& l, const uint16* r) const { 109 return CompareFunc()(l, r) == -1; 110 } 111 bool operator()(const uint16* l, const ComposeBufferType& r) const { 112 return CompareFunc()(r, l) == 1; 113 } 114 }; 115 116 class ComposeChecker { 117 public: 118 // This class does not take the ownership of |data|, |data| should be alive 119 // for the lifetime of the object. 120 // |data| is a pointer to the head of an array of 121 // length (|max_sequence_length| + 2)*|n_sequences|. 122 // Every (|max_sequence_length| + 2) elements of |data| represent an entry. 123 // First |max_sequence_length| elements of an entry is the sequecne which 124 // composes the character represented by the last two elements of the entry. 125 ComposeChecker(const uint16* data, int max_sequence_length, int n_sequences); 126 bool CheckSequence(const ComposeBufferType& sequence, 127 uint32* composed_character) const; 128 129 private: 130 struct CompareSequence { 131 int operator()(const ComposeBufferType& l, const uint16* r) const; 132 }; 133 134 // This class does not take the ownership of |data_|, 135 // the dtor does not delete |data_|. 136 const uint16* data_; 137 int max_sequence_length_; 138 int n_sequences_; 139 int row_stride_; 140 141 DISALLOW_COPY_AND_ASSIGN(ComposeChecker); 142 }; 143 144 ComposeChecker::ComposeChecker(const uint16* data, 145 int max_sequence_length, 146 int n_sequences) 147 : data_(data), 148 max_sequence_length_(max_sequence_length), 149 n_sequences_(n_sequences), 150 row_stride_(max_sequence_length + 2) { 151 } 152 153 bool ComposeChecker::CheckSequence(const ComposeBufferType& sequence, 154 uint32* composed_character) const { 155 const int sequence_length = sequence.size(); 156 if (sequence_length > max_sequence_length_) 157 return false; 158 // Find sequence in the table. 159 const SequenceIterator begin(data_, row_stride_); 160 const SequenceIterator end = begin + n_sequences_; 161 const SequenceIterator found = std::lower_bound( 162 begin, end, sequence, ComparatorAdoptor<CompareSequence>()); 163 if (found == end || CompareSequence()(sequence, *found) != 0) 164 return false; 165 166 if (sequence_length == max_sequence_length_ || 167 (*found)[sequence_length] == 0) { 168 // |found| is not partially matching. It's fully matching. 169 if (found + 1 == end || 170 CompareSequence()(sequence, *(found + 1)) != 0) { 171 // There is no composition longer than |found| which matches to 172 // |sequence|. 173 const uint32 value = ((*found)[max_sequence_length_] << 16) | 174 (*found)[max_sequence_length_ + 1]; 175 *composed_character = value; 176 } 177 } 178 return true; 179 } 180 181 int ComposeChecker::CompareSequence::operator()(const ComposeBufferType& l, 182 const uint16* r) const { 183 for(size_t i = 0; i < l.size(); ++i) { 184 const int compare_result = CompareSequenceValue(l[i], r[i]); 185 if(compare_result) 186 return compare_result; 187 } 188 return 0; 189 } 190 191 192 class ComposeCheckerWithCompactTable { 193 public: 194 // This class does not take the ownership of |data|, |data| should be alive 195 // for the lifetime of the object. 196 // First |index_size|*|index_stride| elements of |data| are an index table. 197 // Every |index_stride| elements of an index table are an index entry. 198 // If you are checking with a sequence of length N beginning with character C, 199 // you have to find an index entry whose first element is C, then get the N-th 200 // element of the index entry as the index. 201 // The index is pointing the element of |data| where the composition table for 202 // sequences of length N beginning with C is placed. 203 204 ComposeCheckerWithCompactTable(const uint16* data, 205 int max_sequence_length, 206 int index_size, 207 int index_stride); 208 bool CheckSequence(const ComposeBufferType& sequence, 209 uint32* composed_character) const; 210 211 private: 212 struct CompareSequenceFront { 213 int operator()(const ComposeBufferType& l, const uint16* r) const; 214 }; 215 struct CompareSequenceSkipFront { 216 int operator()(const ComposeBufferType& l, const uint16* r) const; 217 }; 218 219 // This class does not take the ownership of |data_|, 220 // the dtor does not delete |data_|. 221 const uint16* data_; 222 int max_sequence_length_; 223 int index_size_; 224 int index_stride_; 225 }; 226 227 ComposeCheckerWithCompactTable::ComposeCheckerWithCompactTable( 228 const uint16* data, 229 int max_sequence_length, 230 int index_size, 231 int index_stride) 232 : data_(data), 233 max_sequence_length_(max_sequence_length), 234 index_size_(index_size), 235 index_stride_(index_stride) { 236 } 237 238 bool ComposeCheckerWithCompactTable::CheckSequence( 239 const ComposeBufferType& sequence, 240 uint32* composed_character) const { 241 const int compose_length = sequence.size(); 242 if (compose_length > max_sequence_length_) 243 return false; 244 // Find corresponding index for the first keypress. 245 const SequenceIterator index_begin(data_, index_stride_); 246 const SequenceIterator index_end = index_begin + index_size_; 247 const SequenceIterator index = 248 std::lower_bound(index_begin, index_end, sequence, 249 ComparatorAdoptor<CompareSequenceFront>()); 250 if (index == index_end || CompareSequenceFront()(sequence, *index) != 0) 251 return false; 252 if (compose_length == 1) 253 return true; 254 // Check for composition sequences. 255 for (int length = compose_length - 1; length < max_sequence_length_; 256 ++length) { 257 const uint16* table = data_ + (*index)[length]; 258 const uint16* table_next = data_ + (*index)[length + 1]; 259 if (table_next > table) { 260 // There are composition sequences for this |length|. 261 const int row_stride = length + 1; 262 const int n_sequences = (table_next - table)/row_stride; 263 const SequenceIterator table_begin(table, row_stride); 264 const SequenceIterator table_end = table_begin + n_sequences; 265 const SequenceIterator found = 266 std::lower_bound(table_begin, table_end, sequence, 267 ComparatorAdoptor<CompareSequenceSkipFront>()); 268 if (found != table_end && 269 CompareSequenceSkipFront()(sequence, *found) == 0) { 270 if (length == compose_length - 1) // Exact match. 271 *composed_character = (*found)[length]; 272 return true; 273 } 274 } 275 } 276 return false; 277 } 278 279 int ComposeCheckerWithCompactTable::CompareSequenceFront::operator()( 280 const ComposeBufferType& l, const uint16* r) const { 281 return CompareSequenceValue(l[0], r[0]); 282 } 283 284 int ComposeCheckerWithCompactTable::CompareSequenceSkipFront::operator()( 285 const ComposeBufferType& l, const uint16* r) const { 286 for(size_t i = 1; i < l.size(); ++i) { 287 const int compare_result = CompareSequenceValue(l[i], r[i - 1]); 288 if(compare_result) 289 return compare_result; 290 } 291 return 0; 292 } 293 294 295 // Additional table. 296 297 // The difference between this and the default input method is the handling 298 // of C+acute - this method produces C WITH CEDILLA rather than C WITH ACUTE. 299 // For languages that use CCedilla and not acute, this is the preferred mapping, 300 // and is particularly important for pt_BR, where the us-intl keyboard is 301 // used extensively. 302 303 const uint16 cedilla_compose_seqs[] = { 304 // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA 305 GDK_KEY_dead_acute, GDK_KEY_C, 0, 0, 0, 0x00C7, 306 // LATIN_SMALL_LETTER_C_WITH_CEDILLA 307 GDK_KEY_dead_acute, GDK_KEY_c, 0, 0, 0, 0x00E7, 308 // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA 309 GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_C, 0, 0, 0x00C7, 310 // LATIN_SMALL_LETTER_C_WITH_CEDILLA 311 GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_c, 0, 0, 0x00E7, 312 // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA 313 GDK_KEY_Multi_key, GDK_KEY_C, GDK_KEY_apostrophe, 0, 0, 0x00C7, 314 // LATIN_SMALL_LETTER_C_WITH_CEDILLA 315 GDK_KEY_Multi_key, GDK_KEY_c, GDK_KEY_apostrophe, 0, 0, 0x00E7, 316 }; 317 318 bool KeypressShouldBeIgnored(unsigned int keyval) { 319 switch(keyval) { 320 case GDK_KEY_Shift_L: 321 case GDK_KEY_Shift_R: 322 case GDK_KEY_Control_L: 323 case GDK_KEY_Control_R: 324 case GDK_KEY_Caps_Lock: 325 case GDK_KEY_Shift_Lock: 326 case GDK_KEY_Meta_L: 327 case GDK_KEY_Meta_R: 328 case GDK_KEY_Alt_L: 329 case GDK_KEY_Alt_R: 330 case GDK_KEY_Super_L: 331 case GDK_KEY_Super_R: 332 case GDK_KEY_Hyper_L: 333 case GDK_KEY_Hyper_R: 334 case GDK_KEY_Mode_switch: 335 case GDK_KEY_ISO_Level3_Shift: 336 return true; 337 default: 338 return false; 339 } 340 } 341 342 bool CheckCharacterComposeTable(const ComposeBufferType& sequence, 343 uint32* composed_character) { 344 // Check cedilla compose table. 345 const ComposeChecker kCedillaComposeChecker( 346 cedilla_compose_seqs, 4, arraysize(cedilla_compose_seqs)/(4 + 2)); 347 if (kCedillaComposeChecker.CheckSequence(sequence, composed_character)) 348 return true; 349 350 // Check main compose table. 351 const ComposeCheckerWithCompactTable kMainComposeChecker( 352 gtk_compose_seqs_compact, 5, 24, 6); 353 if (kMainComposeChecker.CheckSequence(sequence, composed_character)) 354 return true; 355 356 return false; 357 } 358 359 // Converts |character| to UTF16 string. 360 // Returns false when |character| is not a valid character. 361 bool UTF32CharacterToUTF16(uint32 character, string16* output) { 362 output->clear(); 363 // Reject invalid character. (e.g. codepoint greater than 0x10ffff) 364 if (!CBU_IS_UNICODE_CHAR(character)) 365 return false; 366 if (character) { 367 output->resize(CBU16_LENGTH(character)); 368 size_t i = 0; 369 CBU16_APPEND_UNSAFE(&(*output)[0], i, character); 370 } 371 return true; 372 } 373 374 // Converts a X keycode to a X keysym with no modifiers. 375 KeySym XKeyCodeToXKeySym(unsigned int keycode) { 376 XDisplay* display = gfx::GetXDisplay(); 377 if (!display) 378 return NoSymbol; 379 380 XKeyEvent x_key_event = {0}; 381 x_key_event.type = KeyPress; 382 x_key_event.display = display; 383 x_key_event.keycode = keycode; 384 return ::XLookupKeysym(&x_key_event, 0); 385 } 386 387 // Returns an hexadecimal digit integer (0 to 15) corresponding to |keyval|. 388 // -1 is returned when |keyval| cannot be a hexadecimal digit. 389 int KeyvalToHexDigit(unsigned int keyval) { 390 if (GDK_KEY_0 <= keyval && keyval <= GDK_KEY_9) 391 return keyval - GDK_KEY_0; 392 if (GDK_KEY_a <= keyval && keyval <= GDK_KEY_f) 393 return keyval - GDK_KEY_a + 10; 394 if (GDK_KEY_A <= keyval && keyval <= GDK_KEY_F) 395 return keyval - GDK_KEY_A + 10; 396 return -1; // |keyval| cannot be a hexadecimal digit. 397 } 398 399 } // namespace 400 401 namespace ui { 402 403 CharacterComposer::CharacterComposer() : composition_mode_(KEY_SEQUENCE_MODE) {} 404 405 CharacterComposer::~CharacterComposer() {} 406 407 void CharacterComposer::Reset() { 408 compose_buffer_.clear(); 409 composed_character_.clear(); 410 preedit_string_.clear(); 411 composition_mode_ = KEY_SEQUENCE_MODE; 412 } 413 414 bool CharacterComposer::FilterKeyPress(const ui::KeyEvent& event) { 415 if (!event.HasNativeEvent() || 416 (event.type() != ET_KEY_PRESSED && event.type() != ET_KEY_RELEASED)) 417 return false; 418 419 XEvent* xevent = event.native_event(); 420 DCHECK(xevent); 421 KeySym keysym = NoSymbol; 422 ::XLookupString(&xevent->xkey, NULL, 0, &keysym, NULL); 423 424 return FilterKeyPressInternal(keysym, xevent->xkey.keycode, event.flags()); 425 } 426 427 428 bool CharacterComposer::FilterKeyPressInternal(unsigned int keyval, 429 unsigned int keycode, 430 int flags) { 431 composed_character_.clear(); 432 preedit_string_.clear(); 433 434 // We don't care about modifier key presses. 435 if(KeypressShouldBeIgnored(keyval)) 436 return false; 437 438 // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE. 439 // We don't care about other modifiers like Alt. When CapsLock is down, we 440 // do nothing because what we receive is Ctrl+Shift+u (not U). 441 if (keyval == GDK_KEY_U && (flags & EF_SHIFT_DOWN) && 442 (flags & EF_CONTROL_DOWN)) { 443 if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) { 444 // There is no ongoing composition. Let's switch to HEX_MODE. 445 composition_mode_ = HEX_MODE; 446 UpdatePreeditStringHexMode(); 447 return true; 448 } 449 } 450 451 // Filter key press in an appropriate manner. 452 switch (composition_mode_) { 453 case KEY_SEQUENCE_MODE: 454 return FilterKeyPressSequenceMode(keyval, flags); 455 case HEX_MODE: 456 return FilterKeyPressHexMode(keyval, keycode, flags); 457 default: 458 NOTREACHED(); 459 return false; 460 } 461 } 462 463 bool CharacterComposer::FilterKeyPressSequenceMode(unsigned int keyval, 464 int flags) { 465 DCHECK(composition_mode_ == KEY_SEQUENCE_MODE); 466 compose_buffer_.push_back(keyval); 467 468 if (compose_buffer_.size() == 2U) { 469 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kBlackListedDeadKeys); ++i) { 470 if (compose_buffer_[0] == kBlackListedDeadKeys[i].first_key && 471 compose_buffer_[1] == kBlackListedDeadKeys[i].second_key ) { 472 Reset(); 473 composed_character_.push_back(kBlackListedDeadKeys[i].output_char); 474 return kBlackListedDeadKeys[i].consume; 475 } 476 } 477 } 478 479 // Check compose table. 480 uint32 composed_character_utf32 = 0; 481 if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) { 482 // Key press is recognized as a part of composition. 483 if (composed_character_utf32 != 0) { 484 // We get a composed character. 485 compose_buffer_.clear(); 486 UTF32CharacterToUTF16(composed_character_utf32, &composed_character_); 487 } 488 return true; 489 } 490 // Key press is not a part of composition. 491 compose_buffer_.pop_back(); // Remove the keypress added this time. 492 if (!compose_buffer_.empty()) { 493 compose_buffer_.clear(); 494 return true; 495 } 496 return false; 497 } 498 499 bool CharacterComposer::FilterKeyPressHexMode(unsigned int keyval, 500 unsigned int keycode, 501 int flags) { 502 DCHECK(composition_mode_ == HEX_MODE); 503 const size_t kMaxHexSequenceLength = 8; 504 int hex_digit = KeyvalToHexDigit(keyval); 505 if (hex_digit < 0) { 506 // With 101 keyboard, control + shift + 3 produces '#', but a user may 507 // have intended to type '3'. So, if a hexadecimal character was not found, 508 // suppose a user is holding shift key (and possibly control key, too) and 509 // try a character with modifier keys removed. 510 hex_digit = KeyvalToHexDigit(XKeyCodeToXKeySym(keycode)); 511 } 512 513 if (keyval == GDK_KEY_Escape) { 514 // Cancel composition when ESC is pressed. 515 Reset(); 516 } else if (keyval == GDK_KEY_Return || keyval == GDK_KEY_KP_Enter || 517 keyval == GDK_KEY_ISO_Enter || 518 keyval == GDK_KEY_space || keyval == GDK_KEY_KP_Space) { 519 // Commit the composed character when Enter or space is pressed. 520 CommitHex(); 521 } else if (keyval == GDK_KEY_BackSpace) { 522 // Pop back the buffer when Backspace is pressed. 523 if (!compose_buffer_.empty()) { 524 compose_buffer_.pop_back(); 525 } else { 526 // If there is no character in |compose_buffer_|, cancel composition. 527 Reset(); 528 } 529 } else if (hex_digit >= 0 && 530 compose_buffer_.size() < kMaxHexSequenceLength) { 531 // Add the key to the buffer if it is a hex digit. 532 compose_buffer_.push_back(hex_digit); 533 } 534 535 UpdatePreeditStringHexMode(); 536 537 return true; 538 } 539 540 void CharacterComposer::CommitHex() { 541 DCHECK(composition_mode_ == HEX_MODE); 542 uint32 composed_character_utf32 = 0; 543 for (size_t i = 0; i != compose_buffer_.size(); ++i) { 544 const uint32 digit = compose_buffer_[i]; 545 DCHECK(0 <= digit && digit < 16); 546 composed_character_utf32 <<= 4; 547 composed_character_utf32 |= digit; 548 } 549 Reset(); 550 UTF32CharacterToUTF16(composed_character_utf32, &composed_character_); 551 } 552 553 void CharacterComposer::UpdatePreeditStringHexMode() { 554 if (composition_mode_ != HEX_MODE) { 555 preedit_string_.clear(); 556 return; 557 } 558 std::string preedit_string_ascii("u"); 559 for (size_t i = 0; i != compose_buffer_.size(); ++i) { 560 const int digit = compose_buffer_[i]; 561 DCHECK(0 <= digit && digit < 16); 562 preedit_string_ascii += digit <= 9 ? ('0' + digit) : ('a' + (digit - 10)); 563 } 564 preedit_string_ = ASCIIToUTF16(preedit_string_ascii); 565 } 566 567 } // namespace ui 568