Home | History | Annotate | Download | only in chromeos
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "ui/base/ime/chromeos/character_composer.h"
      6 
      7 #include <algorithm>
      8 #include <iterator>
      9 
     10 #include "base/strings/utf_string_conversions.h"
     11 #include "base/third_party/icu/icu_utf.h"
     12 // Note for Gtk removal: gdkkeysyms.h only contains a set of
     13 // '#define GDK_KeyName 0xNNNN' macros and does not #include any Gtk headers.
     14 #include "third_party/gtk+/gdk/gdkkeysyms.h"
     15 
     16 #include "ui/base/glib/glib_integers.h"
     17 #include "ui/events/event.h"
     18 #include "ui/events/keycodes/keyboard_codes.h"
     19 
     20 // Note for Gtk removal: gtkimcontextsimpleseqs.h does not #include any Gtk
     21 // headers and only contains one big guint16 array |gtk_compose_seqs_compact|
     22 // which defines the main compose table. The table has internal linkage.
     23 // The order of header inclusion is out of order because
     24 // gtkimcontextsimpleseqs.h depends on guint16, which is defined in
     25 // "ui/base/glib/glib_integers.h".
     26 #include "third_party/gtk+/gtk/gtkimcontextsimpleseqs.h"
     27 
     28 namespace {
     29 
     30 // A black list for not composing dead keys. Once the key combination is listed
     31 // below, the dead key won't work even when this is listed in
     32 // gtkimcontextsimpleseqs.h. This only supports two keyevent sequenses.
     33 // TODO(nona): Remove this hack.
     34 const struct BlackListedDeadKey {
     35   uint32 first_key;  // target first key event.
     36   uint32 second_key;  // target second key event.
     37   uint32 output_char;  // the character to be inserted if the filter is matched.
     38   bool consume;  // true if the original key event will be consumed.
     39 } kBlackListedDeadKeys[] = {
     40   { GDK_KEY_dead_acute, GDK_KEY_m, GDK_KEY_apostrophe, false },
     41   { GDK_KEY_dead_acute, GDK_KEY_s, GDK_KEY_apostrophe, false },
     42   { GDK_KEY_dead_acute, GDK_KEY_t, GDK_KEY_apostrophe, false },
     43   { GDK_KEY_dead_acute, GDK_KEY_v, GDK_KEY_apostrophe, false },
     44   { GDK_KEY_dead_acute, GDK_KEY_dead_acute, GDK_KEY_apostrophe, true },
     45 };
     46 
     47 typedef std::vector<unsigned int> ComposeBufferType;
     48 
     49 // An iterator class to apply std::lower_bound for composition table.
     50 class SequenceIterator
     51     : public std::iterator<std::random_access_iterator_tag, const uint16*> {
     52  public:
     53   SequenceIterator() : ptr_(NULL), stride_(0) {}
     54   SequenceIterator(const uint16* ptr, int stride)
     55       : ptr_(ptr), stride_(stride) {}
     56 
     57   const uint16* ptr() const {return ptr_;}
     58   int stride() const {return stride_;}
     59 
     60   SequenceIterator& operator++() {
     61     ptr_ += stride_;
     62     return *this;
     63   }
     64   SequenceIterator& operator+=(int n) {
     65     ptr_ += stride_*n;
     66     return *this;
     67   }
     68 
     69   const uint16* operator*() const {return ptr_;}
     70 
     71  private:
     72   const uint16* ptr_;
     73   int stride_;
     74 };
     75 
     76 inline SequenceIterator operator+(const SequenceIterator& l, int r) {
     77   return SequenceIterator(l) += r;
     78 }
     79 
     80 inline int operator-(const SequenceIterator& l, const SequenceIterator& r) {
     81   const int d = l.ptr() - r.ptr();
     82   DCHECK(l.stride() == r.stride() && l.stride() > 0 && d%l.stride() == 0);
     83   return d/l.stride();
     84 }
     85 
     86 inline bool operator==(const SequenceIterator& l, const SequenceIterator& r) {
     87   DCHECK(l.stride() == r.stride());
     88   return l.ptr() == r.ptr();
     89 }
     90 
     91 inline bool operator!=(const SequenceIterator& l, const SequenceIterator& r) {
     92   return !(l == r);
     93 }
     94 
     95 // A function to compare key value.
     96 inline int CompareSequenceValue(unsigned int l, unsigned int r) {
     97   return (l > r) ? 1 : ((l < r) ? -1 : 0);
     98 }
     99 
    100 // A template to make |CompareFunc| work like operator<.
    101 // |CompareFunc| is required to implement a member function,
    102 // int operator()(const ComposeBufferType& l, const uint16* r) const.
    103 template<typename CompareFunc>
    104 struct ComparatorAdoptor {
    105   bool operator()(const ComposeBufferType& l, const uint16* r) const {
    106     return CompareFunc()(l, r) == -1;
    107   }
    108   bool operator()(const uint16* l, const ComposeBufferType& r) const {
    109     return CompareFunc()(r, l) == 1;
    110   }
    111 };
    112 
    113 class ComposeChecker {
    114  public:
    115   // This class does not take the ownership of |data|, |data| should be alive
    116   // for the lifetime of the object.
    117   // |data| is a pointer to the head of an array of
    118   // length (|max_sequence_length| + 2)*|n_sequences|.
    119   // Every (|max_sequence_length| + 2) elements of |data| represent an entry.
    120   // First |max_sequence_length| elements of an entry is the sequecne which
    121   // composes the character represented by the last two elements of the entry.
    122   ComposeChecker(const uint16* data, int max_sequence_length, int n_sequences);
    123   bool CheckSequence(const ComposeBufferType& sequence,
    124                      uint32* composed_character) const;
    125 
    126  private:
    127   struct CompareSequence {
    128     int operator()(const ComposeBufferType& l, const uint16* r) const;
    129   };
    130 
    131   // This class does not take the ownership of |data_|,
    132   // the dtor does not delete |data_|.
    133   const uint16* data_;
    134   int max_sequence_length_;
    135   int n_sequences_;
    136   int row_stride_;
    137 
    138   DISALLOW_COPY_AND_ASSIGN(ComposeChecker);
    139 };
    140 
    141 ComposeChecker::ComposeChecker(const uint16* data,
    142                                int max_sequence_length,
    143                                int n_sequences)
    144     : data_(data),
    145       max_sequence_length_(max_sequence_length),
    146       n_sequences_(n_sequences),
    147       row_stride_(max_sequence_length + 2) {
    148 }
    149 
    150 bool ComposeChecker::CheckSequence(const ComposeBufferType& sequence,
    151                                    uint32* composed_character) const {
    152   const int sequence_length = sequence.size();
    153   if (sequence_length > max_sequence_length_)
    154     return false;
    155   // Find sequence in the table.
    156   const SequenceIterator begin(data_, row_stride_);
    157   const SequenceIterator end = begin + n_sequences_;
    158   const SequenceIterator found = std::lower_bound(
    159       begin, end, sequence, ComparatorAdoptor<CompareSequence>());
    160   if (found == end || CompareSequence()(sequence, *found) != 0)
    161     return false;
    162 
    163   if (sequence_length == max_sequence_length_ ||
    164       (*found)[sequence_length] == 0) {
    165     // |found| is not partially matching. It's fully matching.
    166     if (found + 1 == end ||
    167         CompareSequence()(sequence, *(found + 1)) != 0) {
    168       // There is no composition longer than |found| which matches to
    169       // |sequence|.
    170       const uint32 value = ((*found)[max_sequence_length_] << 16) |
    171           (*found)[max_sequence_length_ + 1];
    172       *composed_character = value;
    173     }
    174   }
    175   return true;
    176 }
    177 
    178 int ComposeChecker::CompareSequence::operator()(const ComposeBufferType& l,
    179                                                 const uint16* r) const {
    180   for(size_t i = 0; i < l.size(); ++i) {
    181     const int compare_result = CompareSequenceValue(l[i], r[i]);
    182     if(compare_result)
    183       return compare_result;
    184   }
    185   return 0;
    186 }
    187 
    188 
    189 class ComposeCheckerWithCompactTable {
    190  public:
    191   // This class does not take the ownership of |data|, |data| should be alive
    192   // for the lifetime of the object.
    193   // First |index_size|*|index_stride| elements of |data| are an index table.
    194   // Every |index_stride| elements of an index table are an index entry.
    195   // If you are checking with a sequence of length N beginning with character C,
    196   // you have to find an index entry whose first element is C, then get the N-th
    197   // element of the index entry as the index.
    198   // The index is pointing the element of |data| where the composition table for
    199   // sequences of length N beginning with C is placed.
    200 
    201   ComposeCheckerWithCompactTable(const uint16* data,
    202                                  int max_sequence_length,
    203                                  int index_size,
    204                                  int index_stride);
    205   bool CheckSequence(const ComposeBufferType& sequence,
    206                      uint32* composed_character) const;
    207 
    208  private:
    209   struct CompareSequenceFront {
    210     int operator()(const ComposeBufferType& l, const uint16* r) const;
    211   };
    212   struct CompareSequenceSkipFront {
    213     int operator()(const ComposeBufferType& l, const uint16* r) const;
    214   };
    215 
    216   // This class does not take the ownership of |data_|,
    217   // the dtor does not delete |data_|.
    218   const uint16* data_;
    219   int max_sequence_length_;
    220   int index_size_;
    221   int index_stride_;
    222 };
    223 
    224 ComposeCheckerWithCompactTable::ComposeCheckerWithCompactTable(
    225     const uint16* data,
    226     int max_sequence_length,
    227     int index_size,
    228     int index_stride)
    229     : data_(data),
    230       max_sequence_length_(max_sequence_length),
    231       index_size_(index_size),
    232       index_stride_(index_stride) {
    233 }
    234 
    235 bool ComposeCheckerWithCompactTable::CheckSequence(
    236     const ComposeBufferType& sequence,
    237     uint32* composed_character) const {
    238   const int compose_length = sequence.size();
    239   if (compose_length > max_sequence_length_)
    240     return false;
    241   // Find corresponding index for the first keypress.
    242   const SequenceIterator index_begin(data_, index_stride_);
    243   const SequenceIterator index_end = index_begin + index_size_;
    244   const SequenceIterator index =
    245       std::lower_bound(index_begin, index_end, sequence,
    246                        ComparatorAdoptor<CompareSequenceFront>());
    247   if (index == index_end || CompareSequenceFront()(sequence, *index) != 0)
    248     return false;
    249   if (compose_length == 1)
    250     return true;
    251   // Check for composition sequences.
    252   for (int length = compose_length - 1; length < max_sequence_length_;
    253        ++length) {
    254     const uint16* table = data_ + (*index)[length];
    255     const uint16* table_next = data_ + (*index)[length + 1];
    256     if (table_next > table) {
    257       // There are composition sequences for this |length|.
    258       const int row_stride = length + 1;
    259       const int n_sequences = (table_next - table)/row_stride;
    260       const SequenceIterator table_begin(table, row_stride);
    261       const SequenceIterator table_end = table_begin + n_sequences;
    262       const SequenceIterator found =
    263           std::lower_bound(table_begin, table_end, sequence,
    264                            ComparatorAdoptor<CompareSequenceSkipFront>());
    265       if (found != table_end &&
    266           CompareSequenceSkipFront()(sequence, *found) == 0) {
    267         if (length == compose_length - 1)  // Exact match.
    268           *composed_character = (*found)[length];
    269         return true;
    270       }
    271     }
    272   }
    273   return false;
    274 }
    275 
    276 int ComposeCheckerWithCompactTable::CompareSequenceFront::operator()(
    277     const ComposeBufferType& l, const uint16* r) const {
    278   return CompareSequenceValue(l[0], r[0]);
    279 }
    280 
    281 int ComposeCheckerWithCompactTable::CompareSequenceSkipFront::operator()(
    282     const ComposeBufferType& l, const uint16* r) const {
    283   for(size_t i = 1; i < l.size(); ++i) {
    284     const int compare_result = CompareSequenceValue(l[i], r[i - 1]);
    285     if(compare_result)
    286       return compare_result;
    287   }
    288   return 0;
    289 }
    290 
    291 
    292 // Additional table.
    293 
    294 // The difference between this and the default input method is the handling
    295 // of C+acute - this method produces C WITH CEDILLA rather than C WITH ACUTE.
    296 // For languages that use CCedilla and not acute, this is the preferred mapping,
    297 // and is particularly important for pt_BR, where the us-intl keyboard is
    298 // used extensively.
    299 
    300 const uint16 cedilla_compose_seqs[] = {
    301   // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
    302   GDK_KEY_dead_acute, GDK_KEY_C, 0, 0, 0, 0x00C7,
    303   // LATIN_SMALL_LETTER_C_WITH_CEDILLA
    304   GDK_KEY_dead_acute, GDK_KEY_c, 0, 0, 0, 0x00E7,
    305   // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
    306   GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_C, 0, 0, 0x00C7,
    307   // LATIN_SMALL_LETTER_C_WITH_CEDILLA
    308   GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_c, 0, 0, 0x00E7,
    309   // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
    310   GDK_KEY_Multi_key, GDK_KEY_C, GDK_KEY_apostrophe, 0, 0, 0x00C7,
    311   // LATIN_SMALL_LETTER_C_WITH_CEDILLA
    312   GDK_KEY_Multi_key, GDK_KEY_c, GDK_KEY_apostrophe, 0, 0, 0x00E7,
    313 };
    314 
    315 bool KeypressShouldBeIgnored(unsigned int keyval) {
    316   switch(keyval) {
    317     case GDK_KEY_Shift_L:
    318     case GDK_KEY_Shift_R:
    319     case GDK_KEY_Control_L:
    320     case GDK_KEY_Control_R:
    321     case GDK_KEY_Caps_Lock:
    322     case GDK_KEY_Shift_Lock:
    323     case GDK_KEY_Meta_L:
    324     case GDK_KEY_Meta_R:
    325     case GDK_KEY_Alt_L:
    326     case GDK_KEY_Alt_R:
    327     case GDK_KEY_Super_L:
    328     case GDK_KEY_Super_R:
    329     case GDK_KEY_Hyper_L:
    330     case GDK_KEY_Hyper_R:
    331     case GDK_KEY_Mode_switch:
    332     case GDK_KEY_ISO_Level3_Shift:
    333       return true;
    334     default:
    335       return false;
    336   }
    337 }
    338 
    339 bool CheckCharacterComposeTable(const ComposeBufferType& sequence,
    340                                 uint32* composed_character) {
    341   // Check cedilla compose table.
    342   const ComposeChecker kCedillaComposeChecker(
    343       cedilla_compose_seqs, 4, arraysize(cedilla_compose_seqs)/(4 + 2));
    344   if (kCedillaComposeChecker.CheckSequence(sequence, composed_character))
    345     return true;
    346 
    347   // Check main compose table.
    348   const ComposeCheckerWithCompactTable kMainComposeChecker(
    349       gtk_compose_seqs_compact, 5, 24, 6);
    350   if (kMainComposeChecker.CheckSequence(sequence, composed_character))
    351     return true;
    352 
    353   return false;
    354 }
    355 
    356 // Converts |character| to UTF16 string.
    357 // Returns false when |character| is not a valid character.
    358 bool UTF32CharacterToUTF16(uint32 character, base::string16* output) {
    359   output->clear();
    360   // Reject invalid character. (e.g. codepoint greater than 0x10ffff)
    361   if (!CBU_IS_UNICODE_CHAR(character))
    362     return false;
    363   if (character) {
    364     output->resize(CBU16_LENGTH(character));
    365     size_t i = 0;
    366     CBU16_APPEND_UNSAFE(&(*output)[0], i, character);
    367   }
    368   return true;
    369 }
    370 
    371 // Returns an hexadecimal digit integer (0 to 15) corresponding to |keyval|.
    372 // -1 is returned when |keyval| cannot be a hexadecimal digit.
    373 int KeyvalToHexDigit(unsigned int keyval) {
    374   if (GDK_KEY_0 <= keyval && keyval <= GDK_KEY_9)
    375     return keyval - GDK_KEY_0;
    376   if (GDK_KEY_a <= keyval && keyval <= GDK_KEY_f)
    377     return keyval - GDK_KEY_a + 10;
    378   if (GDK_KEY_A <= keyval && keyval <= GDK_KEY_F)
    379     return keyval - GDK_KEY_A + 10;
    380   return -1;  // |keyval| cannot be a hexadecimal digit.
    381 }
    382 
    383 // Returns an hexadecimal digit integer (0 to 15) corresponding to |keycode|.
    384 // -1 is returned when |keycode| cannot be a hexadecimal digit.
    385 int KeycodeToHexDigit(unsigned int keycode) {
    386   if (ui::VKEY_0 <= keycode && keycode <= ui::VKEY_9)
    387     return keycode - ui::VKEY_0;
    388   if (ui::VKEY_A <= keycode && keycode <= ui::VKEY_F)
    389     return keycode - ui::VKEY_A + 10;
    390   return -1;  // |keycode| cannot be a hexadecimal digit.
    391 }
    392 
    393 }  // namespace
    394 
    395 namespace ui {
    396 
    397 CharacterComposer::CharacterComposer() : composition_mode_(KEY_SEQUENCE_MODE) {}
    398 
    399 CharacterComposer::~CharacterComposer() {}
    400 
    401 void CharacterComposer::Reset() {
    402   compose_buffer_.clear();
    403   composed_character_.clear();
    404   preedit_string_.clear();
    405   composition_mode_ = KEY_SEQUENCE_MODE;
    406 }
    407 
    408 bool CharacterComposer::FilterKeyPress(const ui::KeyEvent& event) {
    409   uint32 keyval = event.platform_keycode();
    410   if (!keyval ||
    411       (event.type() != ET_KEY_PRESSED && event.type() != ET_KEY_RELEASED))
    412     return false;
    413 
    414   return FilterKeyPressInternal(keyval, event.key_code(), event.flags());
    415 }
    416 
    417 
    418 bool CharacterComposer::FilterKeyPressInternal(unsigned int keyval,
    419                                                unsigned int keycode,
    420                                                int flags) {
    421   composed_character_.clear();
    422   preedit_string_.clear();
    423 
    424   // We don't care about modifier key presses.
    425   if(KeypressShouldBeIgnored(keyval))
    426     return false;
    427 
    428   // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE.
    429   // We don't care about other modifiers like Alt.  When CapsLock is down, we
    430   // do nothing because what we receive is Ctrl+Shift+u (not U).
    431   if (keyval == GDK_KEY_U && (flags & EF_SHIFT_DOWN) &&
    432       (flags & EF_CONTROL_DOWN)) {
    433     if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) {
    434       // There is no ongoing composition.  Let's switch to HEX_MODE.
    435       composition_mode_ = HEX_MODE;
    436       UpdatePreeditStringHexMode();
    437       return true;
    438     }
    439   }
    440 
    441   // Filter key press in an appropriate manner.
    442   switch (composition_mode_) {
    443     case KEY_SEQUENCE_MODE:
    444       return FilterKeyPressSequenceMode(keyval, flags);
    445     case HEX_MODE:
    446       return FilterKeyPressHexMode(keyval, keycode, flags);
    447     default:
    448       NOTREACHED();
    449       return false;
    450   }
    451 }
    452 
    453 bool CharacterComposer::FilterKeyPressSequenceMode(unsigned int keyval,
    454                                                    int flags) {
    455   DCHECK(composition_mode_ == KEY_SEQUENCE_MODE);
    456   compose_buffer_.push_back(keyval);
    457 
    458   if (compose_buffer_.size() == 2U) {
    459     for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kBlackListedDeadKeys); ++i) {
    460       if (compose_buffer_[0] == kBlackListedDeadKeys[i].first_key &&
    461           compose_buffer_[1] == kBlackListedDeadKeys[i].second_key ) {
    462         Reset();
    463         composed_character_.push_back(kBlackListedDeadKeys[i].output_char);
    464         return kBlackListedDeadKeys[i].consume;
    465       }
    466     }
    467   }
    468 
    469   // Check compose table.
    470   uint32 composed_character_utf32 = 0;
    471   if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) {
    472     // Key press is recognized as a part of composition.
    473     if (composed_character_utf32 != 0) {
    474       // We get a composed character.
    475       compose_buffer_.clear();
    476       UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
    477     }
    478     return true;
    479   }
    480   // Key press is not a part of composition.
    481   compose_buffer_.pop_back();  // Remove the keypress added this time.
    482   if (!compose_buffer_.empty()) {
    483     compose_buffer_.clear();
    484     return true;
    485   }
    486   return false;
    487 }
    488 
    489 bool CharacterComposer::FilterKeyPressHexMode(unsigned int keyval,
    490                                               unsigned int keycode,
    491                                               int flags) {
    492   DCHECK(composition_mode_ == HEX_MODE);
    493   const size_t kMaxHexSequenceLength = 8;
    494   int hex_digit = KeyvalToHexDigit(keyval);
    495   if (hex_digit < 0) {
    496     // With 101 keyboard, control + shift + 3 produces '#', but a user may
    497     // have intended to type '3'.  So, if a hexadecimal character was not found,
    498     // suppose a user is holding shift key (and possibly control key, too) and
    499     // try a character with modifier keys removed.
    500     hex_digit = KeycodeToHexDigit(keycode);
    501   }
    502 
    503   if (keyval == GDK_KEY_Escape) {
    504     // Cancel composition when ESC is pressed.
    505     Reset();
    506   } else if (keyval == GDK_KEY_Return || keyval == GDK_KEY_KP_Enter ||
    507              keyval == GDK_KEY_ISO_Enter ||
    508              keyval == GDK_KEY_space || keyval == GDK_KEY_KP_Space) {
    509     // Commit the composed character when Enter or space is pressed.
    510     CommitHex();
    511   } else if (keyval == GDK_KEY_BackSpace) {
    512     // Pop back the buffer when Backspace is pressed.
    513     if (!compose_buffer_.empty()) {
    514       compose_buffer_.pop_back();
    515     } else {
    516       // If there is no character in |compose_buffer_|, cancel composition.
    517       Reset();
    518     }
    519   } else if (hex_digit >= 0 &&
    520              compose_buffer_.size() < kMaxHexSequenceLength) {
    521     // Add the key to the buffer if it is a hex digit.
    522     compose_buffer_.push_back(hex_digit);
    523   }
    524 
    525   UpdatePreeditStringHexMode();
    526 
    527   return true;
    528 }
    529 
    530 void CharacterComposer::CommitHex() {
    531   DCHECK(composition_mode_ == HEX_MODE);
    532   uint32 composed_character_utf32 = 0;
    533   for (size_t i = 0; i != compose_buffer_.size(); ++i) {
    534     const uint32 digit = compose_buffer_[i];
    535     DCHECK(0 <= digit && digit < 16);
    536     composed_character_utf32 <<= 4;
    537     composed_character_utf32 |= digit;
    538   }
    539   Reset();
    540   UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
    541 }
    542 
    543 void CharacterComposer::UpdatePreeditStringHexMode() {
    544   if (composition_mode_ != HEX_MODE) {
    545     preedit_string_.clear();
    546     return;
    547   }
    548   std::string preedit_string_ascii("u");
    549   for (size_t i = 0; i != compose_buffer_.size(); ++i) {
    550     const int digit = compose_buffer_[i];
    551     DCHECK(0 <= digit && digit < 16);
    552     preedit_string_ascii += digit <= 9 ? ('0' + digit) : ('a' + (digit - 10));
    553   }
    554   preedit_string_ = base::ASCIIToUTF16(preedit_string_ascii);
    555 }
    556 
    557 }  // namespace ui
    558