Home | History | Annotate | Download | only in chromeos
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "ui/base/ime/chromeos/character_composer.h"
      6 
      7 #include <X11/Xlib.h>
      8 #include <X11/Xutil.h>
      9 
     10 #include <algorithm>
     11 #include <iterator>
     12 
     13 #include "base/strings/utf_string_conversions.h"
     14 #include "base/third_party/icu/icu_utf.h"
     15 // Note for Gtk removal: gdkkeysyms.h only contains a set of
     16 // '#define GDK_KeyName 0xNNNN' macros and does not #include any Gtk headers.
     17 #include "third_party/gtk+/gdk/gdkkeysyms.h"
     18 #include "ui/base/glib/glib_integers.h"
     19 #include "ui/events/event.h"
     20 #include "ui/events/event_constants.h"
     21 #include "ui/gfx/x/x11_types.h"
     22 
     23 // Note for Gtk removal: gtkimcontextsimpleseqs.h does not #include any Gtk
     24 // headers and only contains one big guint16 array |gtk_compose_seqs_compact|
     25 // which defines the main compose table. The table has internal linkage.
     26 // The order of header inclusion is out of order because
     27 // gtkimcontextsimpleseqs.h depends on guint16, which is defined in
     28 // "ui/base/glib/glib_integers.h".
     29 #include "third_party/gtk+/gtk/gtkimcontextsimpleseqs.h"
     30 
     31 namespace {
     32 
     33 // A black list for not composing dead keys. Once the key combination is listed
     34 // below, the dead key won't work even when this is listed in
     35 // gtkimcontextsimpleseqs.h. This only supports two keyevent sequenses.
     36 // TODO(nona): Remove this hack.
     37 const struct BlackListedDeadKey {
     38   uint32 first_key;  // target first key event.
     39   uint32 second_key;  // target second key event.
     40   uint32 output_char;  // the character to be inserted if the filter is matched.
     41   bool consume;  // true if the original key event will be consumed.
     42 } kBlackListedDeadKeys[] = {
     43   { GDK_KEY_dead_acute, GDK_KEY_m, GDK_KEY_apostrophe, false },
     44   { GDK_KEY_dead_acute, GDK_KEY_s, GDK_KEY_apostrophe, false },
     45   { GDK_KEY_dead_acute, GDK_KEY_t, GDK_KEY_apostrophe, false },
     46   { GDK_KEY_dead_acute, GDK_KEY_v, GDK_KEY_apostrophe, false },
     47   { GDK_KEY_dead_acute, GDK_KEY_dead_acute, GDK_KEY_apostrophe, true },
     48 };
     49 
// Sequence of key values that the compose checkers below match against the
// composition tables.
typedef std::vector<unsigned int> ComposeBufferType;
     51 
     52 // An iterator class to apply std::lower_bound for composition table.
     53 class SequenceIterator
     54     : public std::iterator<std::random_access_iterator_tag, const uint16*> {
     55  public:
     56   SequenceIterator() : ptr_(NULL), stride_(0) {}
     57   SequenceIterator(const uint16* ptr, int stride)
     58       : ptr_(ptr), stride_(stride) {}
     59 
     60   const uint16* ptr() const {return ptr_;}
     61   int stride() const {return stride_;}
     62 
     63   SequenceIterator& operator++() {
     64     ptr_ += stride_;
     65     return *this;
     66   }
     67   SequenceIterator& operator+=(int n) {
     68     ptr_ += stride_*n;
     69     return *this;
     70   }
     71 
     72   const uint16* operator*() const {return ptr_;}
     73 
     74  private:
     75   const uint16* ptr_;
     76   int stride_;
     77 };
     78 
     79 inline SequenceIterator operator+(const SequenceIterator& l, int r) {
     80   return SequenceIterator(l) += r;
     81 }
     82 
     83 inline int operator-(const SequenceIterator& l, const SequenceIterator& r) {
     84   const int d = l.ptr() - r.ptr();
     85   DCHECK(l.stride() == r.stride() && l.stride() > 0 && d%l.stride() == 0);
     86   return d/l.stride();
     87 }
     88 
     89 inline bool operator==(const SequenceIterator& l, const SequenceIterator& r) {
     90   DCHECK(l.stride() == r.stride());
     91   return l.ptr() == r.ptr();
     92 }
     93 
     94 inline bool operator!=(const SequenceIterator& l, const SequenceIterator& r) {
     95   return !(l == r);
     96 }
     97 
     98 // A function to compare key value.
     99 inline int CompareSequenceValue(unsigned int l, unsigned int r) {
    100   return (l > r) ? 1 : ((l < r) ? -1 : 0);
    101 }
    102 
    103 // A template to make |CompareFunc| work like operator<.
    104 // |CompareFunc| is required to implement a member function,
    105 // int operator()(const ComposeBufferType& l, const uint16* r) const.
    106 template<typename CompareFunc>
    107 struct ComparatorAdoptor {
    108   bool operator()(const ComposeBufferType& l, const uint16* r) const {
    109     return CompareFunc()(l, r) == -1;
    110   }
    111   bool operator()(const uint16* l, const ComposeBufferType& r) const {
    112     return CompareFunc()(r, l) == 1;
    113   }
    114 };
    115 
// Looks up key sequences in a composition table whose rows all have the same
// length.
class ComposeChecker {
 public:
  // This class does not take the ownership of |data|, |data| should be alive
  // for the lifetime of the object.
  // |data| is a pointer to the head of an array of
  // length (|max_sequence_length| + 2)*|n_sequences|.
  // Every (|max_sequence_length| + 2) elements of |data| represent an entry.
  // The first |max_sequence_length| elements of an entry are the sequence
  // which composes the character represented by the last two elements of the
  // entry (upper 16 bits first, then lower 16 bits).
  ComposeChecker(const uint16* data, int max_sequence_length, int n_sequences);

  // Returns true when |sequence| matches a table entry, either completely or
  // as a prefix. |*composed_character| is written only when the match is
  // complete and the following entry does not also match |sequence|;
  // otherwise it is left untouched.
  bool CheckSequence(const ComposeBufferType& sequence,
                     uint32* composed_character) const;

 private:
  // Three-way comparison of a sequence |l| against the leading |l.size()|
  // elements of a table entry |r|.
  struct CompareSequence {
    int operator()(const ComposeBufferType& l, const uint16* r) const;
  };

  // This class does not take the ownership of |data_|,
  // the dtor does not delete |data_|.
  const uint16* data_;
  int max_sequence_length_;
  int n_sequences_;
  // Number of elements per entry: |max_sequence_length_| + 2.
  int row_stride_;

  DISALLOW_COPY_AND_ASSIGN(ComposeChecker);
};
    143 
    144 ComposeChecker::ComposeChecker(const uint16* data,
    145                                int max_sequence_length,
    146                                int n_sequences)
    147     : data_(data),
    148       max_sequence_length_(max_sequence_length),
    149       n_sequences_(n_sequences),
    150       row_stride_(max_sequence_length + 2) {
    151 }
    152 
    153 bool ComposeChecker::CheckSequence(const ComposeBufferType& sequence,
    154                                    uint32* composed_character) const {
    155   const int sequence_length = sequence.size();
    156   if (sequence_length > max_sequence_length_)
    157     return false;
    158   // Find sequence in the table.
    159   const SequenceIterator begin(data_, row_stride_);
    160   const SequenceIterator end = begin + n_sequences_;
    161   const SequenceIterator found = std::lower_bound(
    162       begin, end, sequence, ComparatorAdoptor<CompareSequence>());
    163   if (found == end || CompareSequence()(sequence, *found) != 0)
    164     return false;
    165 
    166   if (sequence_length == max_sequence_length_ ||
    167       (*found)[sequence_length] == 0) {
    168     // |found| is not partially matching. It's fully matching.
    169     if (found + 1 == end ||
    170         CompareSequence()(sequence, *(found + 1)) != 0) {
    171       // There is no composition longer than |found| which matches to
    172       // |sequence|.
    173       const uint32 value = ((*found)[max_sequence_length_] << 16) |
    174           (*found)[max_sequence_length_ + 1];
    175       *composed_character = value;
    176     }
    177   }
    178   return true;
    179 }
    180 
    181 int ComposeChecker::CompareSequence::operator()(const ComposeBufferType& l,
    182                                                 const uint16* r) const {
    183   for(size_t i = 0; i < l.size(); ++i) {
    184     const int compare_result = CompareSequenceValue(l[i], r[i]);
    185     if(compare_result)
    186       return compare_result;
    187   }
    188   return 0;
    189 }
    190 
    191 
    192 class ComposeCheckerWithCompactTable {
    193  public:
    194   // This class does not take the ownership of |data|, |data| should be alive
    195   // for the lifetime of the object.
    196   // First |index_size|*|index_stride| elements of |data| are an index table.
    197   // Every |index_stride| elements of an index table are an index entry.
    198   // If you are checking with a sequence of length N beginning with character C,
    199   // you have to find an index entry whose first element is C, then get the N-th
    200   // element of the index entry as the index.
    201   // The index is pointing the element of |data| where the composition table for
    202   // sequences of length N beginning with C is placed.
    203 
    204   ComposeCheckerWithCompactTable(const uint16* data,
    205                                  int max_sequence_length,
    206                                  int index_size,
    207                                  int index_stride);
    208   bool CheckSequence(const ComposeBufferType& sequence,
    209                      uint32* composed_character) const;
    210 
    211  private:
    212   struct CompareSequenceFront {
    213     int operator()(const ComposeBufferType& l, const uint16* r) const;
    214   };
    215   struct CompareSequenceSkipFront {
    216     int operator()(const ComposeBufferType& l, const uint16* r) const;
    217   };
    218 
    219   // This class does not take the ownership of |data_|,
    220   // the dtor does not delete |data_|.
    221   const uint16* data_;
    222   int max_sequence_length_;
    223   int index_size_;
    224   int index_stride_;
    225 };
    226 
    227 ComposeCheckerWithCompactTable::ComposeCheckerWithCompactTable(
    228     const uint16* data,
    229     int max_sequence_length,
    230     int index_size,
    231     int index_stride)
    232     : data_(data),
    233       max_sequence_length_(max_sequence_length),
    234       index_size_(index_size),
    235       index_stride_(index_stride) {
    236 }
    237 
    238 bool ComposeCheckerWithCompactTable::CheckSequence(
    239     const ComposeBufferType& sequence,
    240     uint32* composed_character) const {
    241   const int compose_length = sequence.size();
    242   if (compose_length > max_sequence_length_)
    243     return false;
    244   // Find corresponding index for the first keypress.
    245   const SequenceIterator index_begin(data_, index_stride_);
    246   const SequenceIterator index_end = index_begin + index_size_;
    247   const SequenceIterator index =
    248       std::lower_bound(index_begin, index_end, sequence,
    249                        ComparatorAdoptor<CompareSequenceFront>());
    250   if (index == index_end || CompareSequenceFront()(sequence, *index) != 0)
    251     return false;
    252   if (compose_length == 1)
    253     return true;
    254   // Check for composition sequences.
    255   for (int length = compose_length - 1; length < max_sequence_length_;
    256        ++length) {
    257     const uint16* table = data_ + (*index)[length];
    258     const uint16* table_next = data_ + (*index)[length + 1];
    259     if (table_next > table) {
    260       // There are composition sequences for this |length|.
    261       const int row_stride = length + 1;
    262       const int n_sequences = (table_next - table)/row_stride;
    263       const SequenceIterator table_begin(table, row_stride);
    264       const SequenceIterator table_end = table_begin + n_sequences;
    265       const SequenceIterator found =
    266           std::lower_bound(table_begin, table_end, sequence,
    267                            ComparatorAdoptor<CompareSequenceSkipFront>());
    268       if (found != table_end &&
    269           CompareSequenceSkipFront()(sequence, *found) == 0) {
    270         if (length == compose_length - 1)  // Exact match.
    271           *composed_character = (*found)[length];
    272         return true;
    273       }
    274     }
    275   }
    276   return false;
    277 }
    278 
    279 int ComposeCheckerWithCompactTable::CompareSequenceFront::operator()(
    280     const ComposeBufferType& l, const uint16* r) const {
    281   return CompareSequenceValue(l[0], r[0]);
    282 }
    283 
    284 int ComposeCheckerWithCompactTable::CompareSequenceSkipFront::operator()(
    285     const ComposeBufferType& l, const uint16* r) const {
    286   for(size_t i = 1; i < l.size(); ++i) {
    287     const int compare_result = CompareSequenceValue(l[i], r[i - 1]);
    288     if(compare_result)
    289       return compare_result;
    290   }
    291   return 0;
    292 }
    293 
    294 
// Additional table.

// The difference between this and the default input method is the handling
// of C+acute - this method produces C WITH CEDILLA rather than C WITH ACUTE.
// For languages that use CCedilla and not acute, this is the preferred mapping,
// and is particularly important for pt_BR, where the us-intl keyboard is
// used extensively.
//
// Each row has six elements: four key slots (unused slots are 0) followed by
// the upper and lower 16 bits of the composed character. The table is
// searched with std::lower_bound by ComposeChecker, so rows must stay sorted
// by their key slots.

const uint16 cedilla_compose_seqs[] = {
  // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
  GDK_KEY_dead_acute, GDK_KEY_C, 0, 0, 0, 0x00C7,
  // LATIN_SMALL_LETTER_C_WITH_CEDILLA
  GDK_KEY_dead_acute, GDK_KEY_c, 0, 0, 0, 0x00E7,
  // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
  GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_C, 0, 0, 0x00C7,
  // LATIN_SMALL_LETTER_C_WITH_CEDILLA
  GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_c, 0, 0, 0x00E7,
  // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
  GDK_KEY_Multi_key, GDK_KEY_C, GDK_KEY_apostrophe, 0, 0, 0x00C7,
  // LATIN_SMALL_LETTER_C_WITH_CEDILLA
  GDK_KEY_Multi_key, GDK_KEY_c, GDK_KEY_apostrophe, 0, 0, 0x00E7,
};
    317 
    318 bool KeypressShouldBeIgnored(unsigned int keyval) {
    319   switch(keyval) {
    320     case GDK_KEY_Shift_L:
    321     case GDK_KEY_Shift_R:
    322     case GDK_KEY_Control_L:
    323     case GDK_KEY_Control_R:
    324     case GDK_KEY_Caps_Lock:
    325     case GDK_KEY_Shift_Lock:
    326     case GDK_KEY_Meta_L:
    327     case GDK_KEY_Meta_R:
    328     case GDK_KEY_Alt_L:
    329     case GDK_KEY_Alt_R:
    330     case GDK_KEY_Super_L:
    331     case GDK_KEY_Super_R:
    332     case GDK_KEY_Hyper_L:
    333     case GDK_KEY_Hyper_R:
    334     case GDK_KEY_Mode_switch:
    335     case GDK_KEY_ISO_Level3_Shift:
    336       return true;
    337     default:
    338       return false;
    339   }
    340 }
    341 
    342 bool CheckCharacterComposeTable(const ComposeBufferType& sequence,
    343                                 uint32* composed_character) {
    344   // Check cedilla compose table.
    345   const ComposeChecker kCedillaComposeChecker(
    346       cedilla_compose_seqs, 4, arraysize(cedilla_compose_seqs)/(4 + 2));
    347   if (kCedillaComposeChecker.CheckSequence(sequence, composed_character))
    348     return true;
    349 
    350   // Check main compose table.
    351   const ComposeCheckerWithCompactTable kMainComposeChecker(
    352       gtk_compose_seqs_compact, 5, 24, 6);
    353   if (kMainComposeChecker.CheckSequence(sequence, composed_character))
    354     return true;
    355 
    356   return false;
    357 }
    358 
    359 // Converts |character| to UTF16 string.
    360 // Returns false when |character| is not a valid character.
    361 bool UTF32CharacterToUTF16(uint32 character, string16* output) {
    362   output->clear();
    363   // Reject invalid character. (e.g. codepoint greater than 0x10ffff)
    364   if (!CBU_IS_UNICODE_CHAR(character))
    365     return false;
    366   if (character) {
    367     output->resize(CBU16_LENGTH(character));
    368     size_t i = 0;
    369     CBU16_APPEND_UNSAFE(&(*output)[0], i, character);
    370   }
    371   return true;
    372 }
    373 
    374 // Converts a X keycode to a X keysym with no modifiers.
    375 KeySym XKeyCodeToXKeySym(unsigned int keycode) {
    376   XDisplay* display = gfx::GetXDisplay();
    377   if (!display)
    378     return NoSymbol;
    379 
    380   XKeyEvent x_key_event = {0};
    381   x_key_event.type = KeyPress;
    382   x_key_event.display = display;
    383   x_key_event.keycode = keycode;
    384   return ::XLookupKeysym(&x_key_event, 0);
    385 }
    386 
    387 // Returns an hexadecimal digit integer (0 to 15) corresponding to |keyval|.
    388 // -1 is returned when |keyval| cannot be a hexadecimal digit.
    389 int KeyvalToHexDigit(unsigned int keyval) {
    390   if (GDK_KEY_0 <= keyval && keyval <= GDK_KEY_9)
    391     return keyval - GDK_KEY_0;
    392   if (GDK_KEY_a <= keyval && keyval <= GDK_KEY_f)
    393     return keyval - GDK_KEY_a + 10;
    394   if (GDK_KEY_A <= keyval && keyval <= GDK_KEY_F)
    395     return keyval - GDK_KEY_A + 10;
    396   return -1;  // |keyval| cannot be a hexadecimal digit.
    397 }
    398 
    399 }  // namespace
    400 
    401 namespace ui {
    402 
    403 CharacterComposer::CharacterComposer() : composition_mode_(KEY_SEQUENCE_MODE) {}
    404 
    405 CharacterComposer::~CharacterComposer() {}
    406 
    407 void CharacterComposer::Reset() {
    408   compose_buffer_.clear();
    409   composed_character_.clear();
    410   preedit_string_.clear();
    411   composition_mode_ = KEY_SEQUENCE_MODE;
    412 }
    413 
    414 bool CharacterComposer::FilterKeyPress(const ui::KeyEvent& event) {
    415   if (!event.HasNativeEvent() ||
    416       (event.type() != ET_KEY_PRESSED && event.type() != ET_KEY_RELEASED))
    417     return false;
    418 
    419   XEvent* xevent = event.native_event();
    420   DCHECK(xevent);
    421   KeySym keysym = NoSymbol;
    422   ::XLookupString(&xevent->xkey, NULL, 0, &keysym, NULL);
    423 
    424   return FilterKeyPressInternal(keysym, xevent->xkey.keycode, event.flags());
    425 }
    426 
    427 
    428 bool CharacterComposer::FilterKeyPressInternal(unsigned int keyval,
    429                                                unsigned int keycode,
    430                                                int flags) {
    431   composed_character_.clear();
    432   preedit_string_.clear();
    433 
    434   // We don't care about modifier key presses.
    435   if(KeypressShouldBeIgnored(keyval))
    436     return false;
    437 
    438   // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE.
    439   // We don't care about other modifiers like Alt.  When CapsLock is down, we
    440   // do nothing because what we receive is Ctrl+Shift+u (not U).
    441   if (keyval == GDK_KEY_U && (flags & EF_SHIFT_DOWN) &&
    442       (flags & EF_CONTROL_DOWN)) {
    443     if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) {
    444       // There is no ongoing composition.  Let's switch to HEX_MODE.
    445       composition_mode_ = HEX_MODE;
    446       UpdatePreeditStringHexMode();
    447       return true;
    448     }
    449   }
    450 
    451   // Filter key press in an appropriate manner.
    452   switch (composition_mode_) {
    453     case KEY_SEQUENCE_MODE:
    454       return FilterKeyPressSequenceMode(keyval, flags);
    455     case HEX_MODE:
    456       return FilterKeyPressHexMode(keyval, keycode, flags);
    457     default:
    458       NOTREACHED();
    459       return false;
    460   }
    461 }
    462 
    463 bool CharacterComposer::FilterKeyPressSequenceMode(unsigned int keyval,
    464                                                    int flags) {
    465   DCHECK(composition_mode_ == KEY_SEQUENCE_MODE);
    466   compose_buffer_.push_back(keyval);
    467 
    468   if (compose_buffer_.size() == 2U) {
    469     for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kBlackListedDeadKeys); ++i) {
    470       if (compose_buffer_[0] == kBlackListedDeadKeys[i].first_key &&
    471           compose_buffer_[1] == kBlackListedDeadKeys[i].second_key ) {
    472         Reset();
    473         composed_character_.push_back(kBlackListedDeadKeys[i].output_char);
    474         return kBlackListedDeadKeys[i].consume;
    475       }
    476     }
    477   }
    478 
    479   // Check compose table.
    480   uint32 composed_character_utf32 = 0;
    481   if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) {
    482     // Key press is recognized as a part of composition.
    483     if (composed_character_utf32 != 0) {
    484       // We get a composed character.
    485       compose_buffer_.clear();
    486       UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
    487     }
    488     return true;
    489   }
    490   // Key press is not a part of composition.
    491   compose_buffer_.pop_back();  // Remove the keypress added this time.
    492   if (!compose_buffer_.empty()) {
    493     compose_buffer_.clear();
    494     return true;
    495   }
    496   return false;
    497 }
    498 
    499 bool CharacterComposer::FilterKeyPressHexMode(unsigned int keyval,
    500                                               unsigned int keycode,
    501                                               int flags) {
    502   DCHECK(composition_mode_ == HEX_MODE);
    503   const size_t kMaxHexSequenceLength = 8;
    504   int hex_digit = KeyvalToHexDigit(keyval);
    505   if (hex_digit < 0) {
    506     // With 101 keyboard, control + shift + 3 produces '#', but a user may
    507     // have intended to type '3'.  So, if a hexadecimal character was not found,
    508     // suppose a user is holding shift key (and possibly control key, too) and
    509     // try a character with modifier keys removed.
    510     hex_digit = KeyvalToHexDigit(XKeyCodeToXKeySym(keycode));
    511   }
    512 
    513   if (keyval == GDK_KEY_Escape) {
    514     // Cancel composition when ESC is pressed.
    515     Reset();
    516   } else if (keyval == GDK_KEY_Return || keyval == GDK_KEY_KP_Enter ||
    517              keyval == GDK_KEY_ISO_Enter ||
    518              keyval == GDK_KEY_space || keyval == GDK_KEY_KP_Space) {
    519     // Commit the composed character when Enter or space is pressed.
    520     CommitHex();
    521   } else if (keyval == GDK_KEY_BackSpace) {
    522     // Pop back the buffer when Backspace is pressed.
    523     if (!compose_buffer_.empty()) {
    524       compose_buffer_.pop_back();
    525     } else {
    526       // If there is no character in |compose_buffer_|, cancel composition.
    527       Reset();
    528     }
    529   } else if (hex_digit >= 0 &&
    530              compose_buffer_.size() < kMaxHexSequenceLength) {
    531     // Add the key to the buffer if it is a hex digit.
    532     compose_buffer_.push_back(hex_digit);
    533   }
    534 
    535   UpdatePreeditStringHexMode();
    536 
    537   return true;
    538 }
    539 
    540 void CharacterComposer::CommitHex() {
    541   DCHECK(composition_mode_ == HEX_MODE);
    542   uint32 composed_character_utf32 = 0;
    543   for (size_t i = 0; i != compose_buffer_.size(); ++i) {
    544     const uint32 digit = compose_buffer_[i];
    545     DCHECK(0 <= digit && digit < 16);
    546     composed_character_utf32 <<= 4;
    547     composed_character_utf32 |= digit;
    548   }
    549   Reset();
    550   UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
    551 }
    552 
    553 void CharacterComposer::UpdatePreeditStringHexMode() {
    554   if (composition_mode_ != HEX_MODE) {
    555     preedit_string_.clear();
    556     return;
    557   }
    558   std::string preedit_string_ascii("u");
    559   for (size_t i = 0; i != compose_buffer_.size(); ++i) {
    560     const int digit = compose_buffer_[i];
    561     DCHECK(0 <= digit && digit < 16);
    562     preedit_string_ascii += digit <= 9 ? ('0' + digit) : ('a' + (digit - 10));
    563   }
    564   preedit_string_ = ASCIIToUTF16(preedit_string_ascii);
    565 }
    566 
    567 }  // namespace ui
    568