Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2003, 2006, 2008, 2009, 2010, 2011 Apple Inc. All rights reserved.
      3  * Copyright (C) 2008 Holger Hans Peter Freyther
      4  * Copyright (C) Research In Motion Limited 2011. All rights reserved.
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Library General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Library General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Library General Public License
     17  * along with this library; see the file COPYING.LIB.  If not, write to
     18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  * Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #include "config.h"
     24 #include "platform/text/SurrogatePairAwareTextIterator.h"
     25 
     26 #include <unicode/unorm.h>
     27 
     28 using namespace WTF;
     29 using namespace Unicode;
     30 
     31 namespace blink {
     32 
     33 SurrogatePairAwareTextIterator::SurrogatePairAwareTextIterator(const UChar* characters, int currentCharacter, int lastCharacter, int endCharacter)
     34     : m_characters(characters)
     35     , m_currentCharacter(currentCharacter)
     36     , m_lastCharacter(lastCharacter)
     37     , m_endCharacter(endCharacter)
     38 {
     39 }
     40 
     41 bool SurrogatePairAwareTextIterator::consumeSlowCase(UChar32& character, unsigned& clusterLength)
     42 {
     43     if (character <= 0x30FE) {
     44         // Deal with Hiragana and Katakana voiced and semi-voiced syllables.
     45         // Normalize into composed form, and then look for glyph with base + combined mark.
     46         // Check above for character range to minimize performance impact.
     47         if (UChar32 normalized = normalizeVoicingMarks()) {
     48             character = normalized;
     49             clusterLength = 2;
     50         }
     51         return true;
     52     }
     53 
     54     if (!U16_IS_SURROGATE(character))
     55         return true;
     56 
     57     // If we have a surrogate pair, make sure it starts with the high part.
     58     if (!U16_IS_SURROGATE_LEAD(character))
     59         return false;
     60 
     61     // Do we have a surrogate pair? If so, determine the full Unicode (32 bit) code point before glyph lookup.
     62     // Make sure we have another character and it's a low surrogate.
     63     if (m_currentCharacter + 1 >= m_endCharacter)
     64         return false;
     65 
     66     UChar low = m_characters[1];
     67     if (!U16_IS_TRAIL(low))
     68         return false;
     69 
     70     character = U16_GET_SUPPLEMENTARY(character, low);
     71     clusterLength = 2;
     72     return true;
     73 }
     74 
     75 UChar32 SurrogatePairAwareTextIterator::normalizeVoicingMarks()
     76 {
     77     // According to http://www.unicode.org/Public/UNIDATA/UCD.html#Canonical_Combining_Class_Values
     78     static const uint8_t hiraganaKatakanaVoicingMarksCombiningClass = 8;
     79 
     80     if (m_currentCharacter + 1 >= m_endCharacter)
     81         return 0;
     82 
     83     if (combiningClass(m_characters[1]) == hiraganaKatakanaVoicingMarksCombiningClass) {
     84         // Normalize into composed form using 3.2 rules.
     85         UChar normalizedCharacters[2] = { 0, 0 };
     86         UErrorCode uStatus = U_ZERO_ERROR;
     87         int32_t resultLength = unorm_normalize(m_characters, 2, UNORM_NFC, UNORM_UNICODE_3_2, &normalizedCharacters[0], 2, &uStatus);
     88         if (resultLength == 1 && !uStatus)
     89             return normalizedCharacters[0];
     90     }
     91 
     92     return 0;
     93 }
     94 
     95 }
     96