Home | History | Annotate | Download | only in dictionary
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "suggest/core/dictionary/digraph_utils.h"
     18 
     19 #include <cstdlib>
     20 
     21 #include "defines.h"
     22 #include "dictionary/interface/dictionary_header_structure_policy.h"
     23 #include "utils/char_utils.h"
     24 
     25 namespace latinime {
     26 
     27 const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] =
     28         { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
     29         { 'o', 'e', 0x00F6 },   // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
     30         { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
     31 const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
     32         { DIGRAPH_TYPE_GERMAN_UMLAUT };
     33 
     34 /* static */ bool DigraphUtils::hasDigraphForCodePoint(
     35         const DictionaryHeaderStructurePolicy *const headerPolicy,
     36         const int compositeGlyphCodePoint) {
     37     const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(headerPolicy);
     38     if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) {
     39         return true;
     40     }
     41     return false;
     42 }
     43 
     44 // Returns the digraph type associated with the given dictionary.
     45 /* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary(
     46         const DictionaryHeaderStructurePolicy *const headerPolicy) {
     47     if (headerPolicy->requiresGermanUmlautProcessing()) {
     48         return DIGRAPH_TYPE_GERMAN_UMLAUT;
     49     }
     50     return DIGRAPH_TYPE_NONE;
     51 }
     52 
     53 // Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index
     54 // (which specifies the first or second codepoint in the digraph).
     55 /* static */ int DigraphUtils::getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
     56         const DigraphCodePointIndex digraphCodePointIndex) {
     57     if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) {
     58         return NOT_A_CODE_POINT;
     59     }
     60     const DigraphUtils::digraph_t *const digraph =
     61             DigraphUtils::getDigraphForCodePoint(compositeGlyphCodePoint);
     62     if (!digraph) {
     63         return NOT_A_CODE_POINT;
     64     }
     65     if (digraphCodePointIndex == FIRST_DIGRAPH_CODEPOINT) {
     66         return digraph->first;
     67     } else if (digraphCodePointIndex == SECOND_DIGRAPH_CODEPOINT) {
     68         return digraph->second;
     69     }
     70     ASSERT(false);
     71     return NOT_A_CODE_POINT;
     72 }
     73 
     74 // Retrieves the set of all digraphs associated with the given digraph type.
     75 // Returns the size of the digraph array, or 0 if none exist.
     76 /* static */ int DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(
     77         const DigraphUtils::DigraphType digraphType,
     78         const DigraphUtils::digraph_t **const digraphs) {
     79     if (digraphType == DigraphUtils::DIGRAPH_TYPE_GERMAN_UMLAUT) {
     80         *digraphs = GERMAN_UMLAUT_DIGRAPHS;
     81         return NELEMS(GERMAN_UMLAUT_DIGRAPHS);
     82     }
     83     return 0;
     84 }
     85 
     86 /**
     87  * Returns the digraph for the input composite glyph codepoint, or nullptr if none exists.
     88  * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
     89  */
     90 /* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint(
     91         const int compositeGlyphCodePoint) {
     92     for (size_t i = 0; i < NELEMS(USED_DIGRAPH_TYPES); i++) {
     93         const DigraphUtils::digraph_t *const digraph = getDigraphForDigraphTypeAndCodePoint(
     94                 USED_DIGRAPH_TYPES[i], compositeGlyphCodePoint);
     95         if (digraph) {
     96             return digraph;
     97         }
     98     }
     99     return nullptr;
    100 }
    101 
    102 /**
    103  * Returns the digraph for the input composite glyph codepoint, or nullptr if none exists.
    104  * digraphType: the type of digraphs supported.
    105  * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
    106  */
    107 /* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint(
    108         const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) {
    109     const DigraphUtils::digraph_t *digraphs = nullptr;
    110     const int compositeGlyphLowerCodePoint = CharUtils::toLowerCase(compositeGlyphCodePoint);
    111     const int digraphsSize =
    112             DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(digraphType, &digraphs);
    113     for (int i = 0; i < digraphsSize; i++) {
    114         if (digraphs[i].compositeGlyph == compositeGlyphLowerCodePoint) {
    115             return &digraphs[i];
    116         }
    117     }
    118     return nullptr;
    119 }
    120 
    121 } // namespace latinime
    122