1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "char_utils.h" 18 #include "binary_format.h" 19 #include "defines.h" 20 #include "digraph_utils.h" 21 22 namespace latinime { 23 24 const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] = 25 { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS 26 { 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS 27 { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS 28 const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] = 29 { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE 30 { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE 31 const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = 32 { DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES }; 33 34 /* static */ bool DigraphUtils::hasDigraphForCodePoint( 35 const int dictFlags, const int compositeGlyphCodePoint) { 36 const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(dictFlags); 37 if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) { 38 return true; 39 } 40 return false; 41 } 42 43 // Returns the digraph type associated with the given dictionary. 44 /* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary( 45 const int dictFlags) { 46 if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & dictFlags) { 47 return DIGRAPH_TYPE_GERMAN_UMLAUT; 48 } 49 if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & dictFlags) { 50 return DIGRAPH_TYPE_FRENCH_LIGATURES; 51 } 52 return DIGRAPH_TYPE_NONE; 53 } 54 55 // Retrieves the set of all digraphs associated with the given dictionary flags. 56 // Returns the size of the digraph array, or 0 if none exist. 57 /* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize( 58 const int dictFlags, const DigraphUtils::digraph_t **const digraphs) { 59 const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(dictFlags); 60 return getAllDigraphsForDigraphTypeAndReturnSize(digraphType, digraphs); 61 } 62 63 // Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index 64 // (which specifies the first or second codepoint in the digraph). 65 /* static */ int DigraphUtils::getDigraphCodePointForIndex(const int compositeGlyphCodePoint, 66 const DigraphCodePointIndex digraphCodePointIndex) { 67 if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) { 68 return NOT_A_CODE_POINT; 69 } 70 const DigraphUtils::digraph_t *const digraph = 71 DigraphUtils::getDigraphForCodePoint(compositeGlyphCodePoint); 72 if (!digraph) { 73 return NOT_A_CODE_POINT; 74 } 75 if (digraphCodePointIndex == FIRST_DIGRAPH_CODEPOINT) { 76 return digraph->first; 77 } else if (digraphCodePointIndex == SECOND_DIGRAPH_CODEPOINT) { 78 return digraph->second; 79 } 80 ASSERT(false); 81 return NOT_A_CODE_POINT; 82 } 83 84 // Retrieves the set of all digraphs associated with the given digraph type. 85 // Returns the size of the digraph array, or 0 if none exist. 86 /* static */ int DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize( 87 const DigraphUtils::DigraphType digraphType, 88 const DigraphUtils::digraph_t **const digraphs) { 89 if (digraphType == DigraphUtils::DIGRAPH_TYPE_GERMAN_UMLAUT) { 90 *digraphs = GERMAN_UMLAUT_DIGRAPHS; 91 return NELEMS(GERMAN_UMLAUT_DIGRAPHS); 92 } 93 if (digraphType == DIGRAPH_TYPE_FRENCH_LIGATURES) { 94 *digraphs = FRENCH_LIGATURES_DIGRAPHS; 95 return NELEMS(FRENCH_LIGATURES_DIGRAPHS); 96 } 97 return 0; 98 } 99 100 /** 101 * Returns the digraph for the input composite glyph codepoint, or 0 if none exists. 102 * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint. 103 */ 104 /* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint( 105 const int compositeGlyphCodePoint) { 106 for (size_t i = 0; i < NELEMS(USED_DIGRAPH_TYPES); i++) { 107 const DigraphUtils::digraph_t *const digraph = getDigraphForDigraphTypeAndCodePoint( 108 USED_DIGRAPH_TYPES[i], compositeGlyphCodePoint); 109 if (digraph) { 110 return digraph; 111 } 112 } 113 return 0; 114 } 115 116 /** 117 * Returns the digraph for the input composite glyph codepoint, or 0 if none exists. 118 * digraphType: the type of digraphs supported. 119 * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint. 120 */ 121 /* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint( 122 const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) { 123 const DigraphUtils::digraph_t *digraphs = 0; 124 const int compositeGlyphLowerCodePoint = toLowerCase(compositeGlyphCodePoint); 125 const int digraphsSize = 126 DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(digraphType, &digraphs); 127 for (int i = 0; i < digraphsSize; i++) { 128 if (digraphs[i].compositeGlyph == compositeGlyphLowerCodePoint) { 129 return &digraphs[i]; 130 } 131 } 132 return 0; 133 } 134 135 } // namespace latinime 136