1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "text/Unicode.h" 18 19 #include <algorithm> 20 #include <array> 21 22 #include "text/Utf8Iterator.h" 23 24 using ::android::StringPiece; 25 26 namespace aapt { 27 namespace text { 28 29 namespace { 30 31 struct CharacterProperties { 32 enum : uint32_t { 33 kXidStart = 1 << 0, 34 kXidContinue = 1 << 1, 35 }; 36 37 char32_t first_char; 38 char32_t last_char; 39 uint32_t properties; 40 }; 41 42 // Incude the generated data table. 43 #include "text/Unicode_data.cpp" 44 45 bool CompareCharacterProperties(const CharacterProperties& a, char32_t codepoint) { 46 return a.last_char < codepoint; 47 } 48 49 uint32_t FindCharacterProperties(char32_t codepoint) { 50 const auto iter_end = sCharacterProperties.end(); 51 const auto iter = std::lower_bound(sCharacterProperties.begin(), iter_end, codepoint, 52 CompareCharacterProperties); 53 if (iter != iter_end && codepoint >= iter->first_char) { 54 return iter->properties; 55 } 56 return 0u; 57 } 58 59 } // namespace 60 61 bool IsXidStart(char32_t codepoint) { 62 return FindCharacterProperties(codepoint) & CharacterProperties::kXidStart; 63 } 64 65 bool IsXidContinue(char32_t codepoint) { 66 return FindCharacterProperties(codepoint) & CharacterProperties::kXidContinue; 67 } 68 69 // Hardcode the White_Space characters since they are few and the external/icu project doesn't 70 // list them as data files to parse. 71 // Sourced from http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt 72 bool IsWhitespace(char32_t codepoint) { 73 return (codepoint >= 0x0009 && codepoint <= 0x000d) || (codepoint == 0x0020) || 74 (codepoint == 0x0085) || (codepoint == 0x00a0) || (codepoint == 0x1680) || 75 (codepoint >= 0x2000 && codepoint <= 0x200a) || (codepoint == 0x2028) || 76 (codepoint == 0x2029) || (codepoint == 0x202f) || (codepoint == 0x205f) || 77 (codepoint == 0x3000); 78 } 79 80 bool IsJavaIdentifier(const StringPiece& str) { 81 Utf8Iterator iter(str); 82 83 // Check the first character. 84 if (!iter.HasNext()) { 85 return false; 86 } 87 88 const char32_t first_codepoint = iter.Next(); 89 if (!IsXidStart(first_codepoint) && first_codepoint != U'_' && first_codepoint != U'$') { 90 return false; 91 } 92 93 while (iter.HasNext()) { 94 const char32_t codepoint = iter.Next(); 95 if (!IsXidContinue(codepoint) && codepoint != U'$') { 96 return false; 97 } 98 } 99 return true; 100 } 101 102 bool IsValidResourceEntryName(const StringPiece& str) { 103 Utf8Iterator iter(str); 104 105 // Check the first character. 106 if (!iter.HasNext()) { 107 return false; 108 } 109 110 // Resources are allowed to start with '_' 111 const char32_t first_codepoint = iter.Next(); 112 if (!IsXidStart(first_codepoint) && first_codepoint != U'_') { 113 return false; 114 } 115 116 while (iter.HasNext()) { 117 const char32_t codepoint = iter.Next(); 118 if (!IsXidContinue(codepoint) && codepoint != U'.' && codepoint != U'-') { 119 return false; 120 } 121 } 122 return true; 123 } 124 125 } // namespace text 126 } // namespace aapt 127