Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "text/Unicode.h"
     18 
     19 #include <algorithm>
     20 #include <array>
     21 
     22 #include "text/Utf8Iterator.h"
     23 
     24 using ::android::StringPiece;
     25 
     26 namespace aapt {
     27 namespace text {
     28 
     29 namespace {
     30 
     31 struct CharacterProperties {
     32   enum : uint32_t {
     33     kXidStart = 1 << 0,
     34     kXidContinue = 1 << 1,
     35   };
     36 
     37   char32_t first_char;
     38   char32_t last_char;
     39   uint32_t properties;
     40 };
     41 
     42 // Include the generated data table.
     43 #include "text/Unicode_data.cpp"
     44 
     45 bool CompareCharacterProperties(const CharacterProperties& a, char32_t codepoint) {
     46   return a.last_char < codepoint;
     47 }
     48 
     49 uint32_t FindCharacterProperties(char32_t codepoint) {
     50   const auto iter_end = sCharacterProperties.end();
     51   const auto iter = std::lower_bound(sCharacterProperties.begin(), iter_end, codepoint,
     52                                      CompareCharacterProperties);
     53   if (iter != iter_end && codepoint >= iter->first_char) {
     54     return iter->properties;
     55   }
     56   return 0u;
     57 }
     58 
     59 }  // namespace
     60 
     61 bool IsXidStart(char32_t codepoint) {
     62   return FindCharacterProperties(codepoint) & CharacterProperties::kXidStart;
     63 }
     64 
     65 bool IsXidContinue(char32_t codepoint) {
     66   return FindCharacterProperties(codepoint) & CharacterProperties::kXidContinue;
     67 }
     68 
     69 // Hardcode the White_Space characters since they are few and the external/icu project doesn't
     70 // list them as data files to parse.
     71 // Sourced from http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
     72 bool IsWhitespace(char32_t codepoint) {
     73   return (codepoint >= 0x0009 && codepoint <= 0x000d) || (codepoint == 0x0020) ||
     74          (codepoint == 0x0085) || (codepoint == 0x00a0) || (codepoint == 0x1680) ||
     75          (codepoint >= 0x2000 && codepoint <= 0x200a) || (codepoint == 0x2028) ||
     76          (codepoint == 0x2029) || (codepoint == 0x202f) || (codepoint == 0x205f) ||
     77          (codepoint == 0x3000);
     78 }
     79 
     80 bool IsJavaIdentifier(const StringPiece& str) {
     81   Utf8Iterator iter(str);
     82 
     83   // Check the first character.
     84   if (!iter.HasNext()) {
     85     return false;
     86   }
     87 
     88   const char32_t first_codepoint = iter.Next();
     89   if (!IsXidStart(first_codepoint) && first_codepoint != U'_' && first_codepoint != U'$') {
     90     return false;
     91   }
     92 
     93   while (iter.HasNext()) {
     94     const char32_t codepoint = iter.Next();
     95     if (!IsXidContinue(codepoint) && codepoint != U'$') {
     96       return false;
     97     }
     98   }
     99   return true;
    100 }
    101 
    102 bool IsValidResourceEntryName(const StringPiece& str) {
    103   Utf8Iterator iter(str);
    104 
    105   // Check the first character.
    106   if (!iter.HasNext()) {
    107     return false;
    108   }
    109 
    110   // Resources are allowed to start with '_'
    111   const char32_t first_codepoint = iter.Next();
    112   if (!IsXidStart(first_codepoint) && first_codepoint != U'_') {
    113     return false;
    114   }
    115 
    116   while (iter.HasNext()) {
    117     const char32_t codepoint = iter.Next();
    118     if (!IsXidContinue(codepoint) && codepoint != U'.' && codepoint != U'-') {
    119       return false;
    120     }
    121   }
    122   return true;
    123 }
    124 
    125 }  // namespace text
    126 }  // namespace aapt
    127