Home | History | Annotate | Download | only in runtime
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "utf.h"
     18 
     19 #include "base/logging.h"
     20 #include "mirror/array.h"
     21 #include "mirror/object-inl.h"
     22 
     23 namespace art {
     24 
     25 size_t CountModifiedUtf8Chars(const char* utf8) {
     26   size_t len = 0;
     27   int ic;
     28   while ((ic = *utf8++) != '\0') {
     29     len++;
     30     if ((ic & 0x80) == 0) {
     31       // one-byte encoding
     32       continue;
     33     }
     34     // two- or three-byte encoding
     35     utf8++;
     36     if ((ic & 0x20) == 0) {
     37       // two-byte encoding
     38       continue;
     39     }
     40     // three-byte encoding
     41     utf8++;
     42   }
     43   return len;
     44 }
     45 
     46 void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) {
     47   while (*utf8_data_in != '\0') {
     48     *utf16_data_out++ = GetUtf16FromUtf8(&utf8_data_in);
     49   }
     50 }
     51 
     52 void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count) {
     53   while (char_count--) {
     54     uint16_t ch = *utf16_in++;
     55     if (ch > 0 && ch <= 0x7f) {
     56       *utf8_out++ = ch;
     57     } else {
     58       if (ch > 0x07ff) {
     59         *utf8_out++ = (ch >> 12) | 0xe0;
     60         *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80;
     61         *utf8_out++ = (ch & 0x3f) | 0x80;
     62       } else /*(ch > 0x7f || ch == 0)*/ {
     63         *utf8_out++ = (ch >> 6) | 0xc0;
     64         *utf8_out++ = (ch & 0x3f) | 0x80;
     65       }
     66     }
     67   }
     68 }
     69 
     70 int32_t ComputeUtf16Hash(const mirror::CharArray* chars, int32_t offset,
     71                          size_t char_count) {
     72   int32_t hash = 0;
     73   for (size_t i = 0; i < char_count; i++) {
     74     hash = hash * 31 + chars->Get(offset + i);
     75   }
     76   return hash;
     77 }
     78 
     79 int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count) {
     80   int32_t hash = 0;
     81   while (char_count--) {
     82     hash = hash * 31 + *chars++;
     83   }
     84   return hash;
     85 }
     86 
     87 
     88 uint16_t GetUtf16FromUtf8(const char** utf8_data_in) {
     89   uint8_t one = *(*utf8_data_in)++;
     90   if ((one & 0x80) == 0) {
     91     // one-byte encoding
     92     return one;
     93   }
     94   // two- or three-byte encoding
     95   uint8_t two = *(*utf8_data_in)++;
     96   if ((one & 0x20) == 0) {
     97     // two-byte encoding
     98     return ((one & 0x1f) << 6) | (two & 0x3f);
     99   }
    100   // three-byte encoding
    101   uint8_t three = *(*utf8_data_in)++;
    102   return ((one & 0x0f) << 12) | ((two & 0x3f) << 6) | (three & 0x3f);
    103 }
    104 
    105 int CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(const char* utf8_1, const char* utf8_2) {
    106   for (;;) {
    107     if (*utf8_1 == '\0') {
    108       return (*utf8_2 == '\0') ? 0 : -1;
    109     } else if (*utf8_2 == '\0') {
    110       return 1;
    111     }
    112 
    113     int c1 = GetUtf16FromUtf8(&utf8_1);
    114     int c2 = GetUtf16FromUtf8(&utf8_2);
    115 
    116     if (c1 != c2) {
    117       return c1 > c2 ? 1 : -1;
    118     }
    119   }
    120 }
    121 
    122 int CompareModifiedUtf8ToUtf16AsCodePointValues(const char* utf8_1, const uint16_t* utf8_2) {
    123   for (;;) {
    124     if (*utf8_1 == '\0') {
    125       return (*utf8_2 == '\0') ? 0 : -1;
    126     } else if (*utf8_2 == '\0') {
    127       return 1;
    128     }
    129 
    130     int c1 = GetUtf16FromUtf8(&utf8_1);
    131     int c2 = *utf8_2;
    132 
    133     if (c1 != c2) {
    134       return c1 > c2 ? 1 : -1;
    135     }
    136   }
    137 }
    138 
    139 size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) {
    140   size_t result = 0;
    141   while (char_count--) {
    142     uint16_t ch = *chars++;
    143     if (ch > 0 && ch <= 0x7f) {
    144       ++result;
    145     } else {
    146       if (ch > 0x7ff) {
    147         result += 3;
    148       } else {
    149         result += 2;
    150       }
    151     }
    152   }
    153   return result;
    154 }
    155 
    156 }  // namespace art
    157