1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package java.lang; 19 20 import java.io.Serializable; 21 import java.io.UnsupportedEncodingException; 22 import java.nio.ByteBuffer; 23 import java.nio.CharBuffer; 24 import java.nio.charset.Charset; 25 import java.util.Arrays; 26 import java.util.Comparator; 27 import libcore.util.CharsetUtils; 28 import libcore.util.EmptyArray; 29 30 /** 31 * Class used to generate strings instead of calling String.<init>. 32 * 33 * @hide 34 */ 35 public final class StringFactory { 36 37 // TODO: Remove once native methods are in place. 38 private static final char REPLACEMENT_CHAR = (char) 0xfffd; 39 40 public static String newEmptyString() { 41 return newStringFromChars(EmptyArray.CHAR, 0, 0); 42 } 43 44 public static String newStringFromBytes(byte[] data) { 45 return newStringFromBytes(data, 0, data.length); 46 } 47 48 public static String newStringFromBytes(byte[] data, int high) { 49 return newStringFromBytes(data, high, 0, data.length); 50 } 51 52 public static String newStringFromBytes(byte[] data, int offset, int byteCount) { 53 return newStringFromBytes(data, offset, byteCount, Charset.defaultCharset()); 54 } 55 56 public static native String newStringFromBytes(byte[] data, int high, int offset, int byteCount); 57 58 public static String newStringFromBytes(byte[] data, int offset, int byteCount, String charsetName) throws UnsupportedEncodingException { 59 return newStringFromBytes(data, offset, byteCount, Charset.forNameUEE(charsetName)); 60 } 61 62 public static String newStringFromBytes(byte[] data, String charsetName) throws UnsupportedEncodingException { 63 return newStringFromBytes(data, 0, data.length, Charset.forNameUEE(charsetName)); 64 } 65 66 // TODO: Implement this method natively. 67 public static String newStringFromBytes(byte[] data, int offset, int byteCount, Charset charset) { 68 if ((offset | byteCount) < 0 || byteCount > data.length - offset) { 69 throw new StringIndexOutOfBoundsException(data.length, offset, byteCount); 70 } 71 72 char[] value; 73 int length; 74 75 // We inline UTF-8, ISO-8859-1, and US-ASCII decoders for speed. 76 String canonicalCharsetName = charset.name(); 77 if (canonicalCharsetName.equals("UTF-8")) { 78 byte[] d = data; 79 char[] v = new char[byteCount]; 80 81 int idx = offset; 82 int last = offset + byteCount; 83 int s = 0; 84 outer: 85 while (idx < last) { 86 byte b0 = d[idx++]; 87 if ((b0 & 0x80) == 0) { 88 // 0xxxxxxx 89 // Range: U-00000000 - U-0000007F 90 int val = b0 & 0xff; 91 v[s++] = (char) val; 92 } else if (((b0 & 0xe0) == 0xc0) || ((b0 & 0xf0) == 0xe0) || 93 ((b0 & 0xf8) == 0xf0) || ((b0 & 0xfc) == 0xf8) || ((b0 & 0xfe) == 0xfc)) { 94 int utfCount = 1; 95 if ((b0 & 0xf0) == 0xe0) utfCount = 2; 96 else if ((b0 & 0xf8) == 0xf0) utfCount = 3; 97 else if ((b0 & 0xfc) == 0xf8) utfCount = 4; 98 else if ((b0 & 0xfe) == 0xfc) utfCount = 5; 99 100 // 110xxxxx (10xxxxxx)+ 101 // Range: U-00000080 - U-000007FF (count == 1) 102 // Range: U-00000800 - U-0000FFFF (count == 2) 103 // Range: U-00010000 - U-001FFFFF (count == 3) 104 // Range: U-00200000 - U-03FFFFFF (count == 4) 105 // Range: U-04000000 - U-7FFFFFFF (count == 5) 106 107 if (idx + utfCount > last) { 108 v[s++] = REPLACEMENT_CHAR; 109 continue; 110 } 111 112 // Extract usable bits from b0 113 int val = b0 & (0x1f >> (utfCount - 1)); 114 for (int i = 0; i < utfCount; ++i) { 115 byte b = d[idx++]; 116 if ((b & 0xc0) != 0x80) { 117 v[s++] = REPLACEMENT_CHAR; 118 idx--; // Put the input char back 119 continue outer; 120 } 121 // Push new bits in from the right side 122 val <<= 6; 123 val |= b & 0x3f; 124 } 125 126 // Note: Java allows overlong char 127 // specifications To disallow, check that val 128 // is greater than or equal to the minimum 129 // value for each count: 130 // 131 // count min value 132 // ----- ---------- 133 // 1 0x80 134 // 2 0x800 135 // 3 0x10000 136 // 4 0x200000 137 // 5 0x4000000 138 139 // Allow surrogate values (0xD800 - 0xDFFF) to 140 // be specified using 3-byte UTF values only 141 if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) { 142 v[s++] = REPLACEMENT_CHAR; 143 continue; 144 } 145 146 // Reject chars greater than the Unicode maximum of U+10FFFF. 147 if (val > 0x10FFFF) { 148 v[s++] = REPLACEMENT_CHAR; 149 continue; 150 } 151 152 // Encode chars from U+10000 up as surrogate pairs 153 if (val < 0x10000) { 154 v[s++] = (char) val; 155 } else { 156 int x = val & 0xffff; 157 int u = (val >> 16) & 0x1f; 158 int w = (u - 1) & 0xffff; 159 int hi = 0xd800 | (w << 6) | (x >> 10); 160 int lo = 0xdc00 | (x & 0x3ff); 161 v[s++] = (char) hi; 162 v[s++] = (char) lo; 163 } 164 } else { 165 // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfd-0xff 166 v[s++] = REPLACEMENT_CHAR; 167 } 168 } 169 170 if (s == byteCount) { 171 // We guessed right, so we can use our temporary array as-is. 172 value = v; 173 length = s; 174 } else { 175 // Our temporary array was too big, so reallocate and copy. 176 value = new char[s]; 177 length = s; 178 System.arraycopy(v, 0, value, 0, s); 179 } 180 } else if (canonicalCharsetName.equals("ISO-8859-1")) { 181 value = new char[byteCount]; 182 length = byteCount; 183 CharsetUtils.isoLatin1BytesToChars(data, offset, byteCount, value); 184 } else if (canonicalCharsetName.equals("US-ASCII")) { 185 value = new char[byteCount]; 186 length = byteCount; 187 CharsetUtils.asciiBytesToChars(data, offset, byteCount, value); 188 } else { 189 CharBuffer cb = charset.decode(ByteBuffer.wrap(data, offset, byteCount)); 190 length = cb.length(); 191 if (length > 0) { 192 // We could use cb.array() directly, but that would mean we'd have to trust 193 // the CharsetDecoder doesn't hang on to the CharBuffer and mutate it later, 194 // which would break String's immutability guarantee. It would also tend to 195 // mean that we'd be wasting memory because CharsetDecoder doesn't trim the 196 // array. So we copy. 197 value = new char[length]; 198 System.arraycopy(cb.array(), 0, value, 0, length); 199 } else { 200 value = EmptyArray.CHAR; 201 } 202 } 203 return newStringFromChars(value, 0, length); 204 } 205 206 public static String newStringFromBytes(byte[] data, Charset charset) { 207 return newStringFromBytes(data, 0, data.length, charset); 208 } 209 210 public static String newStringFromChars(char[] data) { 211 return newStringFromChars(data, 0, data.length); 212 } 213 214 public static String newStringFromChars(char[] data, int offset, int charCount) { 215 if ((offset | charCount) < 0 || charCount > data.length - offset) { 216 throw new StringIndexOutOfBoundsException(data.length, offset, charCount); 217 } 218 return newStringFromChars(offset, charCount, data); 219 } 220 221 // The char array passed as {@code java_data} must not be a null reference. 222 static native String newStringFromChars(int offset, int charCount, char[] data); 223 224 public static native String newStringFromString(String toCopy); 225 226 public static String newStringFromStringBuffer(StringBuffer stringBuffer) { 227 synchronized (stringBuffer) { 228 return newStringFromChars(stringBuffer.getValue(), 0, stringBuffer.length()); 229 } 230 } 231 232 // TODO: Implement this method natively. 233 public static String newStringFromCodePoints(int[] codePoints, int offset, int count) { 234 if (codePoints == null) { 235 throw new NullPointerException("codePoints == null"); 236 } 237 if ((offset | count) < 0 || count > codePoints.length - offset) { 238 throw new StringIndexOutOfBoundsException(codePoints.length, offset, count); 239 } 240 char[] value = new char[count * 2]; 241 int end = offset + count; 242 int length = 0; 243 for (int i = offset; i < end; i++) { 244 length += Character.toChars(codePoints[i], value, length); 245 } 246 return newStringFromChars(value, 0, length); 247 } 248 249 public static String newStringFromStringBuilder(StringBuilder stringBuilder) { 250 return newStringFromChars(stringBuilder.getValue(), 0, stringBuilder.length()); 251 } 252 } 253