Home | History | Annotate | Download | only in parsers
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package javax.xml.parsers;
     19 
     20 class FilePathToURI {
     21 
     22     // which ASCII characters need to be escaped
     23     private static boolean gNeedEscaping[] = new boolean[128];
     24     // the first hex character if a character needs to be escaped
     25     private static char[] gAfterEscaping1 = new char[128];
     26     // the second hex character if a character needs to be escaped
     27     private static char[] gAfterEscaping2 = new char[128];
     28     private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
     29                                      '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
     30     // initialize the above 3 arrays
     31     static {
     32         for (int i = 0; i <= 0x1f; i++) {
     33             gNeedEscaping[i] = true;
     34             gAfterEscaping1[i] = gHexChs[i >> 4];
     35             gAfterEscaping2[i] = gHexChs[i & 0xf];
     36         }
     37         gNeedEscaping[0x7f] = true;
     38         gAfterEscaping1[0x7f] = '7';
     39         gAfterEscaping2[0x7f] = 'F';
     40         char[] escChs = {' ', '<', '>', '#', '%', '"', '{', '}',
     41                          '|', '\\', '^', '~', '[', ']', '`'};
     42         int len = escChs.length;
     43         char ch;
     44         for (int i = 0; i < len; i++) {
     45             ch = escChs[i];
     46             gNeedEscaping[ch] = true;
     47             gAfterEscaping1[ch] = gHexChs[ch >> 4];
     48             gAfterEscaping2[ch] = gHexChs[ch & 0xf];
     49         }
     50     }
     51 
     52     // To escape a file path to a URI, by using %HH to represent
     53     // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', '#', '%'
     54     // and '"' and non-ASCII characters (whose value >= 128).
     55     public static String filepath2URI(String path){
     56         // return null if path is null.
     57         if (path == null)
     58             return null;
     59 
     60         char separator = java.io.File.separatorChar;
     61         path = path.replace(separator, '/');
     62 
     63         int len = path.length(), ch;
     64         StringBuilder buffer = new StringBuilder(len*3);
     65         buffer.append("file://");
     66         // change C:/blah to /C:/blah
     67         if (len >= 2 && path.charAt(1) == ':') {
     68             ch = Character.toUpperCase(path.charAt(0));
     69             if (ch >= 'A' && ch <= 'Z') {
     70                 buffer.append('/');
     71             }
     72         }
     73 
     74         // for each character in the path
     75         int i = 0;
     76         for (; i < len; i++) {
     77             ch = path.charAt(i);
     78             // if it's not an ASCII character, break here, and use UTF-8 encoding
     79             if (ch >= 128)
     80                 break;
     81             if (gNeedEscaping[ch]) {
     82                 buffer.append('%');
     83                 buffer.append(gAfterEscaping1[ch]);
     84                 buffer.append(gAfterEscaping2[ch]);
     85                 // record the fact that it's escaped
     86             }
     87             else {
     88                 buffer.append((char)ch);
     89             }
     90         }
     91 
     92         // we saw some non-ascii character
     93         if (i < len) {
     94             // get UTF-8 bytes for the remaining sub-string
     95             byte[] bytes = null;
     96             byte b;
     97             try {
     98                 bytes = path.substring(i).getBytes("UTF-8");
     99             } catch (java.io.UnsupportedEncodingException e) {
    100                 // should never happen
    101                 return path;
    102             }
    103             len = bytes.length;
    104 
    105             // for each byte
    106             for (i = 0; i < len; i++) {
    107                 b = bytes[i];
    108                 // for non-ascii character: make it positive, then escape
    109                 if (b < 0) {
    110                     ch = b + 256;
    111                     buffer.append('%');
    112                     buffer.append(gHexChs[ch >> 4]);
    113                     buffer.append(gHexChs[ch & 0xf]);
    114                 }
    115                 else if (gNeedEscaping[b]) {
    116                     buffer.append('%');
    117                     buffer.append(gAfterEscaping1[b]);
    118                     buffer.append(gAfterEscaping2[b]);
    119                 }
    120                 else {
    121                     buffer.append((char)b);
    122                 }
    123             }
    124         }
    125 
    126         return buffer.toString();
    127     }
    128 
    129 }//FilePathToURI
    130