Home | History | Annotate | Download | only in SQLite
      1 package SQLite;
      2 
      3 /**
      4  * String encoder/decoder for SQLite.
      5  *
      6  * This module was kindly donated by Eric van der Maarel of Nedap N.V.
      7  *
      8  * This encoder was implemented based on an original idea from an anonymous
      9  * author in the source code of the SQLite distribution.
     10  * I feel obliged to provide a quote from the original C-source code:
     11  *
     12  * "The author disclaims copyright to this source code.  In place of
     13  *  a legal notice, here is a blessing:
     14  *
     15  *     May you do good and not evil.
     16  *     May you find forgiveness for yourself and forgive others.
     17  *     May you share freely, never taking more than you give."
     18  *
     19  */
     20 
     21 public class StringEncoder {
     22 
     23     /**
     24      * Encodes the given byte array into a string that can be used by
     25      * the SQLite database. The database cannot handle null (0x00) and
     26      * the character '\'' (0x27). The encoding consists of escaping
     27      * these characters with a reserved character (0x01). The escaping
     28      * is applied after determining and applying a shift that minimizes
     29      * the number of escapes required.
     30      * With this encoding the data of original size n is increased to a
     31      * maximum of 1+(n*257)/254.
     32      * For sufficiently large n the overhead is thus less than 1.2%.
     33      * @param a the byte array to be encoded. A null reference is handled as
     34      *     an empty array.
     35      * @return the encoded bytes as a string. When an empty array is
     36      *     provided a string of length 1 is returned, the value of
     37      *     which is bogus.
     38      *     When decoded with this class' <code>decode</code> method
     39      *     a string of size 1 will return an empty byte array.
     40      */
     41 
     42     public static String encode(byte[] a) {
     43 	// check input
     44 	if (a == null || a.length == 0) {
     45 	    // bogus shift, no data
     46 	    return "x";
     47 	}
     48 	// determine count
     49 	int[] cnt = new int[256];
     50 	for (int i = 0 ; i < a.length; i++) {
     51 	    cnt[a[i] & 0xff]++;
     52 	}
     53 	// determine shift for minimum number of escapes
     54 	int shift = 1;
     55 	int nEscapes = a.length;
     56 	for (int i = 1; i < 256; i++) {
     57 	    if (i == '\'') {
     58 		continue;
     59 	    }
     60 	    int sum = cnt[i] + cnt[(i + 1) & 0xff] + cnt[(i + '\'') & 0xff];
     61 	    if (sum < nEscapes) {
     62 		nEscapes = sum;
     63 		shift = i;
     64 		if (nEscapes == 0) {
     65 		    // cannot become smaller
     66 		    break;
     67 		}
     68 	    }
     69 	}
     70 	// construct encoded output
     71 	int outLen = a.length + nEscapes + 1;
     72 	StringBuffer out = new StringBuffer(outLen);
     73 	out.append((char)shift);
     74 	for (int i = 0; i < a.length; i++) {
     75 	    // apply shift
     76 	    char c = (char)((a[i] - shift)&0xff);
     77 	    // insert escapes
     78 	    if (c == 0) { // forbidden
     79 		out.append((char)1);
     80 		out.append((char)1);
     81 	    } else if (c == 1) { // escape character
     82 		out.append((char)1);
     83 		out.append((char)2);
     84 	    } else if (c == '\'') { // forbidden
     85 		out.append((char)1);
     86 		out.append((char)3);
     87 	    } else {
     88 		out.append(c);
     89 	    }
     90 	}
     91 	return out.toString();
     92     }
     93 
     94     /**
     95      * Decodes the given string that is assumed to be a valid encoding
     96      * of a byte array. Typically the given string is generated by
     97      * this class' <code>encode</code> method.
     98      * @param s the given string encoding.
     99      * @return the byte array obtained from the decoding.
    100      * @throws IllegalArgumentException when the string given is not
    101      *    a valid encoded string for this encoder.
    102      */
    103 
    104     public static byte[] decode(String s) {
    105 	char[] a = s.toCharArray();
    106 	if (a.length > 2 && a[0] == 'X' &&
    107 	    a[1] == '\'' && a[a.length-1] == '\'') {
    108 	    // SQLite3 BLOB syntax
    109 	    byte[] result = new byte[(a.length-3)/2];
    110 	    for (int i = 2, k = 0; i < a.length - 1; i += 2, k++) {
    111 		byte tmp;
    112 		switch (a[i]) {
    113 		case '0': tmp = 0; break;
    114 		case '1': tmp = 1; break;
    115 		case '2': tmp = 2; break;
    116 		case '3': tmp = 3; break;
    117 		case '4': tmp = 4; break;
    118 		case '5': tmp = 5; break;
    119 		case '6': tmp = 6; break;
    120 		case '7': tmp = 7; break;
    121 		case '8': tmp = 8; break;
    122 		case '9': tmp = 9; break;
    123 		case 'A':
    124 		case 'a': tmp = 10; break;
    125 		case 'B':
    126 		case 'b': tmp = 11; break;
    127 		case 'C':
    128 		case 'c': tmp = 12; break;
    129 		case 'D':
    130 		case 'd': tmp = 13; break;
    131 		case 'E':
    132 		case 'e': tmp = 14; break;
    133 		case 'F':
    134 		case 'f': tmp = 15; break;
    135 		default:  tmp = 0; break;
    136 		}
    137 		result[k] = (byte) (tmp << 4);
    138 		switch (a[i+1]) {
    139 		case '0': tmp = 0; break;
    140 		case '1': tmp = 1; break;
    141 		case '2': tmp = 2; break;
    142 		case '3': tmp = 3; break;
    143 		case '4': tmp = 4; break;
    144 		case '5': tmp = 5; break;
    145 		case '6': tmp = 6; break;
    146 		case '7': tmp = 7; break;
    147 		case '8': tmp = 8; break;
    148 		case '9': tmp = 9; break;
    149 		case 'A':
    150 		case 'a': tmp = 10; break;
    151 		case 'B':
    152 		case 'b': tmp = 11; break;
    153 		case 'C':
    154 		case 'c': tmp = 12; break;
    155 		case 'D':
    156 		case 'd': tmp = 13; break;
    157 		case 'E':
    158 		case 'e': tmp = 14; break;
    159 		case 'F':
    160 		case 'f': tmp = 15; break;
    161 		default:  tmp = 0; break;
    162 		}
    163 		result[k] |= tmp;
    164 	    }
    165 	    return result;
    166 	}
    167 	// first element is the shift
    168 	byte[] result = new byte[a.length-1];
    169 	int i = 0;
    170 	int shift = s.charAt(i++);
    171 	int j = 0;
    172 	while (i < s.length()) {
    173 	    int c;
    174 	    if ((c = s.charAt(i++)) == 1) { // escape character found
    175 		if ((c = s.charAt(i++)) == 1) {
    176 		    c = 0;
    177 		} else if (c == 2) {
    178 		    c = 1;
    179 		} else if (c == 3) {
    180 		    c = '\'';
    181 		} else {
    182 		    throw new IllegalArgumentException(
    183 			"invalid string passed to decoder: " + j);
    184 		}
    185 	    }
    186 	    // do shift
    187 	    result[j++] = (byte)((c + shift) & 0xff);
    188 	}
    189 	int outLen = j;
    190 	// provide array of correct length
    191 	if (result.length != outLen) {
    192 	    result = byteCopy(result, 0, outLen, new byte[outLen]);
    193 	}
    194 	return result;
    195     }
    196 
    197     /**
    198      * Copies count elements from source, starting at element with
    199      * index offset, to the given target.
    200      * @param source the source.
    201      * @param offset the offset.
    202      * @param count the number of elements to be copied.
    203      * @param target the target to be returned.
    204      * @return the target being copied to.
    205      */
    206 
    207     private static byte[] byteCopy(byte[] source, int offset,
    208 				   int count, byte[] target) {
    209 	for (int i = offset, j = 0; i < offset + count; i++, j++) {
    210 	    target[j] = source[i];
    211 	}
    212 	return target;
    213     }
    214 
    215 
    216     static final char[] xdigits = {
    217 	'0', '1', '2', '3', '4', '5', '6', '7',
    218 	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    219     };
    220 
    221     /**
    222      * Encodes the given byte array into SQLite3 blob notation, ie X'..'
    223      * @param a the byte array to be encoded. A null reference is handled as
    224      *     an empty array.
    225      * @return the encoded bytes as a string.
    226      */
    227 
    228     public static String encodeX(byte[] a) {
    229 	// check input
    230 	if (a == null || a.length == 0) {
    231 	    return "X''";
    232 	}
    233 	int outLen = a.length * 2 + 3;
    234 	StringBuffer out = new StringBuffer(outLen);
    235 	out.append('X');
    236 	out.append('\'');
    237 	for (int i = 0; i < a.length; i++) {
    238 	    out.append(xdigits[(a[i] >> 4) & 0x0F]);
    239 	    out.append(xdigits[a[i] & 0x0F]);
    240 	}
    241 	out.append('\'');
    242 	return out.toString();
    243     }
    244 }
    245