// =================================================================================================
// ADOBE SYSTEMS INCORPORATED
// Copyright 2006 Adobe Systems Incorporated
// All Rights Reserved
//
// NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
// of the Adobe license agreement accompanying it.
// =================================================================================================

package com.adobe.xmp.impl;

import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;


/**
 * A reader that replaces control characters with a space while passing everything else through.
 * <p>
 * Both literal control characters and control characters written as numeric character
 * references (decimal <code>&amp;#N;</code> or hexadecimal <code>&amp;#xH;</code>) are replaced.
 * Which characters count as control characters is decided by
 * <code>Utils.isControlChar(char)</code>. Numeric references that do not denote a control
 * character, and sequences that are not complete numeric references, are passed through
 * unchanged.
 * <p>
 * The references are recognised by a small state machine that reads ahead and uses the
 * push-back buffer to rewind when a sequence turns out not to be a control character.
 *
 * @since   22.08.2006
 */
public class FixASCIIControlsReader extends PushbackReader
{
    /** Not inside an escape sequence; characters are emitted directly. */
    private static final int STATE_START = 0;
    /** An ampersand has been read. */
    private static final int STATE_AMP = 1;
    /** "&amp;#" has been read. */
    private static final int STATE_HASH = 2;
    /** "&amp;#x" has been read; hexadecimal digits are being collected. */
    private static final int STATE_HEX = 3;
    /** "&amp;#" and at least one decimal digit have been read. */
    private static final int STATE_DIG1 = 4;
    /**
     * The read-ahead did not form a control character reference. The characters were pushed
     * back and the next character (the leading ampersand) is emitted literally.
     */
    private static final int STATE_ERROR = 5;
    /** Size of the push-back and read-ahead buffers. */
    private static final int BUFFER_SIZE = 8;
    /** Maximum number of decimal digits accepted in a reference. */
    private static final int MAX_DECIMAL_DIGITS = 5;
    /** Maximum number of hexadecimal digits accepted in a reference. */
    private static final int MAX_HEX_DIGITS = 4;

    /** the state of the automaton */
    private int state = STATE_START;
    /** the numeric value of the escape sequence collected so far */
    private int control = 0;
    /** count the digits of the sequence */
    private int digits = 0;


    /**
     * Creates the reader with a push-back buffer of {@link #BUFFER_SIZE} characters.
     * The longest read-ahead is 8 characters: "&amp;#x" followed by four hex digits and one
     * more character, or "&amp;#" followed by five decimal digits and one more character.
     *
     * @see PushbackReader#PushbackReader(Reader, int)
     * @param in a Reader
     */
    public FixASCIIControlsReader(Reader in)
    {
        super(in, BUFFER_SIZE);
    }


    /**
     * Reads a single filtered character.
     * <p>
     * <code>PushbackReader.read()</code> does not go through
     * {@link #read(char[], int, int)}, so without this override single character reads
     * would bypass the control character replacement.
     *
     * @return the character read, or -1 if the end of the stream has been reached
     * @throws IOException if the underlying reader fails
     * @see Reader#read()
     */
    public int read() throws IOException
    {
        char[] single = new char[1];
        return read(single, 0, 1) == 1 ? single[0] : -1;
    }


    /**
     * Reads filtered characters into a portion of an array.
     *
     * @param cbuf destination buffer
     * @param off offset at which to start storing characters
     * @param len maximum number of characters to store
     * @return the number of characters stored, or -1 if the end of the stream has been reached
     * @throws IOException if the underlying reader fails
     * @see Reader#read(char[], int, int)
     */
    public int read(char[] cbuf, int off, int len) throws IOException
    {
        int readAhead = 0;
        int count = 0;
        int pos = off;
        char[] readAheadBuffer = new char[BUFFER_SIZE];

        boolean available = true;
        while (available && count < len)
        {
            // read one character at a time so that the automaton sees every character
            available = super.read(readAheadBuffer, readAhead, 1) == 1;
            if (available)
            {
                char c = processChar(readAheadBuffer[readAhead]);
                if (state == STATE_START)
                {
                    // replace control chars with space
                    if (Utils.isControlChar(c))
                    {
                        c = ' ';
                    }
                    cbuf[pos++] = c;
                    readAhead = 0;
                    count++;
                }
                else if (state == STATE_ERROR)
                {
                    // not a control character reference: rewind everything read ahead,
                    // including the current character, and re-read it
                    unread(readAheadBuffer, 0, readAhead + 1);
                    readAhead = 0;
                }
                else
                {
                    // still inside a potential reference, keep collecting
                    readAhead++;
                }
            }
            else if (readAhead > 0)
            {
                // handles the case when the input ends within an escape sequence:
                // rewind the partial sequence and keep looping to emit it
                unread(readAheadBuffer, 0, readAhead);
                state = STATE_ERROR;
                readAhead = 0;
                available = true;
            }
        }

        return (count > 0 || available) ? count : -1;
    }


    /**
     * Advances the automaton by one character to find out whether a numeric escape sequence
     * denotes a control character.
     *
     * @param ch a char
     * @return Returns the char directly, or the control character denoted by a completed
     *         escape sequence.
     */
    private char processChar(char ch)
    {
        switch (state)
        {
            case STATE_START:
                if (ch == '&')
                {
                    state = STATE_AMP;
                }
                return ch;

            case STATE_AMP:
                state = (ch == '#') ? STATE_HASH : STATE_ERROR;
                return ch;

            case STATE_HASH:
                if (ch == 'x')
                {
                    control = 0;
                    digits = 0;
                    state = STATE_HEX;
                }
                else if ('0' <= ch && ch <= '9')
                {
                    control = Character.digit(ch, 10);
                    digits = 1;
                    state = STATE_DIG1;
                }
                else
                {
                    state = STATE_ERROR;
                }
                return ch;

            case STATE_DIG1:
                if ('0' <= ch && ch <= '9')
                {
                    control = control * 10 + Character.digit(ch, 10);
                    digits++;
                    if (digits > MAX_DECIMAL_DIGITS)
                    {
                        state = STATE_ERROR; // sequence too long
                    }
                }
                else if (ch == ';' && isEscapedControl(control))
                {
                    state = STATE_START;
                    return (char) control;
                }
                else
                {
                    state = STATE_ERROR;
                }
                return ch;

            case STATE_HEX:
                if (('0' <= ch && ch <= '9') ||
                    ('a' <= ch && ch <= 'f') ||
                    ('A' <= ch && ch <= 'F'))
                {
                    control = control * 16 + Character.digit(ch, 16);
                    digits++;
                    if (digits > MAX_HEX_DIGITS)
                    {
                        state = STATE_ERROR; // sequence too long
                    }
                }
                else if (ch == ';' && isEscapedControl(control))
                {
                    state = STATE_START;
                    return (char) control;
                }
                else
                {
                    state = STATE_ERROR;
                }
                return ch;

            case STATE_ERROR:
                // the pushed back character is emitted as is and scanning restarts
                state = STATE_START;
                return ch;

            default:
                // not reachable
                return ch;
        }
    }


    /**
     * Checks whether the numeric value of an escape sequence denotes a control character.
     * <p>
     * Five decimal digits can exceed the range of a <code>char</code>. Such values are
     * rejected before the narrowing cast; otherwise a value like 65537 would be truncated
     * to 1 and a valid reference would be mistaken for a control character.
     *
     * @param value the numeric value of the escape sequence
     * @return Returns true if the value fits into a char and is a control character.
     */
    private static boolean isEscapedControl(int value)
    {
        return value <= Character.MAX_VALUE && Utils.isControlChar((char) value);
    }
}