Home | History | Annotate | Download | only in impl
      1 // =================================================================================================
      2 // ADOBE SYSTEMS INCORPORATED
      3 // Copyright 2006 Adobe Systems Incorporated
      4 // All Rights Reserved
      5 //
      6 // NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
      7 // of the Adobe license agreement accompanying it.
      8 // =================================================================================================
      9 
     10 package com.adobe.xmp.impl;
     11 
     12 import java.io.IOException;
     13 import java.io.PushbackReader;
     14 import java.io.Reader;
     15 
     16 
     17 /**
     18  * @since   22.08.2006
     19  */
     20 public class FixASCIIControlsReader extends PushbackReader
     21 {
     22 	/** */
     23 	private static final int STATE_START = 0;
     24 	/** */
     25 	private static final int STATE_AMP = 1;
     26 	/** */
     27 	private static final int STATE_HASH = 2;
     28 	/** */
     29 	private static final int STATE_HEX = 3;
     30 	/** */
     31 	private static final int STATE_DIG1 = 4;
     32 	/** */
     33 	private static final int STATE_ERROR = 5;
     34 	/** */
     35 	private static final int BUFFER_SIZE = 8;
     36 	/** the state of the automaton */
     37 	private int state = STATE_START;
     38 	/** the result of the escaping sequence */
     39 	private int control = 0;
     40 	/** count the digits of the sequence */
     41 	private int digits = 0;
     42 
     43 	/**
     44 	 * The look-ahead size is 6 at maximum («)
     45 	 * @see PushbackReader#PushbackReader(Reader, int)
     46 	 * @param in a Reader
     47 	 */
     48 	public FixASCIIControlsReader(Reader in)
     49 	{
     50 		super(in, BUFFER_SIZE);
     51 	}
     52 
     53 
     54 	/**
     55 	 * @see Reader#read(char[], int, int)
     56 	 */
     57 	public int read(char[] cbuf, int off, int len) throws IOException
     58 	{
     59 		int readAhead = 0;
     60 		int read = 0;
     61 		int pos = off;
     62 		char[] readAheadBuffer = new char[BUFFER_SIZE];
     63 
     64 		boolean available = true;
     65 		while (available  &&  read < len)
     66 		{
     67 			available = super.read(readAheadBuffer, readAhead, 1) == 1;
     68 			if (available)
     69 			{
     70 				char c = processChar(readAheadBuffer[readAhead]);
     71 				if (state == STATE_START)
     72 				{
     73 					// replace control chars with space
     74 					if (Utils.isControlChar(c))
     75 					{
     76 						c = ' ';
     77 					}
     78 					cbuf[pos++] = c;
     79 					readAhead = 0;
     80 					read++;
     81 				}
     82 				else if (state == STATE_ERROR)
     83 				{
     84 					unread(readAheadBuffer, 0, readAhead + 1);
     85 					readAhead = 0;
     86 				}
     87 				else
     88 				{
     89 					readAhead++;
     90 				}
     91 			}
     92 			else if (readAhead > 0)
     93 			{
     94 				// handles case when file ends within excaped sequence
     95 				unread(readAheadBuffer, 0, readAhead);
     96 				state = STATE_ERROR;
     97 				readAhead = 0;
     98 				available = true;
     99 			}
    100 		}
    101 
    102 
    103 		return read > 0  ||  available ? read : -1;
    104 	}
    105 
    106 
    107 	/**
    108 	 * Processes numeric escaped chars to find out if they are a control character.
    109 	 * @param ch a char
    110 	 * @return Returns the char directly or as replacement for the escaped sequence.
    111 	 */
    112 	private char processChar(char ch)
    113 	{
    114 		switch (state)
    115 		{
    116 			case STATE_START:
    117 				if (ch == '&')
    118 				{
    119 					state = STATE_AMP;
    120 				}
    121 				return ch;
    122 
    123 			case STATE_AMP:
    124 				if (ch == '#')
    125 				{
    126 					state = STATE_HASH;
    127 				}
    128 				else
    129 				{
    130 					state = STATE_ERROR;
    131 				}
    132 				return ch;
    133 
    134 			case STATE_HASH:
    135 				if (ch == 'x')
    136 				{
    137 					control = 0;
    138 					digits = 0;
    139 					state = STATE_HEX;
    140 				}
    141 				else if ('0' <= ch  &&  ch <= '9')
    142 				{
    143 					control = Character.digit(ch, 10);
    144 					digits = 1;
    145 					state = STATE_DIG1;
    146 				}
    147 				else
    148 				{
    149 					state = STATE_ERROR;
    150 				}
    151 				return ch;
    152 
    153 			case STATE_DIG1:
    154 				if ('0' <= ch  &&  ch <= '9')
    155 				{
    156 					control = control * 10 + Character.digit(ch, 10);
    157 					digits++;
    158 					if (digits <= 5)
    159 					{
    160 						state = STATE_DIG1;
    161 					}
    162 					else
    163 					{
    164 						state = STATE_ERROR; // sequence too long
    165 					}
    166 				}
    167 				else if (ch == ';'  &&  Utils.isControlChar((char) control))
    168 				{
    169 					state = STATE_START;
    170 					return (char) control;
    171 				}
    172 				else
    173 				{
    174 					state = STATE_ERROR;
    175 				}
    176 				return ch;
    177 
    178 			case STATE_HEX:
    179 				if (('0' <= ch  &&  ch <= '9')  ||
    180 					('a' <= ch  &&  ch <= 'f')  ||
    181 					('A' <= ch  &&  ch <= 'F'))
    182 				{
    183 					control = control * 16 + Character.digit(ch, 16);
    184 					digits++;
    185 					if (digits <= 4)
    186 					{
    187 						state = STATE_HEX;
    188 					}
    189 					else
    190 					{
    191 						state = STATE_ERROR; // sequence too long
    192 					}
    193 				}
    194 				else if (ch == ';'  &&   Utils.isControlChar((char) control))
    195 				{
    196 					state = STATE_START;
    197 					return (char) control;
    198 				}
    199 				else
    200 				{
    201 					state = STATE_ERROR;
    202 				}
    203 				return ch;
    204 
    205 			case STATE_ERROR:
    206 				state = STATE_START;
    207 				return ch;
    208 
    209 			default:
    210 				// not reachable
    211 				return ch;
    212 		}
    213 	}
    214 }
    215