1 /* 2 * $HeadURL: http://svn.apache.org/repos/asf/httpcomponents/httpcore/trunk/module-main/src/main/java/org/apache/http/message/BasicTokenIterator.java $ 3 * $Revision: 602520 $ 4 * $Date: 2007-12-08 09:42:26 -0800 (Sat, 08 Dec 2007) $ 5 * 6 * ==================================================================== 7 * Licensed to the Apache Software Foundation (ASF) under one 8 * or more contributor license agreements. See the NOTICE file 9 * distributed with this work for additional information 10 * regarding copyright ownership. The ASF licenses this file 11 * to you under the Apache License, Version 2.0 (the 12 * "License"); you may not use this file except in compliance 13 * with the License. You may obtain a copy of the License at 14 * 15 * http://www.apache.org/licenses/LICENSE-2.0 16 * 17 * Unless required by applicable law or agreed to in writing, 18 * software distributed under the License is distributed on an 19 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 20 * KIND, either express or implied. See the License for the 21 * specific language governing permissions and limitations 22 * under the License. 23 * ==================================================================== 24 * 25 * This software consists of voluntary contributions made by many 26 * individuals on behalf of the Apache Software Foundation. For more 27 * information on the Apache Software Foundation, please see 28 * <http://www.apache.org/>. 29 * 30 */ 31 32 package org.apache.http.message; 33 34 import java.util.NoSuchElementException; 35 36 import org.apache.http.HeaderIterator; 37 import org.apache.http.ParseException; 38 import org.apache.http.TokenIterator; 39 40 /** 41 * Basic implementation of a {@link TokenIterator}. 42 * This implementation parses <tt>#token<tt> sequences as 43 * defined by RFC 2616, section 2. 44 * It extends that definition somewhat beyond US-ASCII. 45 * 46 * @version $Revision: 602520 $ 47 * 48 * @deprecated Please use {@link java.net.URL#openConnection} instead. 49 * Please visit <a href="http://android-developers.blogspot.com/2011/09/androids-http-clients.html">this webpage</a> 50 * for further details. 51 */ 52 @Deprecated 53 public class BasicTokenIterator implements TokenIterator { 54 55 /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */ 56 // the order of the characters here is adjusted to put the 57 // most likely candidates at the beginning of the collection 58 public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t"; 59 60 61 /** The iterator from which to obtain the next header. */ 62 protected final HeaderIterator headerIt; 63 64 /** 65 * The value of the current header. 66 * This is the header value that includes {@link #currentToken}. 67 * Undefined if the iteration is over. 68 */ 69 protected String currentHeader; 70 71 /** 72 * The token to be returned by the next call to {@link #currentToken}. 73 * <code>null</code> if the iteration is over. 74 */ 75 protected String currentToken; 76 77 /** 78 * The position after {@link #currentToken} in {@link #currentHeader}. 79 * Undefined if the iteration is over. 80 */ 81 protected int searchPos; 82 83 84 /** 85 * Creates a new instance of {@link BasicTokenIterator}. 86 * 87 * @param headerIterator the iterator for the headers to tokenize 88 */ 89 public BasicTokenIterator(final HeaderIterator headerIterator) { 90 if (headerIterator == null) { 91 throw new IllegalArgumentException 92 ("Header iterator must not be null."); 93 } 94 95 this.headerIt = headerIterator; 96 this.searchPos = findNext(-1); 97 } 98 99 100 // non-javadoc, see interface TokenIterator 101 public boolean hasNext() { 102 return (this.currentToken != null); 103 } 104 105 106 /** 107 * Obtains the next token from this iteration. 108 * 109 * @return the next token in this iteration 110 * 111 * @throws NoSuchElementException if the iteration is already over 112 * @throws ParseException if an invalid header value is encountered 113 */ 114 public String nextToken() 115 throws NoSuchElementException, ParseException { 116 117 if (this.currentToken == null) { 118 throw new NoSuchElementException("Iteration already finished."); 119 } 120 121 final String result = this.currentToken; 122 // updates currentToken, may trigger ParseException: 123 this.searchPos = findNext(this.searchPos); 124 125 return result; 126 } 127 128 129 /** 130 * Returns the next token. 131 * Same as {@link #nextToken}, but with generic return type. 132 * 133 * @return the next token in this iteration 134 * 135 * @throws NoSuchElementException if there are no more tokens 136 * @throws ParseException if an invalid header value is encountered 137 */ 138 public final Object next() 139 throws NoSuchElementException, ParseException { 140 return nextToken(); 141 } 142 143 144 /** 145 * Removing tokens is not supported. 146 * 147 * @throws UnsupportedOperationException always 148 */ 149 public final void remove() 150 throws UnsupportedOperationException { 151 152 throw new UnsupportedOperationException 153 ("Removing tokens is not supported."); 154 } 155 156 157 /** 158 * Determines the next token. 159 * If found, the token is stored in {@link #currentToken}. 160 * The return value indicates the position after the token 161 * in {@link #currentHeader}. If necessary, the next header 162 * will be obtained from {@link #headerIt}. 163 * If not found, {@link #currentToken} is set to <code>null</code>. 164 * 165 * @param from the position in the current header at which to 166 * start the search, -1 to search in the first header 167 * 168 * @return the position after the found token in the current header, or 169 * negative if there was no next token 170 * 171 * @throws ParseException if an invalid header value is encountered 172 */ 173 protected int findNext(int from) 174 throws ParseException { 175 176 if (from < 0) { 177 // called from the constructor, initialize the first header 178 if (!this.headerIt.hasNext()) { 179 return -1; 180 } 181 this.currentHeader = this.headerIt.nextHeader().getValue(); 182 from = 0; 183 } else { 184 // called after a token, make sure there is a separator 185 from = findTokenSeparator(from); 186 } 187 188 int start = findTokenStart(from); 189 if (start < 0) { 190 this.currentToken = null; 191 return -1; // nothing found 192 } 193 194 int end = findTokenEnd(start); 195 this.currentToken = createToken(this.currentHeader, start, end); 196 return end; 197 } 198 199 200 /** 201 * Creates a new token to be returned. 202 * Called from {@link #findNext findNext} after the token is identified. 203 * The default implementation simply calls 204 * {@link java.lang.String#substring String.substring}. 205 * <br/> 206 * If header values are significantly longer than tokens, and some 207 * tokens are permanently referenced by the application, there can 208 * be problems with garbage collection. A substring will hold a 209 * reference to the full characters of the original string and 210 * therefore occupies more memory than might be expected. 211 * To avoid this, override this method and create a new string 212 * instead of a substring. 213 * 214 * @param value the full header value from which to create a token 215 * @param start the index of the first token character 216 * @param end the index after the last token character 217 * 218 * @return a string representing the token identified by the arguments 219 */ 220 protected String createToken(String value, int start, int end) { 221 return value.substring(start, end); 222 } 223 224 225 /** 226 * Determines the starting position of the next token. 227 * This method will iterate over headers if necessary. 228 * 229 * @param from the position in the current header at which to 230 * start the search 231 * 232 * @return the position of the token start in the current header, 233 * negative if no token start could be found 234 */ 235 protected int findTokenStart(int from) { 236 if (from < 0) { 237 throw new IllegalArgumentException 238 ("Search position must not be negative: " + from); 239 } 240 241 boolean found = false; 242 while (!found && (this.currentHeader != null)) { 243 244 final int to = this.currentHeader.length(); 245 while (!found && (from < to)) { 246 247 final char ch = this.currentHeader.charAt(from); 248 if (isTokenSeparator(ch) || isWhitespace(ch)) { 249 // whitspace and token separators are skipped 250 from++; 251 } else if (isTokenChar(this.currentHeader.charAt(from))) { 252 // found the start of a token 253 found = true; 254 } else { 255 throw new ParseException 256 ("Invalid character before token (pos " + from + 257 "): " + this.currentHeader); 258 } 259 } 260 if (!found) { 261 if (this.headerIt.hasNext()) { 262 this.currentHeader = this.headerIt.nextHeader().getValue(); 263 from = 0; 264 } else { 265 this.currentHeader = null; 266 } 267 } 268 } // while headers 269 270 return found ? from : -1; 271 } 272 273 274 /** 275 * Determines the position of the next token separator. 276 * Because of multi-header joining rules, the end of a 277 * header value is a token separator. This method does 278 * therefore not need to iterate over headers. 279 * 280 * @param from the position in the current header at which to 281 * start the search 282 * 283 * @return the position of a token separator in the current header, 284 * or at the end 285 * 286 * @throws ParseException 287 * if a new token is found before a token separator. 288 * RFC 2616, section 2.1 explicitly requires a comma between 289 * tokens for <tt>#</tt>. 290 */ 291 protected int findTokenSeparator(int from) { 292 if (from < 0) { 293 throw new IllegalArgumentException 294 ("Search position must not be negative: " + from); 295 } 296 297 boolean found = false; 298 final int to = this.currentHeader.length(); 299 while (!found && (from < to)) { 300 final char ch = this.currentHeader.charAt(from); 301 if (isTokenSeparator(ch)) { 302 found = true; 303 } else if (isWhitespace(ch)) { 304 from++; 305 } else if (isTokenChar(ch)) { 306 throw new ParseException 307 ("Tokens without separator (pos " + from + 308 "): " + this.currentHeader); 309 } else { 310 throw new ParseException 311 ("Invalid character after token (pos " + from + 312 "): " + this.currentHeader); 313 } 314 } 315 316 return from; 317 } 318 319 320 /** 321 * Determines the ending position of the current token. 322 * This method will not leave the current header value, 323 * since the end of the header value is a token boundary. 324 * 325 * @param from the position of the first character of the token 326 * 327 * @return the position after the last character of the token. 328 * The behavior is undefined if <code>from</code> does not 329 * point to a token character in the current header value. 330 */ 331 protected int findTokenEnd(int from) { 332 if (from < 0) { 333 throw new IllegalArgumentException 334 ("Token start position must not be negative: " + from); 335 } 336 337 final int to = this.currentHeader.length(); 338 int end = from+1; 339 while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) { 340 end++; 341 } 342 343 return end; 344 } 345 346 347 /** 348 * Checks whether a character is a token separator. 349 * RFC 2616, section 2.1 defines comma as the separator for 350 * <tt>#token</tt> sequences. The end of a header value will 351 * also separate tokens, but that is not a character check. 352 * 353 * @param ch the character to check 354 * 355 * @return <code>true</code> if the character is a token separator, 356 * <code>false</code> otherwise 357 */ 358 protected boolean isTokenSeparator(char ch) { 359 return (ch == ','); 360 } 361 362 363 /** 364 * Checks whether a character is a whitespace character. 365 * RFC 2616, section 2.2 defines space and horizontal tab as whitespace. 366 * The optional preceeding line break is irrelevant, since header 367 * continuation is handled transparently when parsing messages. 368 * 369 * @param ch the character to check 370 * 371 * @return <code>true</code> if the character is whitespace, 372 * <code>false</code> otherwise 373 */ 374 protected boolean isWhitespace(char ch) { 375 376 // we do not use Character.isWhitspace(ch) here, since that allows 377 // many control characters which are not whitespace as per RFC 2616 378 return ((ch == '\t') || Character.isSpaceChar(ch)); 379 } 380 381 382 /** 383 * Checks whether a character is a valid token character. 384 * Whitespace, control characters, and HTTP separators are not 385 * valid token characters. The HTTP specification (RFC 2616, section 2.2) 386 * defines tokens only for the US-ASCII character set, this 387 * method extends the definition to other character sets. 388 * 389 * @param ch the character to check 390 * 391 * @return <code>true</code> if the character is a valid token start, 392 * <code>false</code> otherwise 393 */ 394 protected boolean isTokenChar(char ch) { 395 396 // common sense extension of ALPHA + DIGIT 397 if (Character.isLetterOrDigit(ch)) 398 return true; 399 400 // common sense extension of CTL 401 if (Character.isISOControl(ch)) 402 return false; 403 404 // no common sense extension for this 405 if (isHttpSeparator(ch)) 406 return false; 407 408 // RFC 2616, section 2.2 defines a token character as 409 // "any CHAR except CTLs or separators". The controls 410 // and separators are included in the checks above. 411 // This will yield unexpected results for Unicode format characters. 412 // If that is a problem, overwrite isHttpSeparator(char) to filter 413 // out the false positives. 414 return true; 415 } 416 417 418 /** 419 * Checks whether a character is an HTTP separator. 420 * The implementation in this class checks only for the HTTP separators 421 * defined in RFC 2616, section 2.2. If you need to detect other 422 * separators beyond the US-ASCII character set, override this method. 423 * 424 * @param ch the character to check 425 * 426 * @return <code>true</code> if the character is an HTTP separator 427 */ 428 protected boolean isHttpSeparator(char ch) { 429 return (HTTP_SEPARATORS.indexOf(ch) >= 0); 430 } 431 432 433 } // class BasicTokenIterator 434 435