Home | History | Annotate | Download | only in message
      1 /*
      2  * $HeadURL: http://svn.apache.org/repos/asf/httpcomponents/httpcore/trunk/module-main/src/main/java/org/apache/http/message/BasicTokenIterator.java $
      3  * $Revision: 602520 $
      4  * $Date: 2007-12-08 09:42:26 -0800 (Sat, 08 Dec 2007) $
      5  *
      6  * ====================================================================
      7  * Licensed to the Apache Software Foundation (ASF) under one
      8  * or more contributor license agreements.  See the NOTICE file
      9  * distributed with this work for additional information
     10  * regarding copyright ownership.  The ASF licenses this file
     11  * to you under the Apache License, Version 2.0 (the
     12  * "License"); you may not use this file except in compliance
     13  * with the License.  You may obtain a copy of the License at
     14  *
     15  *   http://www.apache.org/licenses/LICENSE-2.0
     16  *
     17  * Unless required by applicable law or agreed to in writing,
     18  * software distributed under the License is distributed on an
     19  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
     20  * KIND, either express or implied.  See the License for the
     21  * specific language governing permissions and limitations
     22  * under the License.
     23  * ====================================================================
     24  *
     25  * This software consists of voluntary contributions made by many
     26  * individuals on behalf of the Apache Software Foundation.  For more
     27  * information on the Apache Software Foundation, please see
     28  * <http://www.apache.org/>.
     29  *
     30  */
     31 
     32 package org.apache.http.message;
     33 
     34 import java.util.NoSuchElementException;
     35 
     36 import org.apache.http.HeaderIterator;
     37 import org.apache.http.ParseException;
     38 import org.apache.http.TokenIterator;
     39 
     40 /**
     41  * Basic implementation of a {@link TokenIterator}.
     42  * This implementation parses <tt>#token<tt> sequences as
     43  * defined by RFC 2616, section 2.
     44  * It extends that definition somewhat beyond US-ASCII.
     45  *
     46  * @version $Revision: 602520 $
     47  *
     48  * @deprecated Please use {@link java.net.URL#openConnection} instead.
     49  *     Please visit <a href="http://android-developers.blogspot.com/2011/09/androids-http-clients.html">this webpage</a>
     50  *     for further details.
     51  */
     52 @Deprecated
     53 public class BasicTokenIterator implements TokenIterator {
     54 
     55     /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */
     56     // the order of the characters here is adjusted to put the
     57     // most likely candidates at the beginning of the collection
     58     public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";
     59 
     60 
     61     /** The iterator from which to obtain the next header. */
     62     protected final HeaderIterator headerIt;
     63 
     64     /**
     65      * The value of the current header.
     66      * This is the header value that includes {@link #currentToken}.
     67      * Undefined if the iteration is over.
     68      */
     69     protected String currentHeader;
     70 
     71     /**
     72      * The token to be returned by the next call to {@link #currentToken}.
     73      * <code>null</code> if the iteration is over.
     74      */
     75     protected String currentToken;
     76 
     77     /**
     78      * The position after {@link #currentToken} in {@link #currentHeader}.
     79      * Undefined if the iteration is over.
     80      */
     81     protected int searchPos;
     82 
     83 
     84     /**
     85      * Creates a new instance of {@link BasicTokenIterator}.
     86      *
     87      * @param headerIterator    the iterator for the headers to tokenize
     88      */
     89     public BasicTokenIterator(final HeaderIterator headerIterator) {
     90         if (headerIterator == null) {
     91             throw new IllegalArgumentException
     92                 ("Header iterator must not be null.");
     93         }
     94 
     95         this.headerIt = headerIterator;
     96         this.searchPos = findNext(-1);
     97     }
     98 
     99 
    100     // non-javadoc, see interface TokenIterator
    101     public boolean hasNext() {
    102         return (this.currentToken != null);
    103     }
    104 
    105 
    106     /**
    107      * Obtains the next token from this iteration.
    108      *
    109      * @return  the next token in this iteration
    110      *
    111      * @throws NoSuchElementException   if the iteration is already over
    112      * @throws ParseException   if an invalid header value is encountered
    113      */
    114     public String nextToken()
    115         throws NoSuchElementException, ParseException {
    116 
    117         if (this.currentToken == null) {
    118             throw new NoSuchElementException("Iteration already finished.");
    119         }
    120 
    121         final String result = this.currentToken;
    122         // updates currentToken, may trigger ParseException:
    123         this.searchPos = findNext(this.searchPos);
    124 
    125         return result;
    126     }
    127 
    128 
    129     /**
    130      * Returns the next token.
    131      * Same as {@link #nextToken}, but with generic return type.
    132      *
    133      * @return  the next token in this iteration
    134      *
    135      * @throws NoSuchElementException   if there are no more tokens
    136      * @throws ParseException   if an invalid header value is encountered
    137      */
    138     public final Object next()
    139         throws NoSuchElementException, ParseException {
    140         return nextToken();
    141     }
    142 
    143 
    144     /**
    145      * Removing tokens is not supported.
    146      *
    147      * @throws UnsupportedOperationException    always
    148      */
    149     public final void remove()
    150         throws UnsupportedOperationException {
    151 
    152         throw new UnsupportedOperationException
    153             ("Removing tokens is not supported.");
    154     }
    155 
    156 
    157     /**
    158      * Determines the next token.
    159      * If found, the token is stored in {@link #currentToken}.
    160      * The return value indicates the position after the token
    161      * in {@link #currentHeader}. If necessary, the next header
    162      * will be obtained from {@link #headerIt}.
    163      * If not found, {@link #currentToken} is set to <code>null</code>.
    164      *
    165      * @param from      the position in the current header at which to
    166      *                  start the search, -1 to search in the first header
    167      *
    168      * @return  the position after the found token in the current header, or
    169      *          negative if there was no next token
    170      *
    171      * @throws ParseException   if an invalid header value is encountered
    172      */
    173     protected int findNext(int from)
    174         throws ParseException {
    175 
    176         if (from < 0) {
    177             // called from the constructor, initialize the first header
    178             if (!this.headerIt.hasNext()) {
    179                 return -1;
    180             }
    181             this.currentHeader = this.headerIt.nextHeader().getValue();
    182             from = 0;
    183         } else {
    184             // called after a token, make sure there is a separator
    185             from = findTokenSeparator(from);
    186         }
    187 
    188         int start = findTokenStart(from);
    189         if (start < 0) {
    190             this.currentToken = null;
    191             return -1; // nothing found
    192         }
    193 
    194         int end = findTokenEnd(start);
    195         this.currentToken = createToken(this.currentHeader, start, end);
    196         return end;
    197     }
    198 
    199 
    200     /**
    201      * Creates a new token to be returned.
    202      * Called from {@link #findNext findNext} after the token is identified.
    203      * The default implementation simply calls
    204      * {@link java.lang.String#substring String.substring}.
    205      * <br/>
    206      * If header values are significantly longer than tokens, and some
    207      * tokens are permanently referenced by the application, there can
    208      * be problems with garbage collection. A substring will hold a
    209      * reference to the full characters of the original string and
    210      * therefore occupies more memory than might be expected.
    211      * To avoid this, override this method and create a new string
    212      * instead of a substring.
    213      *
    214      * @param value     the full header value from which to create a token
    215      * @param start     the index of the first token character
    216      * @param end       the index after the last token character
    217      *
    218      * @return  a string representing the token identified by the arguments
    219      */
    220     protected String createToken(String value, int start, int end) {
    221         return value.substring(start, end);
    222     }
    223 
    224 
    225     /**
    226      * Determines the starting position of the next token.
    227      * This method will iterate over headers if necessary.
    228      *
    229      * @param from      the position in the current header at which to
    230      *                  start the search
    231      *
    232      * @return  the position of the token start in the current header,
    233      *          negative if no token start could be found
    234      */
    235     protected int findTokenStart(int from) {
    236         if (from < 0) {
    237             throw new IllegalArgumentException
    238                 ("Search position must not be negative: " + from);
    239         }
    240 
    241         boolean found = false;
    242         while (!found && (this.currentHeader != null)) {
    243 
    244             final int to = this.currentHeader.length();
    245             while (!found && (from < to)) {
    246 
    247                 final char ch = this.currentHeader.charAt(from);
    248                 if (isTokenSeparator(ch) || isWhitespace(ch)) {
    249                     // whitspace and token separators are skipped
    250                     from++;
    251                 } else if (isTokenChar(this.currentHeader.charAt(from))) {
    252                     // found the start of a token
    253                     found = true;
    254                 } else {
    255                     throw new ParseException
    256                         ("Invalid character before token (pos " + from +
    257                          "): " + this.currentHeader);
    258                 }
    259             }
    260             if (!found) {
    261                 if (this.headerIt.hasNext()) {
    262                     this.currentHeader = this.headerIt.nextHeader().getValue();
    263                     from = 0;
    264                 } else {
    265                     this.currentHeader = null;
    266                 }
    267             }
    268         } // while headers
    269 
    270         return found ? from : -1;
    271     }
    272 
    273 
    274     /**
    275      * Determines the position of the next token separator.
    276      * Because of multi-header joining rules, the end of a
    277      * header value is a token separator. This method does
    278      * therefore not need to iterate over headers.
    279      *
    280      * @param from      the position in the current header at which to
    281      *                  start the search
    282      *
    283      * @return  the position of a token separator in the current header,
    284      *          or at the end
    285      *
    286      * @throws ParseException
    287      *         if a new token is found before a token separator.
    288      *         RFC 2616, section 2.1 explicitly requires a comma between
    289      *         tokens for <tt>#</tt>.
    290      */
    291     protected int findTokenSeparator(int from) {
    292         if (from < 0) {
    293             throw new IllegalArgumentException
    294                 ("Search position must not be negative: " + from);
    295         }
    296 
    297         boolean found = false;
    298         final int to = this.currentHeader.length();
    299         while (!found && (from < to)) {
    300             final char ch = this.currentHeader.charAt(from);
    301             if (isTokenSeparator(ch)) {
    302                 found = true;
    303             } else if (isWhitespace(ch)) {
    304                 from++;
    305             } else if (isTokenChar(ch)) {
    306                 throw new ParseException
    307                     ("Tokens without separator (pos " + from +
    308                      "): " + this.currentHeader);
    309             } else {
    310                 throw new ParseException
    311                     ("Invalid character after token (pos " + from +
    312                      "): " + this.currentHeader);
    313             }
    314         }
    315 
    316         return from;
    317     }
    318 
    319 
    320     /**
    321      * Determines the ending position of the current token.
    322      * This method will not leave the current header value,
    323      * since the end of the header value is a token boundary.
    324      *
    325      * @param from      the position of the first character of the token
    326      *
    327      * @return  the position after the last character of the token.
    328      *          The behavior is undefined if <code>from</code> does not
    329      *          point to a token character in the current header value.
    330      */
    331     protected int findTokenEnd(int from) {
    332         if (from < 0) {
    333             throw new IllegalArgumentException
    334                 ("Token start position must not be negative: " + from);
    335         }
    336 
    337         final int to = this.currentHeader.length();
    338         int end = from+1;
    339         while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) {
    340             end++;
    341         }
    342 
    343         return end;
    344     }
    345 
    346 
    347     /**
    348      * Checks whether a character is a token separator.
    349      * RFC 2616, section 2.1 defines comma as the separator for
    350      * <tt>#token</tt> sequences. The end of a header value will
    351      * also separate tokens, but that is not a character check.
    352      *
    353      * @param ch        the character to check
    354      *
    355      * @return  <code>true</code> if the character is a token separator,
    356      *          <code>false</code> otherwise
    357      */
    358     protected boolean isTokenSeparator(char ch) {
    359         return (ch == ',');
    360     }
    361 
    362 
    363     /**
    364      * Checks whether a character is a whitespace character.
    365      * RFC 2616, section 2.2 defines space and horizontal tab as whitespace.
    366      * The optional preceeding line break is irrelevant, since header
    367      * continuation is handled transparently when parsing messages.
    368      *
    369      * @param ch        the character to check
    370      *
    371      * @return  <code>true</code> if the character is whitespace,
    372      *          <code>false</code> otherwise
    373      */
    374     protected boolean isWhitespace(char ch) {
    375 
    376         // we do not use Character.isWhitspace(ch) here, since that allows
    377         // many control characters which are not whitespace as per RFC 2616
    378         return ((ch == '\t') || Character.isSpaceChar(ch));
    379     }
    380 
    381 
    382     /**
    383      * Checks whether a character is a valid token character.
    384      * Whitespace, control characters, and HTTP separators are not
    385      * valid token characters. The HTTP specification (RFC 2616, section 2.2)
    386      * defines tokens only for the US-ASCII character set, this
    387      * method extends the definition to other character sets.
    388      *
    389      * @param ch        the character to check
    390      *
    391      * @return  <code>true</code> if the character is a valid token start,
    392      *          <code>false</code> otherwise
    393      */
    394     protected boolean isTokenChar(char ch) {
    395 
    396         // common sense extension of ALPHA + DIGIT
    397         if (Character.isLetterOrDigit(ch))
    398             return true;
    399 
    400         // common sense extension of CTL
    401         if (Character.isISOControl(ch))
    402             return false;
    403 
    404         // no common sense extension for this
    405         if (isHttpSeparator(ch))
    406             return false;
    407 
    408         // RFC 2616, section 2.2 defines a token character as
    409         // "any CHAR except CTLs or separators". The controls
    410         // and separators are included in the checks above.
    411         // This will yield unexpected results for Unicode format characters.
    412         // If that is a problem, overwrite isHttpSeparator(char) to filter
    413         // out the false positives.
    414         return true;
    415     }
    416 
    417 
    418     /**
    419      * Checks whether a character is an HTTP separator.
    420      * The implementation in this class checks only for the HTTP separators
    421      * defined in RFC 2616, section 2.2. If you need to detect other
    422      * separators beyond the US-ASCII character set, override this method.
    423      *
    424      * @param ch        the character to check
    425      *
    426      * @return  <code>true</code> if the character is an HTTP separator
    427      */
    428     protected boolean isHttpSeparator(char ch) {
    429         return (HTTP_SEPARATORS.indexOf(ch) >= 0);
    430     }
    431 
    432 
    433 } // class BasicTokenIterator
    434 
    435