Home | History | Annotate | Download | only in mime4j
      1 /****************************************************************
      2  * Licensed to the Apache Software Foundation (ASF) under one   *
      3  * or more contributor license agreements.  See the NOTICE file *
      4  * distributed with this work for additional information        *
      5  * regarding copyright ownership.  The ASF licenses this file   *
      6  * to you under the Apache License, Version 2.0 (the            *
      7  * "License"); you may not use this file except in compliance   *
      8  * with the License.  You may obtain a copy of the License at   *
      9  *                                                              *
     10  *   http://www.apache.org/licenses/LICENSE-2.0                 *
     11  *                                                              *
     12  * Unless required by applicable law or agreed to in writing,   *
     13  * software distributed under the License is distributed on an  *
     14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
     15  * KIND, either express or implied.  See the License for the    *
     16  * specific language governing permissions and limitations      *
     17  * under the License.                                           *
     18  ****************************************************************/
     19 
     20 package org.apache.james.mime4j;
     21 
     22 import com.android.mail.utils.LoggingInputStream;
     23 
     24 import org.apache.james.mime4j.decoder.Base64InputStream;
     25 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
     26 
     27 import java.io.IOException;
     28 import java.io.InputStream;
     29 import java.util.BitSet;
     30 import java.util.LinkedList;
     31 
     32 /**
     33  * <p>
     34  * Parses MIME (or RFC822) message streams of bytes or characters and reports
     35  * parsing events to a <code>ContentHandler</code> instance.
     36  * </p>
     37  * <p>
     38  * Typical usage:<br/>
     39  * <pre>
     40  *      ContentHandler handler = new MyHandler();
     41  *      MimeStreamParser parser = new MimeStreamParser();
     42  *      parser.setContentHandler(handler);
     43  *      parser.parse(new BufferedInputStream(new FileInputStream("mime.msg")));
     44  * </pre>
     45  * <strong>NOTE:</strong> All lines must end with CRLF
     46  * (<code>\r\n</code>). If you are unsure of the line endings in your stream
     47  * you should wrap it in a {@link org.apache.james.mime4j.EOLConvertingInputStream} instance.
     48  *
     49  *
     50  * @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp $
     51  */
     52 public class MimeStreamParser {
     53     private static final Log log = LogFactory.getLog(MimeStreamParser.class);
     54 
     55     private static final boolean DEBUG_LOG_MESSAGE = false; //DO NOT RELEASE AS 'TRUE'
     56 
     57     private static BitSet fieldChars = null;
     58 
     59     private RootInputStream rootStream = null;
     60     private LinkedList<BodyDescriptor> bodyDescriptors = new LinkedList<BodyDescriptor>();
     61     private ContentHandler handler = null;
     62     private boolean raw = false;
     63 
     64     static {
     65         fieldChars = new BitSet();
     66         for (int i = 0x21; i <= 0x39; i++) {
     67             fieldChars.set(i);
     68         }
     69         for (int i = 0x3b; i <= 0x7e; i++) {
     70             fieldChars.set(i);
     71         }
     72     }
     73 
     74     /**
     75      * Creates a new <code>MimeStreamParser</code> instance.
     76      */
     77     public MimeStreamParser() {
     78     }
     79 
     80     /**
     81      * Parses a stream of bytes containing a MIME message.
     82      *
     83      * @param is the stream to parse.
     84      * @throws IOException on I/O errors.
     85      */
     86     public void parse(InputStream is) throws IOException {
     87         if (DEBUG_LOG_MESSAGE) {
     88             is = new LoggingInputStream(is, "MIME", true);
     89         }
     90         rootStream = new RootInputStream(is);
     91         parseMessage(rootStream);
     92     }
     93 
     94     /**
     95      * Determines if this parser is currently in raw mode.
     96      *
     97      * @return <code>true</code> if in raw mode, <code>false</code>
     98      *         otherwise.
     99      * @see #setRaw(boolean)
    100      */
    101     public boolean isRaw() {
    102         return raw;
    103     }
    104 
    105     /**
    106      * Enables or disables raw mode. In raw mode all future entities
    107      * (messages or body parts) in the stream will be reported to the
    108      * {@link ContentHandler#raw(InputStream)} handler method only.
    109      * The stream will contain the entire unparsed entity contents
    110      * including header fields and whatever is in the body.
    111      *
    112      * @param raw <code>true</code> enables raw mode, <code>false</code>
    113      *        disables it.
    114      */
    115     public void setRaw(boolean raw) {
    116         this.raw = raw;
    117     }
    118 
    119     /**
    120      * Finishes the parsing and stops reading lines.
    121      * NOTE: No more lines will be parsed but the parser
    122      * will still call
    123      * {@link ContentHandler#endMultipart()},
    124      * {@link ContentHandler#endBodyPart()},
    125      * {@link ContentHandler#endMessage()}, etc to match previous calls
    126      * to
    127      * {@link ContentHandler#startMultipart(BodyDescriptor)},
    128      * {@link ContentHandler#startBodyPart()},
    129      * {@link ContentHandler#startMessage()}, etc.
    130      */
    131     public void stop() {
    132         rootStream.truncate();
    133     }
    134 
    135     /**
    136      * Parses an entity which consists of a header followed by a body containing
    137      * arbitrary data, body parts or an embedded message.
    138      *
    139      * @param is the stream to parse.
    140      * @throws IOException on I/O errors.
    141      */
    142     private void parseEntity(InputStream is) throws IOException {
    143         BodyDescriptor bd = parseHeader(is);
    144 
    145         if (bd.isMultipart()) {
    146             bodyDescriptors.addFirst(bd);
    147 
    148             handler.startMultipart(bd);
    149 
    150             MimeBoundaryInputStream tempIs =
    151                 new MimeBoundaryInputStream(is, bd.getBoundary());
    152             handler.preamble(new CloseShieldInputStream(tempIs));
    153             tempIs.consume();
    154 
    155             while (tempIs.hasMoreParts()) {
    156                 tempIs = new MimeBoundaryInputStream(is, bd.getBoundary());
    157                 parseBodyPart(tempIs);
    158                 tempIs.consume();
    159                 if (tempIs.parentEOF()) {
    160 //                    if (log.isWarnEnabled()) {
    161 //                        log.warn("Line " + rootStream.getLineNumber()
    162 //                                + ": Body part ended prematurely. "
    163 //                                + "Higher level boundary detected or "
    164 //                                + "EOF reached.");
    165 //                    }
    166                     break;
    167                 }
    168             }
    169 
    170             handler.epilogue(new CloseShieldInputStream(is));
    171 
    172             handler.endMultipart();
    173 
    174             bodyDescriptors.removeFirst();
    175 
    176         } else if (bd.isMessage()) {
    177             if (bd.isBase64Encoded()) {
    178                 log.warn("base64 encoded message/rfc822 detected");
    179                 is = new EOLConvertingInputStream(
    180                         new Base64InputStream(is));
    181             } else if (bd.isQuotedPrintableEncoded()) {
    182                 log.warn("quoted-printable encoded message/rfc822 detected");
    183                 is = new EOLConvertingInputStream(
    184                         new QuotedPrintableInputStream(is));
    185             }
    186             bodyDescriptors.addFirst(bd);
    187             parseMessage(is);
    188             bodyDescriptors.removeFirst();
    189         } else {
    190             handler.body(bd, new CloseShieldInputStream(is));
    191         }
    192 
    193         /*
    194          * Make sure the stream has been consumed.
    195          */
    196         while (is.read() != -1) {
    197         }
    198     }
    199 
    200     private void parseMessage(InputStream is) throws IOException {
    201         if (raw) {
    202             handler.raw(new CloseShieldInputStream(is));
    203         } else {
    204             handler.startMessage();
    205             parseEntity(is);
    206             handler.endMessage();
    207         }
    208     }
    209 
    210     private void parseBodyPart(InputStream is) throws IOException {
    211         if (raw) {
    212             handler.raw(new CloseShieldInputStream(is));
    213         } else {
    214             handler.startBodyPart();
    215             parseEntity(is);
    216             handler.endBodyPart();
    217         }
    218     }
    219 
    220     /**
    221      * Parses a header.
    222      *
    223      * @param is the stream to parse.
    224      * @return a <code>BodyDescriptor</code> describing the body following
    225      *         the header.
    226      */
    227     private BodyDescriptor parseHeader(InputStream is) throws IOException {
    228         BodyDescriptor bd = new BodyDescriptor(bodyDescriptors.isEmpty()
    229                         ? null : (BodyDescriptor) bodyDescriptors.getFirst());
    230 
    231         handler.startHeader();
    232 
    233         int lineNumber = rootStream.getLineNumber();
    234 
    235         StringBuffer sb = new StringBuffer();
    236         int curr = 0;
    237         int prev = 0;
    238         while ((curr = is.read()) != -1) {
    239             if (curr == '\n' && (prev == '\n' || prev == 0)) {
    240                 /*
    241                  * [\r]\n[\r]\n or an immediate \r\n have been seen.
    242                  */
    243                 sb.deleteCharAt(sb.length() - 1);
    244                 break;
    245             }
    246             sb.append((char) curr);
    247             prev = curr == '\r' ? prev : curr;
    248         }
    249 
    250 //        if (curr == -1 && log.isWarnEnabled()) {
    251 //            log.warn("Line " + rootStream.getLineNumber()
    252 //                    + ": Unexpected end of headers detected. "
    253 //                    + "Boundary detected in header or EOF reached.");
    254 //        }
    255 
    256         int start = 0;
    257         int pos = 0;
    258         int startLineNumber = lineNumber;
    259         while (pos < sb.length()) {
    260             while (pos < sb.length() && sb.charAt(pos) != '\r') {
    261                 pos++;
    262             }
    263             if (pos < sb.length() - 1 && sb.charAt(pos + 1) != '\n') {
    264                 pos++;
    265                 continue;
    266             }
    267 
    268             if (pos >= sb.length() - 2 || fieldChars.get(sb.charAt(pos + 2))) {
    269 
    270                 /*
    271                  * field should be the complete field data excluding the
    272                  * trailing \r\n.
    273                  */
    274                 String field = sb.substring(start, pos);
    275                 start = pos + 2;
    276 
    277                 /*
    278                  * Check for a valid field.
    279                  */
    280                 int index = field.indexOf(':');
    281                 boolean valid = false;
    282                 if (index != -1 && fieldChars.get(field.charAt(0))) {
    283                     valid = true;
    284                     String fieldName = field.substring(0, index).trim();
    285                     for (int i = 0; i < fieldName.length(); i++) {
    286                         if (!fieldChars.get(fieldName.charAt(i))) {
    287                             valid = false;
    288                             break;
    289                         }
    290                     }
    291 
    292                     if (valid) {
    293                         handler.field(field);
    294                         bd.addField(fieldName, field.substring(index + 1));
    295                     }
    296                 }
    297 
    298                 if (!valid && log.isWarnEnabled()) {
    299                     log.warn("Line " + startLineNumber
    300                             + ": Ignoring invalid field: '" + field.trim() + "'");
    301                 }
    302 
    303                 startLineNumber = lineNumber;
    304             }
    305 
    306             pos += 2;
    307             lineNumber++;
    308         }
    309 
    310         handler.endHeader();
    311 
    312         return bd;
    313     }
    314 
    315     /**
    316      * Sets the <code>ContentHandler</code> to use when reporting
    317      * parsing events.
    318      *
    319      * @param h the <code>ContentHandler</code>.
    320      */
    321     public void setContentHandler(ContentHandler h) {
    322         this.handler = h;
    323     }
    324 
    325 }
    326