Home | History | Annotate | Download | only in mime4j
      1 /****************************************************************
      2  * Licensed to the Apache Software Foundation (ASF) under one   *
      3  * or more contributor license agreements.  See the NOTICE file *
      4  * distributed with this work for additional information        *
      5  * regarding copyright ownership.  The ASF licenses this file   *
      6  * to you under the Apache License, Version 2.0 (the            *
      7  * "License"); you may not use this file except in compliance   *
      8  * with the License.  You may obtain a copy of the License at   *
      9  *                                                              *
     10  *   http://www.apache.org/licenses/LICENSE-2.0                 *
     11  *                                                              *
     12  * Unless required by applicable law or agreed to in writing,   *
     13  * software distributed under the License is distributed on an  *
     14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
     15  * KIND, either express or implied.  See the License for the    *
     16  * specific language governing permissions and limitations      *
     17  * under the License.                                           *
     18  ****************************************************************/
     19 
     20 package org.apache.james.mime4j;
     21 
     22 import com.android.email.Email;
     23 import com.android.email.mail.transport.LoggingInputStream;
     24 
     25 import org.apache.james.mime4j.decoder.Base64InputStream;
     26 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
     27 
     28 import java.io.IOException;
     29 import java.io.InputStream;
     30 import java.util.BitSet;
     31 import java.util.LinkedList;
     32 
     33 /**
     34  * <p>
     35  * Parses MIME (or RFC822) message streams of bytes or characters and reports
     36  * parsing events to a <code>ContentHandler</code> instance.
     37  * </p>
     38  * <p>
     39  * Typical usage:<br/>
     40  * <pre>
     41  *      ContentHandler handler = new MyHandler();
     42  *      MimeStreamParser parser = new MimeStreamParser();
     43  *      parser.setContentHandler(handler);
     44  *      parser.parse(new BufferedInputStream(new FileInputStream("mime.msg")));
     45  * </pre>
     46  * <strong>NOTE:</strong> All lines must end with CRLF
     47  * (<code>\r\n</code>). If you are unsure of the line endings in your stream
     48  * you should wrap it in a {@link org.apache.james.mime4j.EOLConvertingInputStream} instance.
     49  *
     50  *
     51  * @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp $
     52  */
     53 public class MimeStreamParser {
     54     private static final Log log = LogFactory.getLog(MimeStreamParser.class);
     55 
     56     private static final boolean DEBUG_LOG_MESSAGE = false; //DO NOT RELEASE AS 'TRUE'
     57 
     58     private static BitSet fieldChars = null;
     59 
     60     private RootInputStream rootStream = null;
     61     private LinkedList bodyDescriptors = new LinkedList();
     62     private ContentHandler handler = null;
     63     private boolean raw = false;
     64 
     65     static {
     66         fieldChars = new BitSet();
     67         for (int i = 0x21; i <= 0x39; i++) {
     68             fieldChars.set(i);
     69         }
     70         for (int i = 0x3b; i <= 0x7e; i++) {
     71             fieldChars.set(i);
     72         }
     73     }
     74 
     75     /**
     76      * Creates a new <code>MimeStreamParser</code> instance.
     77      */
     78     public MimeStreamParser() {
     79     }
     80 
     81     /**
     82      * Parses a stream of bytes containing a MIME message.
     83      *
     84      * @param is the stream to parse.
     85      * @throws IOException on I/O errors.
     86      */
     87     public void parse(InputStream is) throws IOException {
     88         if (DEBUG_LOG_MESSAGE && Email.DEBUG) {
     89             is = new LoggingInputStream(is, "MIME", true);
     90         }
     91         rootStream = new RootInputStream(is);
     92         parseMessage(rootStream);
     93     }
     94 
     95     /**
     96      * Determines if this parser is currently in raw mode.
     97      *
     98      * @return <code>true</code> if in raw mode, <code>false</code>
     99      *         otherwise.
    100      * @see #setRaw(boolean)
    101      */
    102     public boolean isRaw() {
    103         return raw;
    104     }
    105 
    106     /**
    107      * Enables or disables raw mode. In raw mode all future entities
    108      * (messages or body parts) in the stream will be reported to the
    109      * {@link ContentHandler#raw(InputStream)} handler method only.
    110      * The stream will contain the entire unparsed entity contents
    111      * including header fields and whatever is in the body.
    112      *
    113      * @param raw <code>true</code> enables raw mode, <code>false</code>
    114      *        disables it.
    115      */
    116     public void setRaw(boolean raw) {
    117         this.raw = raw;
    118     }
    119 
    120     /**
    121      * Finishes the parsing and stops reading lines.
    122      * NOTE: No more lines will be parsed but the parser
    123      * will still call
    124      * {@link ContentHandler#endMultipart()},
    125      * {@link ContentHandler#endBodyPart()},
    126      * {@link ContentHandler#endMessage()}, etc to match previous calls
    127      * to
    128      * {@link ContentHandler#startMultipart(BodyDescriptor)},
    129      * {@link ContentHandler#startBodyPart()},
    130      * {@link ContentHandler#startMessage()}, etc.
    131      */
    132     public void stop() {
    133         rootStream.truncate();
    134     }
    135 
    136     /**
    137      * Parses an entity which consists of a header followed by a body containing
    138      * arbitrary data, body parts or an embedded message.
    139      *
    140      * @param is the stream to parse.
    141      * @throws IOException on I/O errors.
    142      */
    143     private void parseEntity(InputStream is) throws IOException {
    144         BodyDescriptor bd = parseHeader(is);
    145 
    146         if (bd.isMultipart()) {
    147             bodyDescriptors.addFirst(bd);
    148 
    149             handler.startMultipart(bd);
    150 
    151             MimeBoundaryInputStream tempIs =
    152                 new MimeBoundaryInputStream(is, bd.getBoundary());
    153             handler.preamble(new CloseShieldInputStream(tempIs));
    154             tempIs.consume();
    155 
    156             while (tempIs.hasMoreParts()) {
    157                 tempIs = new MimeBoundaryInputStream(is, bd.getBoundary());
    158                 parseBodyPart(tempIs);
    159                 tempIs.consume();
    160                 if (tempIs.parentEOF()) {
    161                     if (log.isWarnEnabled()) {
    162                         log.warn("Line " + rootStream.getLineNumber()
    163                                 + ": Body part ended prematurely. "
    164                                 + "Higher level boundary detected or "
    165                                 + "EOF reached.");
    166                     }
    167                     break;
    168                 }
    169             }
    170 
    171             handler.epilogue(new CloseShieldInputStream(is));
    172 
    173             handler.endMultipart();
    174 
    175             bodyDescriptors.removeFirst();
    176 
    177         } else if (bd.isMessage()) {
    178             if (bd.isBase64Encoded()) {
    179                 log.warn("base64 encoded message/rfc822 detected");
    180                 is = new EOLConvertingInputStream(
    181                         new Base64InputStream(is));
    182             } else if (bd.isQuotedPrintableEncoded()) {
    183                 log.warn("quoted-printable encoded message/rfc822 detected");
    184                 is = new EOLConvertingInputStream(
    185                         new QuotedPrintableInputStream(is));
    186             }
    187             bodyDescriptors.addFirst(bd);
    188             parseMessage(is);
    189             bodyDescriptors.removeFirst();
    190         } else {
    191             handler.body(bd, new CloseShieldInputStream(is));
    192         }
    193 
    194         /*
    195          * Make sure the stream has been consumed.
    196          */
    197         while (is.read() != -1) {
    198         }
    199     }
    200 
    201     private void parseMessage(InputStream is) throws IOException {
    202         if (raw) {
    203             handler.raw(new CloseShieldInputStream(is));
    204         } else {
    205             handler.startMessage();
    206             parseEntity(is);
    207             handler.endMessage();
    208         }
    209     }
    210 
    211     private void parseBodyPart(InputStream is) throws IOException {
    212         if (raw) {
    213             handler.raw(new CloseShieldInputStream(is));
    214         } else {
    215             handler.startBodyPart();
    216             parseEntity(is);
    217             handler.endBodyPart();
    218         }
    219     }
    220 
    221     /**
    222      * Parses a header.
    223      *
    224      * @param is the stream to parse.
    225      * @return a <code>BodyDescriptor</code> describing the body following
    226      *         the header.
    227      */
    228     private BodyDescriptor parseHeader(InputStream is) throws IOException {
    229         BodyDescriptor bd = new BodyDescriptor(bodyDescriptors.isEmpty()
    230                         ? null : (BodyDescriptor) bodyDescriptors.getFirst());
    231 
    232         handler.startHeader();
    233 
    234         int lineNumber = rootStream.getLineNumber();
    235 
    236         StringBuffer sb = new StringBuffer();
    237         int curr = 0;
    238         int prev = 0;
    239         while ((curr = is.read()) != -1) {
    240             if (curr == '\n' && (prev == '\n' || prev == 0)) {
    241                 /*
    242                  * [\r]\n[\r]\n or an immediate \r\n have been seen.
    243                  */
    244                 sb.deleteCharAt(sb.length() - 1);
    245                 break;
    246             }
    247             sb.append((char) curr);
    248             prev = curr == '\r' ? prev : curr;
    249         }
    250 
    251         if (curr == -1 && log.isWarnEnabled()) {
    252             log.warn("Line " + rootStream.getLineNumber()
    253                     + ": Unexpected end of headers detected. "
    254                     + "Boundary detected in header or EOF reached.");
    255         }
    256 
    257         int start = 0;
    258         int pos = 0;
    259         int startLineNumber = lineNumber;
    260         while (pos < sb.length()) {
    261             while (pos < sb.length() && sb.charAt(pos) != '\r') {
    262                 pos++;
    263             }
    264             if (pos < sb.length() - 1 && sb.charAt(pos + 1) != '\n') {
    265                 pos++;
    266                 continue;
    267             }
    268 
    269             if (pos >= sb.length() - 2 || fieldChars.get(sb.charAt(pos + 2))) {
    270 
    271                 /*
    272                  * field should be the complete field data excluding the
    273                  * trailing \r\n.
    274                  */
    275                 String field = sb.substring(start, pos);
    276                 start = pos + 2;
    277 
    278                 /*
    279                  * Check for a valid field.
    280                  */
    281                 int index = field.indexOf(':');
    282                 boolean valid = false;
    283                 if (index != -1 && fieldChars.get(field.charAt(0))) {
    284                     valid = true;
    285                     String fieldName = field.substring(0, index).trim();
    286                     for (int i = 0; i < fieldName.length(); i++) {
    287                         if (!fieldChars.get(fieldName.charAt(i))) {
    288                             valid = false;
    289                             break;
    290                         }
    291                     }
    292 
    293                     if (valid) {
    294                         handler.field(field);
    295                         bd.addField(fieldName, field.substring(index + 1));
    296                     }
    297                 }
    298 
    299                 if (!valid && log.isWarnEnabled()) {
    300                     log.warn("Line " + startLineNumber
    301                             + ": Ignoring invalid field: '" + field.trim() + "'");
    302                 }
    303 
    304                 startLineNumber = lineNumber;
    305             }
    306 
    307             pos += 2;
    308             lineNumber++;
    309         }
    310 
    311         handler.endHeader();
    312 
    313         return bd;
    314     }
    315 
    316     /**
    317      * Sets the <code>ContentHandler</code> to use when reporting
    318      * parsing events.
    319      *
    320      * @param h the <code>ContentHandler</code>.
    321      */
    322     public void setContentHandler(ContentHandler h) {
    323         this.handler = h;
    324     }
    325 
    326 }
    327