001 /****************************************************************
002 * Licensed to the Apache Software Foundation (ASF) under one *
003 * or more contributor license agreements. See the NOTICE file *
004 * distributed with this work for additional information *
005 * regarding copyright ownership. The ASF licenses this file *
006 * to you under the Apache License, Version 2.0 (the *
007 * "License"); you may not use this file except in compliance *
008 * with the License. You may obtain a copy of the License at *
009 * *
010 * http://www.apache.org/licenses/LICENSE-2.0 *
011 * *
012 * Unless required by applicable law or agreed to in writing, *
013 * software distributed under the License is distributed on an *
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
015 * KIND, either express or implied. See the License for the *
016 * specific language governing permissions and limitations *
017 * under the License. *
018 ****************************************************************/
019
020 package org.apache.james.mime4j.parser;
021
022 import java.io.IOException;
023 import java.io.InputStream;
024
025 import org.apache.james.mime4j.MimeException;
026 import org.apache.james.mime4j.codec.DecodeMonitor;
027 import org.apache.james.mime4j.stream.BodyDescriptor;
028 import org.apache.james.mime4j.stream.BodyDescriptorBuilder;
029 import org.apache.james.mime4j.stream.EntityState;
030 import org.apache.james.mime4j.stream.Field;
031 import org.apache.james.mime4j.stream.MimeConfig;
032 import org.apache.james.mime4j.stream.MimeTokenStream;
033 import org.apache.james.mime4j.stream.RecursionMode;
034
035 /**
036 * <p>
037 * Parses MIME (or RFC822) message streams of bytes or characters and reports
038 * parsing events to a {@link ContentHandler} instance.
039 * </p>
040 * <p>
041 * Typical usage:<br/>
042 * <pre>
043 * ContentHandler handler = new MyHandler();
044 * MimeConfig config = new MimeConfig();
045 * MimeStreamParser parser = new MimeStreamParser(config);
046 * parser.setContentHandler(handler);
047 * InputStream instream = new FileInputStream("mime.msg");
048 * try {
049 * parser.parse(instream);
050 * } finally {
051 * instream.close();
052 * }
053 * </pre>
054 */
055 public class MimeStreamParser {
056
057 private ContentHandler handler = null;
058 private boolean contentDecoding;
059
060 private final MimeTokenStream mimeTokenStream;
061
062 public MimeStreamParser(MimeTokenStream tokenStream) {
063 super();
064 this.mimeTokenStream = tokenStream;
065 this.contentDecoding = false;
066 }
067
068 public MimeStreamParser(
069 final MimeConfig config,
070 final DecodeMonitor monitor,
071 final BodyDescriptorBuilder bodyDescBuilder) {
072 this(new MimeTokenStream(config != null ? config.clone() : new MimeConfig(),
073 monitor, bodyDescBuilder));
074 }
075
076 public MimeStreamParser(final MimeConfig config) {
077 this(config, null, null);
078 }
079
080 public MimeStreamParser() {
081 this(new MimeTokenStream(new MimeConfig(), null, null));
082 }
083
084 /**
085 * Determines whether this parser automatically decodes body content
086 * based on the on the MIME fields with the standard defaults.
087 */
088 public boolean isContentDecoding() {
089 return contentDecoding;
090 }
091
092 /**
093 * Defines whether parser should automatically decode body content
094 * based on the on the MIME fields with the standard defaults.
095 */
096 public void setContentDecoding(boolean b) {
097 this.contentDecoding = b;
098 }
099
100 /**
101 * Parses a stream of bytes containing a MIME message. Please note that if the
102 * {@link MimeConfig} associated with the mime stream returns a not null Content-Type
103 * value from its {@link MimeConfig#getHeadlessParsing()} method, the message is
104 * assumed to have no head section and the headless parsing mode will be used.
105 *
106 * @param instream the stream to parse.
107 * @throws MimeException if the message can not be processed
108 * @throws IOException on I/O errors.
109 */
110 public void parse(InputStream instream) throws MimeException, IOException {
111 MimeConfig config = mimeTokenStream.getConfig();
112 if (config.getHeadlessParsing() != null) {
113 Field contentType = mimeTokenStream.parseHeadless(
114 instream, config.getHeadlessParsing());
115 handler.startMessage();
116 handler.startHeader();
117 handler.field(contentType);
118 handler.endHeader();
119 } else {
120 mimeTokenStream.parse(instream);
121 }
122 OUTER: for (;;) {
123 EntityState state = mimeTokenStream.getState();
124 switch (state) {
125 case T_BODY:
126 BodyDescriptor desc = mimeTokenStream.getBodyDescriptor();
127 InputStream bodyContent;
128 if (contentDecoding) {
129 bodyContent = mimeTokenStream.getDecodedInputStream();
130 } else {
131 bodyContent = mimeTokenStream.getInputStream();
132 }
133 handler.body(desc, bodyContent);
134 break;
135 case T_END_BODYPART:
136 handler.endBodyPart();
137 break;
138 case T_END_HEADER:
139 handler.endHeader();
140 break;
141 case T_END_MESSAGE:
142 handler.endMessage();
143 break;
144 case T_END_MULTIPART:
145 handler.endMultipart();
146 break;
147 case T_END_OF_STREAM:
148 break OUTER;
149 case T_EPILOGUE:
150 handler.epilogue(mimeTokenStream.getInputStream());
151 break;
152 case T_FIELD:
153 handler.field(mimeTokenStream.getField());
154 break;
155 case T_PREAMBLE:
156 handler.preamble(mimeTokenStream.getInputStream());
157 break;
158 case T_RAW_ENTITY:
159 handler.raw(mimeTokenStream.getInputStream());
160 break;
161 case T_START_BODYPART:
162 handler.startBodyPart();
163 break;
164 case T_START_HEADER:
165 handler.startHeader();
166 break;
167 case T_START_MESSAGE:
168 handler.startMessage();
169 break;
170 case T_START_MULTIPART:
171 handler.startMultipart(mimeTokenStream.getBodyDescriptor());
172 break;
173 default:
174 throw new IllegalStateException("Invalid state: " + state);
175 }
176 state = mimeTokenStream.next();
177 }
178 }
179
180 /**
181 * Determines if this parser is currently in raw mode.
182 *
183 * @return <code>true</code> if in raw mode, <code>false</code>
184 * otherwise.
185 * @see #setRaw()
186 */
187 public boolean isRaw() {
188 return mimeTokenStream.isRaw();
189 }
190
191 /**
192 * Enables raw mode. In raw mode all future entities (messages
193 * or body parts) in the stream will be reported to the
194 * {@link ContentHandler#raw(InputStream)} handler method only.
195 * The stream will contain the entire unparsed entity contents
196 * including header fields and whatever is in the body.
197 */
198 public void setRaw() {
199 mimeTokenStream.setRecursionMode(RecursionMode.M_RAW);
200 }
201
202 /**
203 * Enables flat mode. In flat mode rfc822 parts are not recursively
204 * parsed and multipart content is handled as a single "simple" stream.
205 */
206 public void setFlat() {
207 mimeTokenStream.setRecursionMode(RecursionMode.M_FLAT);
208 }
209
210 /**
211 * Enables recursive mode. In this mode rfc822 parts are recursively
212 * parsed.
213 */
214 public void setRecurse() {
215 mimeTokenStream.setRecursionMode(RecursionMode.M_RECURSE);
216 }
217
218 /**
219 * Finishes the parsing and stops reading lines.
220 * NOTE: No more lines will be parsed but the parser
221 * will still call
222 * {@link ContentHandler#endMultipart()},
223 * {@link ContentHandler#endBodyPart()},
224 * {@link ContentHandler#endMessage()}, etc to match previous calls
225 * to
226 * {@link ContentHandler#startMultipart(BodyDescriptor)},
227 * {@link ContentHandler#startBodyPart()},
228 * {@link ContentHandler#startMessage()}, etc.
229 */
230 public void stop() {
231 mimeTokenStream.stop();
232 }
233
234 /**
235 * Sets the <code>ContentHandler</code> to use when reporting
236 * parsing events.
237 *
238 * @param h the <code>ContentHandler</code>.
239 */
240 public void setContentHandler(ContentHandler h) {
241 this.handler = h;
242 }
243
244 }