001 /****************************************************************
002 * Licensed to the Apache Software Foundation (ASF) under one *
003 * or more contributor license agreements. See the NOTICE file *
004 * distributed with this work for additional information *
005 * regarding copyright ownership. The ASF licenses this file *
006 * to you under the Apache License, Version 2.0 (the *
007 * "License"); you may not use this file except in compliance *
008 * with the License. You may obtain a copy of the License at *
009 * *
010 * http://www.apache.org/licenses/LICENSE-2.0 *
011 * *
012 * Unless required by applicable law or agreed to in writing, *
013 * software distributed under the License is distributed on an *
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
015 * KIND, either express or implied. See the License for the *
016 * specific language governing permissions and limitations *
017 * under the License. *
018 ****************************************************************/
019
020 package org.apache.james.mime4j.codec;
021
022 import java.io.ByteArrayInputStream;
023 import java.io.ByteArrayOutputStream;
024 import java.io.IOException;
025 import java.io.UnsupportedEncodingException;
026 import java.nio.charset.Charset;
027 import java.util.regex.Matcher;
028 import java.util.regex.Pattern;
029
030 import org.apache.james.mime4j.util.CharsetUtil;
031
032 /**
033 * Static methods for decoding strings, byte arrays and encoded words.
034 */
035 public class DecoderUtil {
036
037 private static final Pattern PATTERN_ENCODED_WORD = Pattern.compile(
038 "(.*?)=\\?(.+?)\\?(\\w)\\?(.+?)\\?=", Pattern.DOTALL);
039
040 /**
041 * Decodes a string containing quoted-printable encoded data.
042 *
043 * @param s the string to decode.
044 * @return the decoded bytes.
045 */
046 private static byte[] decodeQuotedPrintable(String s, DecodeMonitor monitor) {
047 ByteArrayOutputStream baos = new ByteArrayOutputStream();
048
049 try {
050 byte[] bytes = s.getBytes("US-ASCII");
051
052 QuotedPrintableInputStream is = new QuotedPrintableInputStream(
053 new ByteArrayInputStream(bytes), monitor);
054
055 int b = 0;
056 while ((b = is.read()) != -1) {
057 baos.write(b);
058 }
059 } catch (IOException e) {
060 // This should never happen!
061 throw new IllegalStateException(e);
062 }
063
064 return baos.toByteArray();
065 }
066
067 /**
068 * Decodes a string containing base64 encoded data.
069 *
070 * @param s the string to decode.
071 * @param monitor
072 * @return the decoded bytes.
073 */
074 private static byte[] decodeBase64(String s, DecodeMonitor monitor) {
075 ByteArrayOutputStream baos = new ByteArrayOutputStream();
076
077 try {
078 byte[] bytes = s.getBytes("US-ASCII");
079
080 Base64InputStream is = new Base64InputStream(
081 new ByteArrayInputStream(bytes), monitor);
082
083 int b = 0;
084 while ((b = is.read()) != -1) {
085 baos.write(b);
086 }
087 } catch (IOException e) {
088 // This should never happen!
089 throw new IllegalStateException(e);
090 }
091
092 return baos.toByteArray();
093 }
094
095 /**
096 * Decodes an encoded text encoded with the 'B' encoding (described in
097 * RFC 2047) found in a header field body.
098 *
099 * @param encodedText the encoded text to decode.
100 * @param charset the Java charset to use.
101 * @param monitor
102 * @return the decoded string.
103 * @throws UnsupportedEncodingException if the given Java charset isn't
104 * supported.
105 */
106 static String decodeB(String encodedText, String charset, DecodeMonitor monitor)
107 throws UnsupportedEncodingException {
108 byte[] decodedBytes = decodeBase64(encodedText, monitor);
109 return new String(decodedBytes, charset);
110 }
111
112 /**
113 * Decodes an encoded text encoded with the 'Q' encoding (described in
114 * RFC 2047) found in a header field body.
115 *
116 * @param encodedText the encoded text to decode.
117 * @param charset the Java charset to use.
118 * @return the decoded string.
119 * @throws UnsupportedEncodingException if the given Java charset isn't
120 * supported.
121 */
122 static String decodeQ(String encodedText, String charset, DecodeMonitor monitor)
123 throws UnsupportedEncodingException {
124 encodedText = replaceUnderscores(encodedText);
125
126 byte[] decodedBytes = decodeQuotedPrintable(encodedText, monitor);
127 return new String(decodedBytes, charset);
128 }
129
130 static String decodeEncodedWords(String body) {
131 return decodeEncodedWords(body, DecodeMonitor.SILENT);
132 }
133
134 /**
135 * Decodes a string containing encoded words as defined by RFC 2047. Encoded
136 * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
137 * or 'q' for quoted-printable and 'B' or 'b' for base64.
138 *
139 * @param body the string to decode
140 * @param monitor the DecodeMonitor to be used.
141 * @return the decoded string.
142 * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing)
143 */
144 public static String decodeEncodedWords(String body, DecodeMonitor monitor) throws IllegalArgumentException {
145 int tailIndex = 0;
146 boolean lastMatchValid = false;
147
148 StringBuilder sb = new StringBuilder();
149
150 for (Matcher matcher = PATTERN_ENCODED_WORD.matcher(body); matcher.find();) {
151 String separator = matcher.group(1);
152 String mimeCharset = matcher.group(2);
153 String encoding = matcher.group(3);
154 String encodedText = matcher.group(4);
155
156 String decoded = null;
157 decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor);
158 if (decoded == null) {
159 sb.append(matcher.group(0));
160 } else {
161 if (!lastMatchValid || !CharsetUtil.isWhitespace(separator)) {
162 sb.append(separator);
163 }
164 sb.append(decoded);
165 }
166
167 tailIndex = matcher.end();
168 lastMatchValid = decoded != null;
169 }
170
171 if (tailIndex == 0) {
172 return body;
173 } else {
174 sb.append(body.substring(tailIndex));
175 return sb.toString();
176 }
177 }
178
179 // return null on error
180 private static String tryDecodeEncodedWord(final String mimeCharset,
181 final String encoding, final String encodedText, final DecodeMonitor monitor) {
182 Charset charset = CharsetUtil.lookup(mimeCharset);
183 if (charset == null) {
184 monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
185 "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset");
186 return null;
187 }
188
189 if (encodedText.length() == 0) {
190 monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
191 "Missing encoded text in encoded word");
192 return null;
193 }
194
195 try {
196 if (encoding.equalsIgnoreCase("Q")) {
197 return DecoderUtil.decodeQ(encodedText, charset.name(), monitor);
198 } else if (encoding.equalsIgnoreCase("B")) {
199 return DecoderUtil.decodeB(encodedText, charset.name(), monitor);
200 } else {
201 monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
202 "Warning: Unknown encoding in encoded word");
203 return null;
204 }
205 } catch (UnsupportedEncodingException e) {
206 // should not happen because of isDecodingSupported check above
207 monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
208 "Unsupported encoding (", e.getMessage(), ") in encoded word");
209 return null;
210 } catch (RuntimeException e) {
211 monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
212 "Could not decode (", e.getMessage(), ") encoded word");
213 return null;
214 }
215 }
216
217 private static void monitor(DecodeMonitor monitor, String mimeCharset, String encoding,
218 String encodedText, String dropDesc, String... strings) throws IllegalArgumentException {
219 if (monitor.isListening()) {
220 String encodedWord = recombine(mimeCharset, encoding, encodedText);
221 StringBuilder text = new StringBuilder();
222 for (String str : strings) {
223 text.append(str);
224 }
225 text.append(" (");
226 text.append(encodedWord);
227 text.append(")");
228 String exceptionDesc = text.toString();
229 if (monitor.warn(exceptionDesc, dropDesc))
230 throw new IllegalArgumentException(text.toString());
231 }
232 }
233
234 private static String recombine(final String mimeCharset,
235 final String encoding, final String encodedText) {
236 return "=?" + mimeCharset + "?" + encoding + "?" + encodedText + "?=";
237 }
238
239 // Replace _ with =20
240 private static String replaceUnderscores(String str) {
241 // probably faster than String#replace(CharSequence, CharSequence)
242
243 StringBuilder sb = new StringBuilder(128);
244
245 for (int i = 0; i < str.length(); i++) {
246 char c = str.charAt(i);
247 if (c == '_') {
248 sb.append("=20");
249 } else {
250 sb.append(c);
251 }
252 }
253
254 return sb.toString();
255 }
256 }