001 /****************************************************************
002 * Licensed to the Apache Software Foundation (ASF) under one *
003 * or more contributor license agreements. See the NOTICE file *
004 * distributed with this work for additional information *
005 * regarding copyright ownership. The ASF licenses this file *
006 * to you under the Apache License, Version 2.0 (the *
007 * "License"); you may not use this file except in compliance *
008 * with the License. You may obtain a copy of the License at *
009 * *
010 * http://www.apache.org/licenses/LICENSE-2.0 *
011 * *
012 * Unless required by applicable law or agreed to in writing, *
013 * software distributed under the License is distributed on an *
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
015 * KIND, either express or implied. See the License for the *
016 * specific language governing permissions and limitations *
017 * under the License. *
018 ****************************************************************/
019
020
021 package org.apache.james.jspf.parser;
022
023 import org.apache.james.jspf.core.Logger;
024 import org.apache.james.jspf.core.SPF1Constants;
025 import org.apache.james.jspf.core.SPF1Record;
026 import org.apache.james.jspf.core.SPFRecordParser;
027 import org.apache.james.jspf.core.exceptions.NeutralException;
028 import org.apache.james.jspf.core.exceptions.NoneException;
029 import org.apache.james.jspf.core.exceptions.PermErrorException;
030 import org.apache.james.jspf.terms.Configuration;
031 import org.apache.james.jspf.terms.Directive;
032 import org.apache.james.jspf.terms.Mechanism;
033 import org.apache.james.jspf.terms.Modifier;
034
035 import java.util.ArrayList;
036 import java.util.Collection;
037 import java.util.Collections;
038 import java.util.Iterator;
039 import java.util.List;
040 import java.util.regex.Matcher;
041 import java.util.regex.Pattern;
042
043 /**
044 * This class is used to parse SPF1-Records from their textual form to an
045 * SPF1Record object that is composed by 2 collections: directives and
046 * modifiers.
047 *
048 * The parsing is modular and get informations from Mechanism and Modifiers
049 * classes declared in the org/apache/james/jspf/parser/jspf.default.terms file.
050 *
051 * Each term implementation provide its own REGEX in the REGEX static public
052 * field. This parser simply join all the regexp in a single "alternative"
053 * pattern and count the number of catch groups (brackets) assigned to each
054 * regex fragment.
055 *
056 * SO it creates a big regex and an array where it store what term is associated
057 * to each catch group of the big regex.
058 *
059 * If the regex matches the input vspf1 record then it start looking for the
060 * matched group (not null) and lookup the term that created that part of the
061 * regex.
062 *
063 * With this informations it creates a new instance of the term and, if the term
064 * is ConfigurationEnabled it calls the config() method passing to it only the specific
065 * subset of the MatchResult (using the MatchResultSubset).
066 *
067 * TODO doubts about the specification - redirect or exp with no domain-spec are
068 * evaluated as an unknown-modifiers according to the current spec (it does not
069 * make too much sense) - top-label is defined differently in various specs.
070 * We'll have to review the code. -
071 * http://data.iana.org/TLD/tlds-alpha-by-domain.txt (we should probably beeter
072 * use and alpha sequence being at least 2 chars - Somewhere is defined as "."
073 * TLD [ "." ] - Otherwise defined as ( *alphanum ALPHA *alphanum ) / (
074 * 1*alphanum "-" *( * alphanum / "-" ) alphanum )
075 *
076 * @see org.apache.james.jspf.core.SPF1Record
077 *
078 */
079 public class RFC4408SPF1Parser implements SPFRecordParser {
080
081 /**
082 * Regex based on http://www.ietf.org/rfc/rfc4408.txt.
083 * This will be the next official SPF-Spec
084 */
085
086 // Changed this because C, T and R MACRO_LETTERS are not available
087 // in record parsing and must return a PermError.
088
089 // private static final String MACRO_LETTER_PATTERN = "[lsodipvhcrtLSODIPVHCRT]";
090
091 /**
092 * ABNF: qualifier = "+" / "-" / "?" / "~"
093 */
094 private static final String QUALIFIER_PATTERN = "[" + "\\"
095 + SPF1Constants.PASS + "\\" + SPF1Constants.FAIL + "\\"
096 + SPF1Constants.NEUTRAL + "\\" + SPF1Constants.SOFTFAIL + "]";
097
098 private Pattern termsSeparatorPattern = null;
099
100 private Pattern termPattern = null;
101
102 private int TERM_STEP_REGEX_QUALIFIER_POS;
103
104 private int TERM_STEP_REGEX_MECHANISM_POS;
105
106 private int TERM_STEP_REGEX_MODIFIER_POS;
107
108 private List<TermDefinition> matchResultPositions;
109
110 private Logger log;
111
112 private TermsFactory termsFactory;
113
114 /**
115 * Constructor. Creates all the values needed to run the parsing
116 *
117 * @param logger the logger to use
118 * @param termsFactory the TermsFactory implementation
119 */
120 public RFC4408SPF1Parser(Logger logger, TermsFactory termsFactory) {
121 this.log = logger;
122 this.termsFactory = termsFactory;
123
124 /**
125 * ABNF: mechanism = ( all / include / A / MX / PTR / IP4 / IP6 / exists )
126 */
127 String MECHANISM_REGEX = createRegex(termsFactory.getMechanismsCollection());
128
129 /**
130 * ABNF: modifier = redirect / explanation / unknown-modifier
131 */
132 String MODIFIER_REGEX = "(" + createRegex(termsFactory.getModifiersCollection()) + ")";
133
134 /**
135 * ABNF: directive = [ qualifier ] mechanism
136 */
137 String DIRECTIVE_REGEX = "(" + QUALIFIER_PATTERN + "?)("
138 + MECHANISM_REGEX + ")";
139
140 /**
141 * ABNF: ( directive / modifier )
142 */
143 String TERM_REGEX = "(?:" + MODIFIER_REGEX + "|" + DIRECTIVE_REGEX
144 + ")";
145
146 /**
147 * ABNF: 1*SP
148 */
149 String TERMS_SEPARATOR_REGEX = "[ ]+";
150
151 termsSeparatorPattern = Pattern.compile(TERMS_SEPARATOR_REGEX);
152 termPattern = Pattern.compile(TERM_REGEX);
153
154 initializePositions();
155 }
156
157 /**
158 * Fill in the matchResultPositions ArrayList. This array simply map each
159 * regex matchgroup to the Term class that originated that part of the
160 * regex.
161 */
162 private void initializePositions() {
163 ArrayList<TermDefinition> matchResultPositions = new ArrayList<TermDefinition>();
164
165 // FULL MATCH
166 int posIndex = 0;
167 matchResultPositions.ensureCapacity(posIndex + 1);
168 matchResultPositions.add(posIndex, null);
169
170 Iterator<TermDefinition> i;
171
172 TERM_STEP_REGEX_MODIFIER_POS = ++posIndex;
173 matchResultPositions.ensureCapacity(posIndex + 1);
174 matchResultPositions.add(TERM_STEP_REGEX_MODIFIER_POS, null);
175 i = termsFactory.getModifiersCollection().iterator();
176 while (i.hasNext()) {
177 TermDefinition td = i.next();
178 int size = td.getMatchSize() + 1;
179 for (int k = 0; k < size; k++) {
180 posIndex++;
181 matchResultPositions.ensureCapacity(posIndex + 1);
182 matchResultPositions.add(posIndex, td);
183 }
184 }
185
186 TERM_STEP_REGEX_QUALIFIER_POS = ++posIndex;
187 matchResultPositions.ensureCapacity(posIndex + 1);
188 matchResultPositions.add(posIndex, null);
189
190 TERM_STEP_REGEX_MECHANISM_POS = ++posIndex;
191 matchResultPositions.ensureCapacity(posIndex + 1);
192 matchResultPositions.add(TERM_STEP_REGEX_MECHANISM_POS, null);
193 i = termsFactory.getMechanismsCollection().iterator();
194 while (i.hasNext()) {
195 TermDefinition td = i.next();
196 int size = td.getMatchSize() + 1;
197 for (int k = 0; k < size; k++) {
198 posIndex++;
199 matchResultPositions.ensureCapacity(posIndex + 1);
200 matchResultPositions.add(posIndex, td);
201 }
202 }
203
204 if (log.isDebugEnabled()) {
205 log.debug("Parsing catch group positions: Modifiers["
206 + TERM_STEP_REGEX_MODIFIER_POS + "] Qualifier["
207 + TERM_STEP_REGEX_QUALIFIER_POS + "] Mechanism["
208 + TERM_STEP_REGEX_MECHANISM_POS + "]");
209 for (int k = 0; k < matchResultPositions.size(); k++) {
210 log
211 .debug(k
212 + ") "
213 + (matchResultPositions.get(k) != null ? ((TermDefinition) matchResultPositions
214 .get(k)).getPattern().pattern()
215 : null));
216 }
217 }
218
219 this.matchResultPositions = Collections.synchronizedList(matchResultPositions);
220 }
221
222 /**
223 * Loop the classes searching for a String static field named
224 * staticFieldName and create an OR regeex like this:
225 * (?:FIELD1|FIELD2|FIELD3)
226 *
227 * @param classes
228 * classes to analyze
229 * @param staticFieldName
230 * static field to concatenate
231 * @return regex The regex
232 */
233 private String createRegex(Collection<TermDefinition> commandMap) {
234 StringBuffer modifierRegex = new StringBuffer();
235 Iterator<TermDefinition> i = commandMap.iterator();
236 boolean first = true;
237 while (i.hasNext()) {
238 if (first) {
239 modifierRegex.append("(?:(");
240 first = false;
241 } else {
242 modifierRegex.append(")|(");
243 }
244 Pattern pattern = i.next().getPattern();
245 modifierRegex.append(pattern.pattern());
246 }
247 modifierRegex.append("))");
248 return modifierRegex.toString();
249 }
250
251 /**
252 * @see org.apache.james.jspf.core.SPFRecordParser#parse(java.lang.String)
253 */
254 public SPF1Record parse(String spfRecord) throws PermErrorException,
255 NoneException, NeutralException {
256
257 log.debug("Start parsing SPF-Record: " + spfRecord);
258
259 SPF1Record result = new SPF1Record();
260
261 // check the version "header"
262 if (spfRecord.toLowerCase().startsWith(SPF1Constants.SPF_VERSION1 + " ") || spfRecord.equalsIgnoreCase(SPF1Constants.SPF_VERSION1)) {
263 if (!spfRecord.toLowerCase().startsWith(SPF1Constants.SPF_VERSION1 + " ")) throw new NeutralException("Empty SPF Record");
264 } else {
265 throw new NoneException("No valid SPF Record: " + spfRecord);
266 }
267
268 // extract terms
269 String[] terms = termsSeparatorPattern.split(spfRecord.replaceFirst(
270 SPF1Constants.SPF_VERSION1, ""));
271
272 // cycle terms
273 for (int i = 0; i < terms.length; i++) {
274 if (terms[i].length() > 0) {
275 Matcher termMatcher = termPattern.matcher(terms[i]);
276 if (!termMatcher.matches()) {
277 throw new PermErrorException("Term [" + terms[i]
278 + "] is not syntactically valid: "
279 + termPattern.pattern());
280 }
281
282 // true if we matched a modifier, false if we matched a
283 // directive
284 String modifierString = termMatcher
285 .group(TERM_STEP_REGEX_MODIFIER_POS);
286
287 if (modifierString != null) {
288 // MODIFIER
289 Modifier mod = (Modifier) lookupAndCreateTerm(termMatcher,
290 TERM_STEP_REGEX_MODIFIER_POS);
291
292 if (mod.enforceSingleInstance()) {
293 Iterator<Modifier> it = result.getModifiers().iterator();
294 while (it.hasNext()) {
295 if (it.next().getClass().equals(mod.getClass())) {
296 throw new PermErrorException("More than one "
297 + modifierString
298 + " found in SPF-Record");
299 }
300 }
301 }
302
303 result.getModifiers().add(mod);
304
305 } else {
306 // DIRECTIVE
307 String qualifier = termMatcher
308 .group(TERM_STEP_REGEX_QUALIFIER_POS);
309
310 Object mech = lookupAndCreateTerm(termMatcher,
311 TERM_STEP_REGEX_MECHANISM_POS);
312
313 result.getDirectives().add(
314 new Directive(qualifier, (Mechanism) mech, log.getChildLogger(qualifier+"directive")));
315
316 }
317
318 }
319 }
320
321 return result;
322 }
323
324 /**
325 * @param res
326 * the MatchResult
327 * @param start
328 * the position where the terms starts
329 * @return
330 * @throws PermErrorException
331 */
332 private Object lookupAndCreateTerm(Matcher res, int start)
333 throws PermErrorException {
334 for (int k = start + 1; k < res.groupCount(); k++) {
335 if (res.group(k) != null && k != TERM_STEP_REGEX_QUALIFIER_POS) {
336 TermDefinition c = (TermDefinition) matchResultPositions.get(k);
337 Configuration subres = new MatcherBasedConfiguration(res, k, c
338 .getMatchSize());
339 try {
340 return termsFactory.createTerm(c.getTermDef(), subres);
341 } catch (InstantiationException e) {
342 e.printStackTrace();
343 // TODO is it ok to use a Runtime for this? Or should we use a PermError here?
344 throw new IllegalStateException("Unexpected error creating term: " + e.getMessage());
345 }
346
347 }
348 }
349 return null;
350 }
351
352 }