package eu.monnetproject.parser.stanford;

import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.ScoredObject;
import eu.monnetproject.lang.Language;
import eu.monnetproject.parser.DependencyParser;
import eu.monnetproject.parser.Parser;
import eu.monnetproject.parser.TreeNode;
import eu.monnetproject.tokens.Token;
import eu.monnetproject.util.Logger;
import eu.monnetproject.util.Logging;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:eu/monnetproject/parser/stanford/StanfordParser.class */
/**
 * Constituency and dependency parser backed by a serialized Stanford
 * {@link LexicalizedParser} model.
 *
 * <p>The model is loaded lazily on the first parse request. The language and
 * tag set are derived from the model's file name, which is expected to have
 * the form {@code <language>.<tagset>.ser} with an optional {@code .gz}
 * suffix.
 *
 * <p>Lazy loading is not synchronized; this class performs no locking of its
 * own.
 */
public class StanfordParser implements Parser, DependencyParser {
    /** Matches {@code <language>.<tagset>.ser[.gz]}; group 1 is the language, group 2 the tag set. */
    private static final Pattern MODEL_NAME = Pattern.compile("(.*)\\.(.+)\\.ser(\\.gz)?");

    private final Logger log;
    private final File file;
    private final String fileName;
    private final Language language;
    private LexicalizedParser lp;
    private TreebankLanguagePack tlp;

    /**
     * Creates a parser for the given model file, using the file's own name to
     * determine the language and tag set.
     *
     * @param file the serialized parser model
     */
    public StanfordParser(File file) {
        this(file, file.getName());
    }

    /**
     * Creates a parser for the given model file, using an explicitly supplied
     * name to determine the language and tag set.
     *
     * @param file the serialized parser model
     * @param str  the name to interpret as {@code <language>.<tagset>.ser[.gz]};
     *             if it does not match, {@link #getLanguage()} returns {@code null}
     */
    public StanfordParser(File file, String str) {
        this.log = Logging.getLogger(this);
        this.file = file;
        this.fileName = str;
        Matcher matcher = MODEL_NAME.matcher(str);
        this.language = matcher.matches() ? Language.get(matcher.group(1)) : null;
    }

    /**
     * Loads the model on first use. The fields are only assigned once the
     * parser has been fully configured, so a failure while loading leaves the
     * instance un-initialized and the next call retries instead of running
     * with a half-built state.
     */
    private void ensureInitialized() {
        if (this.lp != null) {
            return;
        }
        LexicalizedParser parser = new LexicalizedParser(this.file.getPath());
        parser.setOptionFlags(new String[]{"-maxLength", "80"});
        // NOTE(review): the Penn Treebank language pack is used regardless of
        // the language derived from the file name - confirm this is intended.
        this.tlp = new PennTreebankLanguagePack();
        this.lp = parser;
    }

    /**
     * Copies the tokens into a list with a single pass over {@code iterable},
     * so that one-shot iterables are supported and the input is not walked
     * repeatedly just to count it.
     */
    private static ArrayList<Token> copyTokens(Iterable<Token> iterable) {
        ArrayList<Token> tokens = new ArrayList<Token>();
        for (Token token : iterable) {
            tokens.add(token);
        }
        return tokens;
    }

    /** Converts tokens to the Stanford {@link Word} objects the parser consumes. */
    private static List<Word> toWords(List<Token> tokens) {
        List<Word> words = new ArrayList<Word>(tokens.size());
        for (Token token : tokens) {
            words.add(new Word(token.getValue()));
        }
        return words;
    }

    /**
     * Returns the tag set encoded in the model file name.
     *
     * @return the second dot-separated component of the file name, or the
     *         literal string {@code "ERROR"} (after logging) if the name does
     *         not have the expected form
     */
    public String getTagSet() {
        Matcher matcher = MODEL_NAME.matcher(this.fileName);
        if (matcher.matches()) {
            return matcher.group(2);
        }
        this.log.severe("Could not extract tag set from file name " + this.fileName);
        return "ERROR";
    }

    /**
     * Returns the language derived from the model file name.
     *
     * @return the language, or {@code null} if the file name did not have the
     *         expected form
     */
    public Language getLanguage() {
        return this.language;
    }

    /** Parses the tokens and builds the grammatical structure of the resulting tree. */
    private GrammaticalStructure doParse(Iterable<Token> iterable) {
        ensureInitialized();
        List<Word> words = toWords(copyTokens(iterable));
        return this.tlp.grammaticalStructureFactory().newGrammaticalStructure(this.lp.apply(words));
    }

    /**
     * Produces the constituency parse of a tokenized sentence.
     *
     * @param iterable the tokens of the sentence, in order
     * @return the parse tree rooted at the first child of the parser's top node
     * @throws IllegalStateException if the parser returns a tree without children
     */
    public TreeNode parse(Iterable<Token> iterable) {
        ensureInitialized();
        // Kept as ArrayList: TreeNodeWrap's constructor signature is not visible here.
        ArrayList<Token> tokens = copyTokens(iterable);
        Tree tree = this.lp.apply(toWords(tokens));
        // NOTE(review): the result is discarded; retained in case building the
        // grammatical structure has side effects or acts as validation - confirm
        // and remove if not.
        this.tlp.grammaticalStructureFactory().newGrammaticalStructure(tree);
        StanfordPOSSet posSet = new StanfordPOSSet(this.tlp);
        Tree[] children = tree.children();
        if (children.length == 0) {
            // Fail with a meaningful message rather than an index error below.
            throw new IllegalStateException("Stanford Parser returned an empty parse tree");
        }
        if (children.length != 1) {
            this.log.warning("Multiple head nodes");
        }
        return new TreeNodeWrap(children[0], posSet, tokens, 0);
    }

    /**
     * Produces the dependency parse of a tokenized sentence, using the
     * collapsed typed dependencies of the grammatical structure.
     *
     * @param iterable the tokens of the sentence, in order
     * @return the dependency tree
     */
    public TreeNode depParse(Iterable<Token> iterable) {
        GrammaticalStructure structure = doParse(iterable);
        return new DepTreeWrap(structure.typedDependenciesCollapsed(), structure.root(),
                new StanfordPOSSet(this.tlp));
    }

    /**
     * Produces up to {@code i} PCFG parses of a tokenized sentence, each
     * carrying the score reported by the parser.
     *
     * @param iterable the tokens of the sentence, in order
     * @param i        the number of parses to request from the parser
     * @return the parses in the order the parser returned them
     * @throws RuntimeException if any returned tree does not have exactly one
     *                          child under its top node
     */
    public List<TreeNode> bestParses(Iterable<Token> iterable, int i) {
        ensureInitialized();
        // Kept as ArrayList: TreeNodeWrap's constructor signature is not visible here.
        ArrayList<Token> tokens = copyTokens(iterable);
        this.lp.parse(toWords(tokens));
        List<ScoredObject<Tree>> scoredParses = this.lp.getKBestPCFGParses(i);
        List<TreeNode> results = new ArrayList<TreeNode>(i);
        for (ScoredObject<Tree> scored : scoredParses) {
            double score = scored.score();
            Tree tree = scored.object();
            // NOTE(review): result discarded, as in parse(); retained pending
            // confirmation that it has no required side effects.
            this.tlp.grammaticalStructureFactory().newGrammaticalStructure(tree);
            StanfordPOSSet posSet = new StanfordPOSSet(this.tlp);
            Tree[] children = tree.children();
            if (children.length != 1) {
                throw new RuntimeException("Stanford Parser returned more than one parse from top node?");
            }
            results.add(new TreeNodeWrap(children[0], posSet, tokens, 0, score));
        }
        return results;
    }
}
