package eu.monnetproject.tagger.stanford;

import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import eu.monnetproject.lang.Language;
import eu.monnetproject.pos.POSTag;
import eu.monnetproject.pos.POSToken;
import eu.monnetproject.tagger.Tagger;
import eu.monnetproject.tokens.Token;
import eu.monnetproject.util.Logger;
import eu.monnetproject.util.Logging;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:eu/monnetproject/tagger/stanford/StanfordTagger.class */
/**
 * {@link Tagger} implementation backed by a Stanford {@link MaxentTagger} model.
 *
 * <p>The language and tag set are derived from the model file name, which is
 * expected to match {@code <language>.<tagset>.tagger}. The model itself is
 * loaded lazily on the first call to {@link #tag(List)}.
 *
 * <p>Note: the lazy initialization is not synchronized.
 */
public class StanfordTagger implements Tagger {
    /** Shape of a model file name: group 1 is the language, group 2 the tag set. */
    private static final Pattern FILE_NAME_PATTERN = Pattern.compile("(.*)\\.(.+)\\.tagger");

    private final Logger log;
    private MaxentTagger tagger;
    private final File modelFile;
    private final String fileName;
    private StanfordPOSSet posSet;
    private final Language lang;

    /** A token value paired with the part-of-speech tag assigned to it. */
    private static class StanfordPOSToken implements POSToken {
        private final String token;
        private final POSTag posTag;

        /**
         * @param str the surface form of the token
         * @param pOSTag the tag assigned to the token
         */
        public StanfordPOSToken(String str, POSTag pOSTag) {
            this.token = str;
            this.posTag = pOSTag;
        }

        /** @return the surface form of the token */
        public String getValue() {
            return this.token;
        }

        /** @return the tag assigned to the token */
        public POSTag getPOSTag() {
            return this.posTag;
        }

        /**
         * Computes the lemma by running a freshly created
         * {@code StanfordEnglishLemmatizer} over this token.
         *
         * @return the lemma reported by the lemmatizer
         */
        @Deprecated
        public String getLemma() {
            return new StanfordEnglishLemmatizer().stem(this).getLemma();
        }

        /** @return the token and its tag joined as {@code token/tag} */
        public String toString() {
            return this.token + "/" + this.posTag;
        }
    }

    /**
     * Creates a tagger for the given model file, using the file's own name to
     * derive the language and tag set.
     *
     * @param file the model file
     */
    public StanfordTagger(File file) {
        this(file, file.getName());
    }

    /**
     * Creates a tagger for the given model file.
     *
     * @param file the model file; it is not read until the first call to {@link #tag(List)}
     * @param str the name used to derive language and tag set; when it does not
     *     match {@code <language>.<tagset>.tagger} the language is {@code null}
     */
    public StanfordTagger(File file, String str) {
        this.log = Logging.getLogger(this);
        this.modelFile = file;
        this.fileName = str;
        Matcher matcher = FILE_NAME_PATTERN.matcher(str);
        this.lang = matcher.matches() ? Language.get(matcher.group(1)) : null;
    }

    /**
     * Extracts the tag set name from the model file name.
     *
     * @return the second component of the file name, or the literal
     *     {@code "ERROR"} (after logging a warning) when the name does not match
     */
    public String getTagSet() {
        Matcher matcher = FILE_NAME_PATTERN.matcher(this.fileName);
        if (matcher.matches()) {
            return matcher.group(2);
        }
        this.log.warning("Could not extract tag set from file name " + this.fileName);
        return "ERROR";
    }

    /**
     * @return the language derived from the file name, or {@code null} when the
     *     name did not match the expected pattern
     */
    public Language getLanguage() {
        return this.lang;
    }

    /**
     * Loads the model and builds the POS set.
     *
     * @throws IllegalStateException if the model or its POS set cannot be created
     */
    private void init() {
        try {
            // Build both objects before publishing either, so a failure half-way
            // cannot leave a non-null tagger alongside a null POS set.
            MaxentTagger loadedTagger = new MaxentTagger(this.modelFile.getPath());
            StanfordPOSSet loadedPosSet = new StanfordPOSSet(loadedTagger.getTags());
            this.tagger = loadedTagger;
            this.posSet = loadedPosSet;
        } catch (Exception e) {
            throw new IllegalStateException(
                    "Could not load tagger model " + this.modelFile.getPath(), e);
        }
    }

    /**
     * Tags a sentence given as a list of tokens.
     *
     * @param list the tokens of one sentence, in order
     * @return one {@link POSToken} per element of the tagger's output, in order
     * @throws IllegalStateException if the model cannot be loaded
     * @throws RuntimeException if the tagger returns a different number of
     *     tokens than it was given
     */
    public List<POSToken> tag(List<Token> list) {
        if (this.tagger == null || this.posSet == null) {
            init();
        }
        List<Word> words = new ArrayList<Word>(list.size());
        for (Token token : list) {
            words.add(new Word(token.getValue()));
        }
        List<TaggedWord> taggedWords = this.tagger.tagSentence(words);
        if (taggedWords.size() != list.size()) {
            throw new RuntimeException("Tag result length does not match input");
        }
        List<POSToken> result = new ArrayList<POSToken>(taggedWords.size());
        for (TaggedWord taggedWord : taggedWords) {
            POSTag posTag = new StanfordPOSTag(this.posSet.getPOS(taggedWord.tag()));
            result.add(new StanfordPOSToken(taggedWord.word(), posTag));
        }
        return result;
    }
}
