package eu.monnetproject.lemon.generator.lela;

import eu.monnetproject.lang.Language;
import eu.monnetproject.lang.Script;
import eu.monnetproject.lemon.LemonFactory;
import eu.monnetproject.lemon.LemonModel;
import eu.monnetproject.lemon.generator.ActorGenerationReport;
import eu.monnetproject.lemon.generator.GenerationState;
import eu.monnetproject.lemon.generator.GeneratorActor;
import eu.monnetproject.lemon.model.Component;
import eu.monnetproject.lemon.model.LexicalEntry;
import eu.monnetproject.lemon.model.LexicalForm;
import eu.monnetproject.lemon.model.Text;
import eu.monnetproject.tokenizer.Tokenizer;
import eu.monnetproject.tokenizer.TokenizerFactory;
import eu.monnetproject.tokens.Token;
import eu.monnetproject.util.Logger;
import eu.monnetproject.util.Logging;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;

/* loaded from: input_file:eu/monnetproject/lemon/generator/lela/TokenizerActor.class */
/**
 * Generator actor that splits the written form of a lexical entry into tokens
 * and records the result as a decomposition of that entry.
 *
 * <p>For every token that is not pure punctuation, a component is created and
 * linked to a lexical entry obtained from the generation state. Entries that
 * already have a decomposition, or whose form yields a single token, are left
 * untouched.
 */
public class TokenizerActor implements GeneratorActor {
    /** Actor name used in every generation report filed by this class. */
    private static final String ACTOR_NAME = "Tokenizer";

    /**
     * Matches tokens that contain no word character at all (including the empty
     * token). UNICODE_CHARACTER_CLASS is required: with the default ASCII-only
     * definition of {@code \W}, a token written entirely in a non-Latin script
     * would match and be discarded as if it were punctuation.
     */
    private static final Pattern NON_WORD = Pattern.compile("\\W*", Pattern.UNICODE_CHARACTER_CLASS);

    /** Matches tokens consisting only of underscores (which {@code \w} treats as word characters). */
    private static final Pattern UNDERSCORES = Pattern.compile("_+");

    private final TokenizerFactory tokenizerFactory;
    private final Logger log = Logging.getLogger(this);

    /**
     * @param tokenizerFactory factory queried for a tokenizer for the script of each processed entry
     */
    public TokenizerActor(TokenizerFactory tokenizerFactory) {
        this.tokenizerFactory = tokenizerFactory;
    }

    /**
     * @return the fixed priority of this actor, {@code 10.0}
     */
    @Override // eu.monnetproject.lemon.generator.GeneratorActor
    public double getPriority() {
        return 10.0d;
    }

    /**
     * Tests whether a text is acceptable for the requested script.
     *
     * @param text   text whose language tag is inspected
     * @param script required script; {@code null} accepts any text
     * @return {@code true} if {@code script} is {@code null}, if the script of the
     *         text's language equals {@code script}, or if {@code script} is Latin
     *         and the text's language declares no script
     */
    private static boolean checkScript(Text text, Script script) {
        if (script == null) {
            return true;
        }
        Script textScript = Language.get(text.language).getScript();
        return (script == Script.LATIN && textScript == null) || script.equals(textScript);
    }

    /**
     * Picks the text of a single form for the given script: the written
     * representation when one is set (and only that one), otherwise the first
     * matching text among the form's other representations.
     *
     * @return the matching text, or {@code null} if the form offers none
     */
    private static Text matchingText(LexicalForm form, Script script) {
        Text writtenRep = form.getWrittenRep();
        if (writtenRep != null) {
            // A written representation in the wrong script is not replaced by
            // another representation of the same form.
            return checkScript(writtenRep, script) ? writtenRep : null;
        }
        for (Collection<Text> texts : form.getRepresentations().values()) {
            for (Text text : texts) {
                if (checkScript(text, script)) {
                    return text;
                }
            }
        }
        return null;
    }

    /**
     * Finds the text of an entry to be used for the given script.
     *
     * <p>If the entry has a canonical form, only that form is consulted.
     * Otherwise the entry's forms are scanned in iteration order and the first
     * one offering a matching text wins.
     *
     * @param lexicalEntry entry to inspect
     * @param script       required script, or {@code null} to accept any
     * @return the selected text, or {@code null} if none matches
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public static Text getForm(LexicalEntry lexicalEntry, Script script) {
        LexicalForm canonical = lexicalEntry.getCanonicalForm();
        if (canonical != null) {
            return matchingText(canonical, script);
        }
        for (LexicalForm form : lexicalEntry.getForms()) {
            Text candidate = matchingText(form, script);
            if (candidate != null) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Tokenizes the entry's form and, if it has more than one token, adds a
     * decomposition made of one component per non-punctuation token.
     *
     * <p>A report describing the outcome is filed on the generation state on
     * every path. When the entry has no canonical form, the first of its forms
     * is promoted to canonical after the decomposition has been added.
     *
     * @param lexicalEntry    entry to decompose
     * @param generationState state providing the model, language, naming and reporting
     */
    @Override // eu.monnetproject.lemon.generator.GeneratorActor
    public void perform(LexicalEntry lexicalEntry, GenerationState generationState) {
        LemonModel model = generationState.getModel();
        Language language = generationState.getLanguage();
        LemonFactory factory = model.getFactory();
        if (!lexicalEntry.getDecompositions().isEmpty()) {
            this.log.info("Already tokenized skipping");
            report(generationState, ActorGenerationReport.Status.UNNECESSARY, "Already tokenized");
            return;
        }

        Script script = language.getScript();
        if (script == null) {
            Script[] knownScripts = Script.getKnownScriptsForLanguage(language);
            // Guard against an empty array as well as null so that we fall through
            // to the Latin default instead of failing on index 0.
            if (knownScripts != null && knownScripts.length > 0) {
                script = knownScripts[0];
            }
        }
        if (script == null) {
            // The fallback is reported as FAILED, but processing continues with Latin.
            this.log.warning("Could not deduce script for language " + language + " defaulting to Latin");
            report(generationState, ActorGenerationReport.Status.FAILED, "Script defaulted to Latin");
            script = Script.LATIN;
        }

        Tokenizer tokenizer = this.tokenizerFactory.getTokenizer(script);
        if (tokenizer == null) {
            this.log.warning("No support for script " + script);
            report(generationState, ActorGenerationReport.Status.FAILED, "No support for script " + script);
            return;
        }

        Text form = getForm(lexicalEntry, script);
        if (form == null) {
            report(generationState, ActorGenerationReport.Status.NO_INFO, "Could not get form");
            return;
        }

        List<Token> tokens = tokenizer.tokenize(form.value);
        if (tokens.isEmpty()) {
            report(generationState, ActorGenerationReport.Status.EXCEPTION, "Tokenizer returned empty list");
            return;
        }
        if (tokens.size() <= 1) {
            report(generationState, ActorGenerationReport.Status.UNNECESSARY, "Single word");
            return;
        }

        List<Component> components = new ArrayList<Component>();
        for (Token token : tokens) {
            String value = token.getValue();
            if (NON_WORD.matcher(value).matches() || UNDERSCORES.matcher(value).matches()) {
                // Punctuation-only, underscore-only and empty tokens get no component.
                continue;
            }
            this.log.info("Tokenizer Actor: Adding " + value);
            Component component = factory.makeComponent(
                    generationState.namer().name(generationState.getLexiconName(), generationState.getEntryName(), "comp"));
            components.add(component);
            component.setElement(generationState.addLexicalEntry(value, language));
        }
        lexicalEntry.addDecomposition(components);
        report(generationState, ActorGenerationReport.Status.OK, "Adding decomposition");

        if (lexicalEntry.getCanonicalForm() == null) {
            // getForm() returned a text without a canonical form being set, so the
            // entry has at least one form; move the first one to the canonical slot.
            LexicalForm first = lexicalEntry.getForms().iterator().next();
            lexicalEntry.removeForm(first);
            lexicalEntry.setCanonicalForm(first);
        }
    }

    /** Files a report for this actor with the given status and message. */
    private static void report(GenerationState generationState, ActorGenerationReport.Status status, String message) {
        generationState.report(new ActorGenerationReportImpl(ACTOR_NAME, status, message));
    }

    /**
     * @return always {@code null}; this actor provides no auxiliary lexicon
     */
    public LemonModel getAuxiliaryLexicon() {
        return null;
    }
}
