package de.julielab.jcore.ae.lingscope;

import de.julielab.java.utilities.FileUtilities;
import de.julielab.jcore.types.Lemma;
import de.julielab.jcore.types.LikelihoodIndicator;
import de.julielab.jcore.types.POSTag;
import de.julielab.jcore.types.Scope;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import lingscope.algorithms.Annotator;
import lingscope.drivers.CueAndPosFilesMerger;
import lingscope.drivers.SentenceTagger;
import lingscope.structures.AnnotatedSentence;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * UIMA analysis engine that runs the Lingscope cue and scope annotators on each sentence of a CAS.
 *
 * <p>For every {@link Sentence} the covered {@link Token}s and their first PoS tag are collected, the cue
 * annotator is run on the token text, the cue result is merged with the PoS tag sequence and the scope
 * annotator is run on the merged sentence. Cue spans are written as {@link LikelihoodIndicator} annotations
 * and scope spans as {@link Scope} annotations. When a sentence yields the same number of cues and scopes,
 * the i-th scope is linked to the i-th cue.</p>
 *
 * <p>Tokens must carry at least one PoS tag. Lemmas are only required when a likelihood dictionary is
 * configured and the component is not run as a negation annotator.</p>
 */
@ResourceMetaData(name = "JCoRe Lingscope AE", description = "This component uses the Lingscope negation/hedge detection algorithm and models to annotate negation/hedge cues and the scope to which the cues apply.")
@TypeCapability(inputs = {"de.julielab.jcore.types.Token", "de.julielab.jcore.types.PennBioIEPOSTag"}, outputs = {"de.julielab.jcore.types.LikelihoodIndicator", "de.julielab.jcore.types.Scope"})
public class LingscopePosAnnotator extends JCasAnnotator_ImplBase {
    public static final String PARAM_CUE_MODEL = "CueModel";
    public static final String PARAM_SCOPE_MODEL = "ScopeModel";
    public static final String PARAM_LIKELIHOOD_DICT_PATH = "LikelihoodDict";
    public static final String PARAM_IS_NEGATION_ANNOTATOR = "IsNegationAnnotator";
    private static final Logger log = LoggerFactory.getLogger(LingscopePosAnnotator.class);

    /** Lingscope annotator that tags cue tokens; loaded in {@link #initialize(UimaContext)}. */
    private Annotator cueAnnotator;
    /** Lingscope annotator that tags scope tokens; loaded in {@link #initialize(UimaContext)}. */
    private Annotator scopeAnnotator;
    /** Maps a lower-cased, space-joined lemma sequence of a cue to its likelihood category. Empty if no dictionary was configured. */
    private final Map<String, String> likelihoodDict = new HashMap<>();

    @ConfigurationParameter(name = PARAM_CUE_MODEL, description = "The model that is used to recognize the negation or hedge cue words in text. There are different models for negation and hedge detection in Lingscope, indicated by the directory names 'negation_models' and 'hedge_models' in the respective downloads from the Lingscope SourceForge page. The cue detection models are always those where the string 'cue' follows the 'baseline' or 'crf' string in the filename. Thus, all 'baseline_cue_*' and 'crf_cue_*' files are cue identification models. The 'crf_scope_cue_*' models, in contrast, are scope detection models that replace the cue words by the string CUE.")
    private String cueModelLocation;

    @ConfigurationParameter(name = PARAM_SCOPE_MODEL, description = "The model that is used to detect the scope of a previously found negation or hedge cue word. There are different models for negation and hedge detection in Lingscope, indicated by the directory names 'negation_models' and 'hedge_models' in the respective downloads from the Lingscope SourceForge page. The cue detection models are always those where the string 'cue' follows the 'baseline' or 'crf' string in the filename. Thus, all 'baseline_cue_*' and 'crf_cue_*' files are cue identification models. The 'crf_scope_cue_*' models, in contrast, are scope detection models that replace the cue words by the string CUE.")
    private String scopeModelLocation;

    @ConfigurationParameter(name = PARAM_LIKELIHOOD_DICT_PATH, mandatory = false, description = "String parameter indicating path to likelihood dictionary (One entry per line; Entries consist of tab-separated lemmatized likelihood indicators and assigned likelihood category). The dictionary passed here is only used to assign likelihood scores (low, medium, high) to negation and hedge cues. It is not used to detect the cues in the first place.")
    private String likelihoodDictFile;

    @ConfigurationParameter(name = PARAM_IS_NEGATION_ANNOTATOR, mandatory = false, defaultValue = {"false"}, description = "If set to true, the recognized cue words will all be assigned the 'negation' likelihood, even if the model used is a hedge model.")
    private boolean isNegationAnnotator;

    /** Passed to {@code CueAndPosFilesMerger.merge}; inferred from the scope model file name in {@link #initialize(UimaContext)}. */
    private boolean replaceCue;

    /**
     * Reads the configuration parameters, optionally loads the likelihood dictionary and loads the cue and
     * scope models.
     *
     * @param uimaContext the UIMA context providing the configuration parameter values
     * @throws ResourceInitializationException if one of the models cannot be loaded
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        // Let the base class store the context before any parameter is read.
        super.initialize(uimaContext);
        this.cueModelLocation = (String) uimaContext.getConfigParameterValue(PARAM_CUE_MODEL);
        this.scopeModelLocation = (String) uimaContext.getConfigParameterValue(PARAM_SCOPE_MODEL);
        this.likelihoodDictFile = (String) uimaContext.getConfigParameterValue(PARAM_LIKELIHOOD_DICT_PATH);
        if (this.likelihoodDictFile != null) {
            LikelihoodUtils.loadLikelihoodDict(this.likelihoodDictFile, this.likelihoodDict);
        }
        Boolean negationFlag = (Boolean) uimaContext.getConfigParameterValue(PARAM_IS_NEGATION_ANNOTATOR);
        if (negationFlag != null) {
            this.isNegationAnnotator = negationFlag.booleanValue();
        }

        // The cue detection type is derived from the prefix of the model's file name.
        String cueModelName = new File(this.cueModelLocation).getName();
        String cueType;
        if (cueModelName.startsWith("baseline")) {
            cueType = "baseline";
        } else if (cueModelName.startsWith("crf")) {
            cueType = "crf";
        } else {
            cueType = "negex";
        }
        log.info("Inferred the cue detection type '{}' from the cue model file '{}'", cueType, this.cueModelLocation);

        // Only the file name is inspected (as for the cue model) so that a directory whose name happens to
        // contain "words" cannot flip the replacement strategy.
        this.replaceCue = !new File(this.scopeModelLocation).getName().contains("words");
        log.info("Inferred the strategy as to whether to replace found cue words with the CUE string or not from the scope model file '{}' to: Replace: {}", this.scopeModelLocation, Boolean.valueOf(this.replaceCue));

        try {
            this.cueAnnotator = SentenceTagger.getAnnotator(cueType, "cue");
            this.cueAnnotator.loadAnnotator(FileUtilities.findResource(this.cueModelLocation));
            this.scopeAnnotator = SentenceTagger.getAnnotator("crf", "scope");
            this.scopeAnnotator.loadAnnotator(FileUtilities.findResource(this.scopeModelLocation));
        } catch (IOException e) {
            log.error("Could not initialize Lingscope annotators", e);
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Annotates cues and scopes for every sentence of the given CAS. Sentences without tokens are skipped.
     *
     * @param jCas the CAS to annotate; must contain sentences and PoS-tagged tokens
     * @throws AnalysisEngineProcessException if a token has no PoS tag or a required lemma is missing
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        AnnotationIndex<Annotation> tokenIndex = jCas.getAnnotationIndex(Token.type);
        FSIterator<Annotation> sentences = jCas.getAnnotationIndex(Sentence.type).iterator();
        while (sentences.hasNext()) {
            Annotation sentence = sentences.next();
            List<Token> tokens = new ArrayList<>();
            List<String> posValues = new ArrayList<>();
            FSIterator<Annotation> tokenIt = tokenIndex.subiterator(sentence);
            while (tokenIt.hasNext()) {
                Token token = (Token) tokenIt.next();
                // Check the tag array itself first: indexing into a missing or empty array would fail with
                // an unspecific runtime error instead of the descriptive one below.
                boolean hasPosTags = token.getPosTag() != null && token.getPosTag().size() > 0;
                POSTag posTag = hasPosTags ? token.getPosTag(0) : null;
                if (posTag == null) {
                    throw new AnalysisEngineProcessException(new IllegalArgumentException("PoS tags are required but the current token has none."));
                }
                posValues.add(posTag.getValue());
                tokens.add(token);
            }
            if (tokens.isEmpty()) {
                continue;
            }
            String posSequence = String.join(" ", posValues);

            // Declared outside the try block so the diagnostics can report how far processing got.
            AnnotatedSentence cueTagged = null;
            AnnotatedSentence posCueMerged = null;
            AnnotatedSentence scopeTagged = null;
            try {
                // NOTE(review): '|' is replaced in the original code, presumably because Lingscope uses it
                // as a separator character - confirm against the Lingscope sources.
                String tokenizedText = tokens.stream().map(Token::getCoveredText).collect(Collectors.joining(" ")).replace("|", "/");
                cueTagged = this.cueAnnotator.annotateSentence(tokenizedText, true);
                posCueMerged = CueAndPosFilesMerger.merge(cueTagged, posSequence, this.replaceCue);
                scopeTagged = this.scopeAnnotator.annotateSentence(posCueMerged.getSentenceText(), true);
                List<LikelihoodIndicator> cues = addAnnotationToCas(tokens, cueTagged, () -> new LikelihoodIndicator(jCas));
                List<Scope> scopes = addAnnotationToCas(tokens, scopeTagged, () -> new Scope(jCas));
                if (cues.size() == scopes.size()) {
                    // Cues and scopes are paired by their order of appearance in the sentence.
                    for (int i = 0; i < scopes.size(); i++) {
                        scopes.get(i).setCue(cues.get(i));
                    }
                } else {
                    log.debug("Not assigning negation or hedge cues to their scopes because the number of cues and scopes differs.");
                    log.trace("The respective sentence is: '{}'. Cue tags: '{}', Scope tags: '{}'", sentence.getCoveredText(), cueTagged.getTags(), scopeTagged.getTags());
                }
            } catch (Throwable th) {
                logSentenceFailure(sentence, tokens, posSequence, cueTagged, posCueMerged, scopeTagged, th);
                throw th;
            }
        }
    }

    /**
     * Logs the intermediate state of a sentence whose processing failed. Every value is accessed null-safely
     * so that the diagnostics cannot raise an exception that hides the original failure.
     */
    private void logSentenceFailure(Annotation sentence, List<Token> tokens, String posSequence, AnnotatedSentence cueTagged, AnnotatedSentence posCueMerged, AnnotatedSentence scopeTagged, Throwable cause) {
        log.error("Lingscope error in sentence '{}'", sentence.getCoveredText(), cause);
        log.error("PosCueMerged Sent Text: {}", posCueMerged != null ? posCueMerged.getSentenceText() : "<null>");
        log.error("Tokens: {}", tokens.stream().map(Token::getCoveredText).collect(Collectors.joining(" ")));
        // Lemmas are optional input, so a token may well have none.
        log.error("Lemmas: {}", tokens.stream().map(Token::getLemma).map(lemma -> lemma != null ? String.valueOf(lemma.getValue()) : "<null>").collect(Collectors.joining(" ")));
        log.error("PoS: {}", posSequence);
        log.error("Cue tags: {}", cueTagged != null ? cueTagged.getTags() : "<null>");
        log.error("POS Cue merged: {}", posCueMerged != null ? posCueMerged.getTags() : "<null>");
        log.error("Scope tags: {}", scopeTagged != null ? scopeTagged.getTags() : "<null>");
        log.error("StackTrace:", cause);
    }

    /**
     * Converts the per-token tags of a Lingscope result into CAS annotations. A tag starting with
     * {@code B} opens a new annotation (closing a still open one), a tag starting with {@code I} extends the
     * open annotation and {@link SentenceTagger#OTHER} closes it. An annotation that is still open after the
     * last token is closed at the last token.
     *
     * @param list              the tokens of the sentence, aligned index-wise with the tags
     * @param annotatedSentence the Lingscope result providing one tag per token
     * @param supplier          creates a fresh, not yet indexed annotation
     * @return the created annotations in order of appearance
     * @throws AnalysisEngineProcessException if a lemma required for the likelihood lookup is missing
     */
    private <T extends Annotation> List<T> addAnnotationToCas(List<Token> list, AnnotatedSentence annotatedSentence, Supplier<T> supplier) throws AnalysisEngineProcessException {
        List<T> created = new ArrayList<>();
        List<Token> currentTokens = new ArrayList<>();
        T current = null;
        for (int i = 0; i < list.size(); i++) {
            Token token = list.get(i);
            String tag = annotatedSentence.getTags().get(i);
            if (tag.startsWith("B")) {
                if (current != null) {
                    endAnnotation(list, created, current, currentTokens, i);
                }
                current = supplier.get();
                current.setBegin(token.getBegin());
                // Start from a clean slate so tokens of earlier spans never leak into this one.
                currentTokens.clear();
                currentTokens.add(token);
            }
            // An inside-tag without a preceding begin-tag has no annotation to extend and is ignored.
            if (tag.startsWith("I") && current != null) {
                currentTokens.add(token);
            }
            if (tag.equals(SentenceTagger.OTHER)) {
                if (current != null) {
                    endAnnotation(list, created, current, currentTokens, i);
                }
                current = null;
            }
        }
        // A span reaching the end of the sentence has no following tag that would close it.
        if (current != null) {
            endAnnotation(list, created, current, currentTokens, list.size());
        }
        return created;
    }

    /**
     * Finishes an open annotation: sets its end to the end of the token before index {@code i}, assigns a
     * likelihood if the annotation is a {@link LikelihoodIndicator}, adds it to the CAS indexes and to
     * {@code list2}, and empties {@code list3}.
     *
     * @param list  all tokens of the sentence
     * @param list2 the result list the finished annotation is appended to
     * @param t     the annotation to finish
     * @param list3 the tokens covered by the annotation; cleared by this method
     * @param i     index of the first token after the annotation (must be at least 1)
     * @throws AnalysisEngineProcessException if a likelihood lookup is required but a covered token has no lemma
     */
    private <T extends Annotation> void endAnnotation(List<Token> list, List<T> list2, T t, List<Token> list3, int i) throws AnalysisEngineProcessException {
        t.setEnd(list.get(i - 1).getEnd());
        if (t instanceof LikelihoodIndicator) {
            LikelihoodIndicator indicator = (LikelihoodIndicator) t;
            if (this.isNegationAnnotator) {
                indicator.setLikelihood("negation");
            } else if (!this.likelihoodDict.isEmpty()) {
                // The dictionary key is the lower-cased lemma sequence of the cue, joined by single spaces.
                List<String> lemmaValues = new ArrayList<>(list3.size());
                for (Token covered : list3) {
                    Lemma lemma = covered.getLemma();
                    if (lemma == null) {
                        throw new AnalysisEngineProcessException(new IllegalArgumentException("Lemmas are required when a likelihood dictionary is passed but the current token has none."));
                    }
                    lemmaValues.add(lemma.getValue().toLowerCase());
                }
                String likelihood = this.likelihoodDict.get(String.join(" ", lemmaValues));
                if (likelihood != null) {
                    indicator.setLikelihood(likelihood);
                }
            }
        }
        list3.clear();
        t.addToIndexes();
        list2.add(t);
    }
}
