package de.julielab.jcore.reader.bc2gm;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import de.julielab.jcore.types.Gene;
import de.julielab.jcore.types.Header;
import de.julielab.jcore.types.Sentence;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.util.Iterator;
import java.util.TreeMap;
import org.apache.commons.io.IOUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name = "JCoRe BioCreative II Gene Mention reader", description = "This component reads gene annotated sentences in the BioCreative II Gene Mention challenge format. Each CAS will contain one annotated sentence.")
@TypeCapability(outputs = {"de.julielab.jcore.types.Gene"})
/* loaded from: input_file:de/julielab/jcore/reader/bc2gm/BC2GMReader.class */
public class BC2GMReader extends JCasCollectionReader_ImplBase {
    // Class-wide SLF4J logger.
    private static final Logger log = LoggerFactory.getLogger(BC2GMReader.class);
    // Name of the configuration parameter that points to the sentence file.
    public static final String PARAM_SENTENCES = "SentencesFile";
    // Name of the (optional) configuration parameter that points to the gene annotation file.
    public static final String PARAM_GENES = "GenesFile";

    // Path of the sentence file; initialize() rejects a null value.
    @ConfigurationParameter(name = PARAM_SENTENCES, description = "The BC2GM data is comprised of one file holding one sentence per line and another file holding the annotations. This parameter should be set to the file containing the sentences.")
    private String sentenceFile;

    // Path of the gene annotation file; may be null, in which case no gene annotations are read.
    @ConfigurationParameter(name = PARAM_GENES, mandatory = false, description = "The BC2GM data is comprised of one file holding one sentence per line and another file holding the annotations. This parameter should be set to the file holding the gene annotations.")
    private String genesFile;
    // Gene annotations keyed by sentence ID; stays null when no gene annotation file was configured.
    private Multimap<String, GeneAnnotation> geneAnnotations;
    // Iterates over the lines of the sentence file, which initialize() reads completely up front.
    private Iterator<String> sentencesIterator;

    /**
     * Plain data holder for one line of the gene annotation file: the ID of the
     * sentence the mention belongs to, the two offsets given for the mention and
     * the mention text itself.
     */
    public class GeneAnnotation {
        /** ID of the sentence this gene mention belongs to. */
        public String sentenceId;
        /** Text of the gene mention as given in the annotation file. */
        public String text;
        /** First offset value given for the mention in the annotation file. */
        public int start;
        /** Second offset value given for the mention in the annotation file. */
        public int end;
    }

    /**
     * Builds a lookup table from character offsets in {@code str} to the number
     * of whitespace characters that occur before that offset.
     * <p>
     * The map always contains the entry {@code 0 -> 0}. In addition, every
     * offset that directly follows a whitespace character (and still lies
     * inside the string) is mapped to the count of whitespace characters seen
     * up to and including that preceding character. Use
     * {@link TreeMap#floorEntry(Object)} to obtain the count for an arbitrary
     * offset.
     *
     * @param str the text to analyze; must not be {@code null}
     * @return the offset-to-whitespace-count map
     */
    public static TreeMap<Integer, Integer> createNumWsMap(String str) {
        final TreeMap<Integer, Integer> wsCountByOffset = new TreeMap<>();
        wsCountByOffset.put(0, 0);
        final int length = str.length();
        int whitespaceSeen = 0;
        for (int pos = 0; pos < length; pos++) {
            if (!Character.isWhitespace(str.charAt(pos))) {
                continue;
            }
            whitespaceSeen++;
            // Record the running count at the offset right after this
            // whitespace character, unless the string ends here.
            final int following = pos + 1;
            if (following < length) {
                wsCountByOffset.put(following, whitespaceSeen);
            }
        }
        return wsCountByOffset;
    }

    /**
     * Reads the configuration, loads the optional gene annotations and reads
     * all lines of the sentence file into memory.
     *
     * @param uimaContext the context to read the parameter values from
     * @throws ResourceInitializationException if the sentence file parameter is
     *         not set or one of the files cannot be read
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.sentenceFile = (String) uimaContext.getConfigParameterValue(PARAM_SENTENCES);
        this.genesFile = (String) uimaContext.getConfigParameterValue(PARAM_GENES);
        if (this.sentenceFile == null) {
            IllegalArgumentException cause = new IllegalArgumentException("Sentences file parameter is null.");
            throw new ResourceInitializationException(cause);
        }
        log.info("Reading sentences from {}", this.sentenceFile);

        // The gene annotation file is optional.
        if (this.genesFile == null) {
            log.info("No gene annotation file specified.");
        } else {
            log.info("Reading gene annotations from {}", this.genesFile);
            try {
                this.geneAnnotations = readGeneAnnotations(this.genesFile);
            } catch (IOException genesFailure) {
                throw new ResourceInitializationException(genesFailure);
            }
        }

        // The whole sentence file is loaded eagerly; documents are then served from memory.
        final File sentenceSource = new File(this.sentenceFile);
        final Charset utf8 = Charset.forName("UTF-8");
        try {
            this.sentencesIterator = Files.readAllLines(sentenceSource.toPath(), utf8).iterator();
        } catch (IOException sentencesFailure) {
            throw new ResourceInitializationException(sentencesFailure);
        }
    }

    /**
     * Reads the gene annotation file. Each non-blank line is expected to have
     * the form {@code <sentenceId>|<start> <end>|<gene text>}.
     * <p>
     * The file is decoded as UTF-8, matching how the sentence file is read.
     * Blank lines are skipped. The line is split into at most three fields so
     * that a {@code |} inside the gene text is preserved.
     *
     * @param str path of the gene annotation file
     * @return the annotations keyed by sentence ID
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails or a line does not have the
     *         expected format
     */
    private Multimap<String, GeneAnnotation> readGeneAnnotations(String str) throws FileNotFoundException, IOException {
        Multimap<String, GeneAnnotation> annotationsBySentence = HashMultimap.create();
        try (FileInputStream in = new FileInputStream(str)) {
            int lineNumber = 0;
            for (String line : IOUtils.readLines(in, Charset.forName("UTF-8"))) {
                lineNumber++;
                if (line.trim().isEmpty()) {
                    continue;
                }
                // Limit 3: keeps '|' characters within the gene text and keeps an empty trailing field.
                String[] fields = line.split("\\|", 3);
                if (fields.length < 3) {
                    throw new IOException("Malformed gene annotation in " + str + ", line " + lineNumber
                            + ": expected <sentenceId>|<start> <end>|<text> but got \"" + line + "\"");
                }
                String[] offsets = fields[1].trim().split("\\s+");
                if (offsets.length < 2) {
                    throw new IOException("Malformed gene offsets in " + str + ", line " + lineNumber
                            + ": expected \"<start> <end>\" but got \"" + fields[1] + "\"");
                }
                GeneAnnotation annotation = new GeneAnnotation();
                annotation.sentenceId = fields[0];
                try {
                    annotation.start = Integer.parseInt(offsets[0]);
                    annotation.end = Integer.parseInt(offsets[1]);
                } catch (NumberFormatException e) {
                    throw new IOException("Non-numeric gene offsets in " + str + ", line " + lineNumber
                            + ": \"" + fields[1] + "\"", e);
                }
                annotation.text = fields[2];
                annotationsBySentence.put(annotation.sentenceId, annotation);
            }
        }
        log.info("Got {} gene annotations in {} sentences", annotationsBySentence.size(),
                annotationsBySentence.keySet().size());
        return annotationsBySentence;
    }

    /**
     * Fills the given CAS with the next sentence. The line is split at the
     * first space into the sentence ID and the sentence text. The text becomes
     * the document text, the ID is stored in a {@link Header}, and one
     * {@link Sentence} annotation spans the whole text.
     * <p>
     * If gene annotations were loaded, a {@link Gene} annotation is added for
     * each annotation of this sentence. The annotation offsets are shifted by
     * the number of whitespace characters in front of the mention, taken from
     * {@link #createNumWsMap(String)}. An occurrence of the gene text is
     * accepted when the shifted start offset is at most one character away
     * from where the text was found.
     *
     * @param jCas the CAS to populate
     * @throws IOException declared by the reader interface
     * @throws CollectionException if the sentence line does not consist of an
     *         ID and a text separated by a space
     * @throws IllegalStateException if a gene annotation cannot be located in
     *         the sentence text
     */
    public void getNext(JCas jCas) throws IOException, CollectionException {
        String line = this.sentencesIterator.next();
        String[] idAndText = line.split(" ", 2);
        if (idAndText.length < 2) {
            throw new CollectionException(new IllegalArgumentException(
                    "Expected a sentence ID and the sentence text separated by a space but got: \"" + line + "\""));
        }
        String sentenceId = idAndText[0];
        String sentenceText = idAndText[1];

        jCas.setDocumentText(sentenceText);
        Header header = new Header(jCas);
        header.setDocId(sentenceId);
        header.addToIndexes();
        new Sentence(jCas, 0, sentenceText.length()).addToIndexes();

        if (this.geneAnnotations == null) {
            return;
        }
        TreeMap<Integer, Integer> numWsMap = createNumWsMap(sentenceText);
        for (GeneAnnotation geneAnnotation : this.geneAnnotations.get(sentenceId)) {
            String geneText = geneAnnotation.text;
            boolean found = false;
            int searchFrom = 0;
            int candidate;
            // The bound on searchFrom is required because indexOf never returns -1 for an
            // empty search string, which would otherwise loop forever.
            while (searchFrom <= sentenceText.length()
                    && (candidate = sentenceText.indexOf(geneText, searchFrom)) != -1) {
                int wsBeforeBegin = numWsMap.floorEntry(candidate).getValue();
                int shiftedBegin = geneAnnotation.start + wsBeforeBegin;
                // Accept an occurrence whose position is at most one character off the shifted offset.
                if (Math.abs(shiftedBegin - candidate) <= 1) {
                    int wsBeforeEnd = numWsMap.floorEntry(candidate + geneText.length()).getValue();
                    // The annotated end offset is treated as inclusive, hence the + 1.
                    new Gene(jCas, shiftedBegin, geneAnnotation.end + wsBeforeEnd + 1).addToIndexes();
                    found = true;
                    break;
                }
                // Advance by one character so that overlapping occurrences are not skipped.
                searchFrom = candidate + 1;
            }
            if (!found) {
                throw new IllegalStateException("The document-relative, whitespace-including position of the gene \"" + geneText + "\" with BC2GM offsets " + geneAnnotation.start + "-" + geneAnnotation.end + " in sentence " + sentenceId + " could not be found.");
            }
        }
    }

    /**
     * Closes this reader. The body is intentionally empty: this method performs
     * no action.
     *
     * @throws IOException never thrown by this implementation
     */
    public void close() throws IOException {
    }

    /**
     * Progress is not tracked by this reader.
     * NOTE(review): callers that use the result without a null check would
     * fail; confirm whether an empty array would be the safer return value.
     *
     * @return always {@code null}
     */
    public Progress[] getProgress() {
        return null;
    }

    /**
     * Tells whether there is another sentence line left to be read.
     *
     * @return {@code true} if the sentence iterator has more lines
     */
    public boolean hasNext() throws IOException, CollectionException {
        final boolean moreSentences = this.sentencesIterator.hasNext();
        return moreSentences;
    }
}
