package opennlp.tools.formats.ad;

import com.github.jaiimageio.plugins.tiff.EXIFGPSTagSet;
import com.sun.mail.imap.IMAPStore;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import org.apache.ivy.core.IvyPatternHelper;
import org.apache.lucene.analysis.pattern.PatternTokenizerFactory;
import org.apache.xmpbox.type.VersionType;
import org.elasticsearch.index.mapper.ObjectMapper;
import org.elasticsearch.index.query.CommonTermsQueryBuilder;
import org.gagravarr.vorbis.VorbisStyleComments;
import org.springframework.beans.factory.xml.BeanDefinitionParserDelegate;

/* loaded from: input_file:WEB-INF/lib/opennlp-tools-1.9.4.jar:opennlp/tools/formats/ad/ADNameSampleStream.class */
/**
 * Converts the sentences produced by an {@link ADSentenceStream} into
 * {@link NameSample} objects.
 *
 * <p>Every leaf of a sentence tree is tokenized (splitting on underscores and, optionally, on
 * hyphens) and, when its secondary tag carries a category listed in the internal category table,
 * a typed {@link Span} covering the leaf's tokens is emitted.
 *
 * <p>The stream keeps per-pass state (pending contraction part, current text id) and is therefore
 * not safe for concurrent use.
 */
public class ADNameSampleStream implements ObjectStream<NameSample> {
    /** Matches one tag of the form {@code <xyz>} or {@code <NER:xyz>}; group 2 is the tag name. */
    private static final Pattern tagPattern = Pattern.compile("<(NER:)?(.*?)>");
    private static final Pattern whitespacePattern = Pattern.compile("\\s+");
    private static final Pattern underlinePattern = Pattern.compile("[_]+");
    /**
     * Three alternatives: a word ending in a hyphen (groups 1-2), a word starting with a hyphen
     * (groups 3-5) and two words joined by a hyphen followed by an arbitrary rest (groups 6-9).
     */
    private static final Pattern hyphenPattern = Pattern.compile("((\\p{L}+)-$)|(^-(\\p{L}+)(.*))|((\\p{L}+)-(\\p{L}+)(.*))");
    private static final Pattern alphanumericPattern = Pattern.compile("^[\\p{L}\\p{Nd}]+$");

    /** Left-pointing double angle quotation mark (U+00AB, decimal 171). */
    private static final char LEFT_GUILLEMET = '\u00AB';
    /** Right-pointing double angle quotation mark (U+00BB, decimal 187). */
    private static final char RIGHT_GUILLEMET = '\u00BB';

    /** Maps a tag name found in a leaf's secondary tag to the span type that is emitted for it. */
    private static final Map<String, String> HAREM = buildHaremMap();

    private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
    private final boolean splitHyphenatedTokens;

    /** Metadata pattern matching the detected {@link #corpusType}; set on the first sentence. */
    private Pattern metaPattern;
    /** Left part of a contraction held back until the next leaf is seen, or {@code null}. */
    private String leftContractionPart = null;
    /** Text id of the most recently returned sample; -1 before the first sample. */
    private int textID = -1;
    /** Corpus flavour detected from the first sentence's metadata; {@code null} until then. */
    private Type corpusType = null;
    /** Running text counter used for the {@code lit} and {@code cie} corpus types. */
    private int textIdMeta2 = -1;
    /** Metadata key of the text currently counted by {@link #textIdMeta2}. */
    private String textMeta2 = "";

    /**
     * Corpus flavours distinguished by the metadata prefix of the first sentence:
     * {@code "LIT"} selects {@link #lit}, {@code "CIE"} selects {@link #cie}, anything else
     * selects {@link #ama}.
     */
    public enum Type {
        ama,
        cie,
        lit
    }

    /**
     * Creates a stream that reads AD-formatted lines from {@code objectStream}.
     *
     * @param objectStream the line stream handed to {@link ADSentenceStream}
     * @param z            whether hyphenated tokens are split into their parts
     */
    public ADNameSampleStream(ObjectStream<String> objectStream, boolean z) {
        this.adSentenceStream = new ADSentenceStream(objectStream);
        this.splitHyphenatedTokens = z;
    }

    /**
     * Creates a stream that reads AD-formatted text from an input stream factory.
     *
     * @param inputStreamFactory source of the raw input
     * @param str                name of the charset used to decode the input
     * @param z                  whether hyphenated tokens are split into their parts
     * @throws IOException           if the underlying line stream cannot be created
     * @throws IllegalStateException if the charset name is not supported
     * @deprecated use {@link #ADNameSampleStream(ObjectStream, boolean)} instead
     */
    @Deprecated
    public ADNameSampleStream(InputStreamFactory inputStreamFactory, String str, boolean z) throws IOException {
        try {
            this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(inputStreamFactory, str));
            this.splitHyphenatedTokens = z;
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Reads the next sentence and converts it to a {@link NameSample}.
     *
     * @return the next sample, or {@code null} when the underlying stream is exhausted
     * @throws IOException      if the underlying stream fails
     * @throws RuntimeException if the sentence metadata does not match the expected pattern
     */
    @Override // opennlp.tools.util.ObjectStream
    public NameSample read() throws IOException {
        ADSentenceStream.Sentence paragraph = this.adSentenceStream.read();
        if (paragraph == null) {
            return null;
        }

        // A change of text id is passed to the sample as its third (boolean) constructor argument.
        int currentTextID = getTextID(paragraph);
        boolean textChanged = currentTextID != this.textID;
        if (textChanged) {
            this.textID = currentTextID;
        }

        ADSentenceStream.SentenceParser.Node root = paragraph.getRoot();
        List<String> sentence = new ArrayList<>();
        List<Span> names = new ArrayList<>();
        process(root, sentence, names);

        return new NameSample(sentence.toArray(new String[0]), names.toArray(new Span[0]), textChanged);
    }

    /**
     * Walks the tree depth-first and feeds every leaf to {@link #processLeaf}.
     *
     * @param node     subtree to walk; {@code null} is ignored
     * @param sentence receives the tokens
     * @param names    receives the name spans
     */
    private void process(ADSentenceStream.SentenceParser.Node node, List<String> sentence, List<Span> names) {
        if (node == null) {
            return;
        }
        for (ADSentenceStream.SentenceParser.TreeElement element : node.getElements()) {
            if (element.isLeaf()) {
                processLeaf((ADSentenceStream.SentenceParser.Leaf) element, sentence, names);
            } else {
                process((ADSentenceStream.SentenceParser.Node) element, sentence, names);
            }
        }
    }

    /**
     * Tokenizes one leaf, resolving a pending contraction and recording a name span if the
     * leaf's secondary tag maps to a known category.
     *
     * @param leaf     the leaf to process
     * @param sentence receives the tokens
     * @param names    receives the name spans
     */
    private void processLeaf(ADSentenceStream.SentenceParser.Leaf leaf, List<String> sentence, List<Span> names) {
        boolean alreadyAdded = false;

        if (this.leftContractionPart != null) {
            // A previous leaf held back its last piece; try to merge it with this leaf's lexeme.
            String contraction = PortugueseContractionUtility.toContraction(this.leftContractionPart, leaf.getLexeme());
            if (contraction != null) {
                sentence.addAll(Arrays.asList(whitespacePattern.split(contraction)));
                alreadyAdded = true;
            } else {
                // No contraction could be built: emit the held-back piece unchanged.
                sentence.add(this.leftContractionPart);
            }
            this.leftContractionPart = null;
        }

        String namedEntityTag = null;
        boolean expandLastNER = false;
        String leafTag = leaf.getSecondaryTag();

        if (leafTag != null) {
            if (leafTag.contains("<sam->") && !alreadyAdded) {
                // Emit all pieces but the last and hold the last one back for the next leaf.
                String[] lexemes = underlinePattern.split(leaf.getLexeme());
                if (lexemes.length > 1) {
                    sentence.addAll(Arrays.asList(lexemes).subList(0, lexemes.length - 1));
                }
                // split() yields an empty array for a lexeme made only of underscores;
                // there is nothing to hold back in that case.
                if (lexemes.length > 0) {
                    this.leftContractionPart = lexemes[lexemes.length - 1];
                }
                return;
            }
            if (leafTag.contains("<NER2>")) {
                expandLastNER = true;
            }
            namedEntityTag = getNER(leafTag);
        }

        int startOfNamedEntity = -1;
        if (namedEntityTag != null) {
            startOfNamedEntity = sentence.size();
        }
        if (!alreadyAdded) {
            sentence.addAll(processLexeme(leaf.getLexeme()));
        }
        if (namedEntityTag != null) {
            names.add(new Span(startOfNamedEntity, sentence.size(), namedEntityTag));
        }

        if (expandLastNER && !names.isEmpty()) {
            // Stretch the previous span over the tokens just added, but only if that span ended
            // exactly one token before the current end of the sentence.
            int lastIndex = names.size() - 1;
            Span last = names.get(lastIndex);
            if (last.getEnd() == sentence.size() - 1) {
                names.set(lastIndex, new Span(last.getStart(), sentence.size(), last.getType()));
            }
        }
    }

    /**
     * Splits a lexeme on underscores and tokenizes every piece that is longer than one character
     * and not purely alphanumeric.
     *
     * @param lexeme the raw lexeme of a leaf
     * @return the resulting tokens, in order
     */
    private List<String> processLexeme(String lexeme) {
        List<String> tokens = new ArrayList<>();
        for (String piece : underlinePattern.split(lexeme)) {
            if (piece.length() > 1 && !alphanumericPattern.matcher(piece).matches()) {
                tokens.addAll(processTok(piece));
            } else {
                tokens.add(piece);
            }
        }
        return tokens;
    }

    /**
     * Splits surrounding punctuation (a leading U+00AB; a trailing U+00BB, {@code :}, {@code ,}
     * or {@code !}) off a token and, if enabled, splits hyphenated words.
     *
     * @param tok the token to split
     * @return the resulting tokens, in order; never contains an empty string
     */
    private List<String> processTok(String tok) {
        List<String> out = new ArrayList<>();
        if (tok.isEmpty()) {
            return out;
        }

        // Remember the token as received so we can tell below whether anything was stripped.
        final String original = tok;
        List<String> suffix = new ArrayList<>();

        char first = tok.charAt(0);
        if (first == LEFT_GUILLEMET) {
            out.add(Character.toString(first));
            tok = tok.substring(1);
        }

        // The token may be empty now (it was a lone U+00AB); guard the last-character access.
        if (!tok.isEmpty()) {
            char last = tok.charAt(tok.length() - 1);
            if (last == RIGHT_GUILLEMET || last == ':' || last == ',' || last == '!') {
                suffix.add(Character.toString(last));
                tok = tok.substring(0, tok.length() - 1);
            }
        }

        boolean tokAdded = false;
        if (this.splitHyphenatedTokens && tok.contains("-") && tok.length() > 1) {
            Matcher matcher = hyphenPattern.matcher(tok);
            if (matcher.matches()) {
                String firstTok = null;
                String secondTok = null;
                String rest = null;
                if (matcher.group(1) != null) {
                    firstTok = matcher.group(2);
                } else if (matcher.group(3) != null) {
                    secondTok = matcher.group(4);
                    rest = matcher.group(5);
                } else if (matcher.group(6) != null) {
                    firstTok = matcher.group(7);
                    secondTok = matcher.group(8);
                    rest = matcher.group(9);
                }
                addIfNotEmpty(firstTok, out);
                addIfNotEmpty("-", out);
                addIfNotEmpty(secondTok, out);
                addIfNotEmpty(rest, out);
                tokAdded = true;
            }
        }

        // Skip an empty remainder: the token consisted of punctuation only.
        if (!tokAdded && !tok.isEmpty()) {
            // Recurse only when punctuation was actually stripped; the remainder is then strictly
            // shorter than the input, so the recursion terminates.
            boolean stripped = !original.equals(tok);
            if (stripped && tok.length() > 1 && !alphanumericPattern.matcher(tok).matches()) {
                out.addAll(processTok(tok));
            } else {
                out.add(tok);
            }
        }

        out.addAll(suffix);
        return out;
    }

    /**
     * Tokenizes {@code part} and appends the result to {@code out}; does nothing for a
     * {@code null} or empty part.
     */
    private void addIfNotEmpty(String part, List<String> out) {
        if (part != null && !part.isEmpty()) {
            out.addAll(processTok(part));
        }
    }

    /**
     * Looks up the span type for the first whitespace-separated tag that is a key of
     * {@link #HAREM}.
     *
     * @param tags the secondary tag string of a leaf
     * @return the mapped span type, or {@code null} if the string contains {@code <NER2>} or no
     *         tag is a known category
     */
    private static String getNER(String tags) {
        if (tags.contains("<NER2>")) {
            return null;
        }
        for (String tag : whitespacePattern.split(tags)) {
            Matcher matcher = tagPattern.matcher(tag);
            if (matcher.matches()) {
                // HAREM holds no null values, so a null result means "unknown category".
                String type = HAREM.get(matcher.group(2));
                if (type != null) {
                    return type;
                }
            }
        }
        return null;
    }

    /**
     * Resets the underlying stream and the per-pass state of this stream, so that a second pass
     * starts from the same state as the first. The detected corpus type is kept.
     */
    @Override // opennlp.tools.util.ObjectStream
    public void reset() throws IOException, UnsupportedOperationException {
        this.adSentenceStream.reset();
        this.leftContractionPart = null;
        this.textID = -1;
        this.textIdMeta2 = -1;
        this.textMeta2 = "";
    }

    /** Closes the underlying sentence stream. */
    @Override // opennlp.tools.util.ObjectStream, java.lang.AutoCloseable
    public void close() throws IOException {
        this.adSentenceStream.close();
    }

    /**
     * Derives a text id from the sentence metadata.
     *
     * <p>The corpus type is detected once, from the first sentence. For {@link Type#ama} the id
     * is the number captured from the metadata; for {@link Type#lit} and {@link Type#cie} it is
     * a counter that is incremented every time the captured metadata key changes.
     *
     * @param paragraph the sentence whose metadata is inspected
     * @return the id of the text the sentence belongs to
     * @throws RuntimeException if the metadata does not match the pattern of the corpus type
     */
    private int getTextID(ADSentenceStream.Sentence paragraph) {
        String meta = paragraph.getMetadata();

        if (this.corpusType == null) {
            if (meta.startsWith("LIT")) {
                this.corpusType = Type.lit;
                this.metaPattern = Pattern.compile("^([a-zA-Z\\-]+)(\\d+).*?p=(\\d+).*");
            } else if (meta.startsWith("CIE")) {
                this.corpusType = Type.cie;
                this.metaPattern = Pattern.compile("^.*?source=\"(.*?)\".*");
            } else {
                this.corpusType = Type.ama;
                this.metaPattern = Pattern.compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*");
            }
        }

        Matcher matcher = this.metaPattern.matcher(meta);
        if (!matcher.matches()) {
            throw new RuntimeException("Invalid metadata: " + meta);
        }

        if (this.corpusType == Type.ama) {
            return Integer.parseInt(matcher.group(1));
        }

        // lit and cie: count distinct consecutive metadata keys.
        String key = matcher.group(1);
        if (!key.equals(this.textMeta2)) {
            this.textIdMeta2++;
            this.textMeta2 = key;
        }
        return this.textIdMeta2;
    }

    /** Builds the immutable tag-name to span-type table. */
    private static Map<String, String> buildHaremMap() {
        Map<String, String> harem = new HashMap<>();

        final String person = "person";
        harem.put("hum", person);
        harem.put("official", person);
        harem.put("member", person);

        final String organization = "organization";
        harem.put("admin", organization);
        harem.put("org", organization);
        harem.put("inst", organization);
        harem.put("media", organization);
        harem.put("party", organization);
        harem.put("suborg", organization);

        final String group = "group";
        harem.put("groupind", group);
        harem.put("groupofficial", group);

        final String place = "place";
        harem.put("top", place);
        harem.put("civ", place);
        harem.put("address", place);
        harem.put("site", place);
        harem.put("virtual", place);
        harem.put("astro", place);

        final String event = "event";
        harem.put("occ", event);
        harem.put("event", event);
        harem.put("history", event);

        final String artprod = "artprod";
        harem.put("tit", artprod);
        harem.put("pub", artprod);
        harem.put("product", artprod);
        harem.put("V", artprod);
        harem.put("artwork", artprod);

        final String abstractType = "abstract";
        harem.put("brand", abstractType);
        harem.put("genre", abstractType);
        harem.put("school", abstractType);
        harem.put("idea", abstractType);
        harem.put("plan", abstractType);
        harem.put("author", abstractType);
        harem.put("absname", abstractType);
        harem.put("disease", abstractType);

        final String thing = "thing";
        harem.put("object", thing);
        harem.put("common", thing);
        harem.put("mat", thing);
        harem.put("class", thing);
        harem.put("plant", thing);

        final String time = "time";
        harem.put("date", time);
        harem.put("hour", time);
        harem.put("period", time);
        harem.put("cyclic", time);

        final String numeric = "numeric";
        harem.put("quantity", numeric);
        harem.put("prednum", numeric);
        // "currency" was previously also registered as "thing", but that entry was always
        // overwritten by this one; only the effective mapping is kept.
        harem.put("currency", numeric);

        return Collections.unmodifiableMap(harem);
    }
}
