package org.apache.lucene.classification;

import java.io.Closeable;
import java.io.IOException;
import java.util.LinkedList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

/* loaded from: input_file:org/apache/lucene/classification/SimpleNaiveBayesClassifier.class */
public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
    private AtomicReader atomicReader;
    private String[] textFieldNames;
    private String classFieldName;
    private int docsWithClassSize;
    private Analyzer analyzer;
    private IndexSearcher indexSearcher;
    private Query query;

    @Override // org.apache.lucene.classification.Classifier
    public void train(AtomicReader atomicReader, String str, String str2, Analyzer analyzer) throws IOException {
        train(atomicReader, str, str2, analyzer, (Query) null);
    }

    @Override // org.apache.lucene.classification.Classifier
    public void train(AtomicReader atomicReader, String str, String str2, Analyzer analyzer, Query query) throws IOException {
        train(atomicReader, new String[]{str}, str2, analyzer, query);
    }

    @Override // org.apache.lucene.classification.Classifier
    public void train(AtomicReader atomicReader, String[] strArr, String str, Analyzer analyzer, Query query) throws IOException {
        this.atomicReader = atomicReader;
        this.indexSearcher = new IndexSearcher(this.atomicReader);
        this.textFieldNames = strArr;
        this.classFieldName = str;
        this.analyzer = analyzer;
        this.docsWithClassSize = countDocsWithClass();
        this.query = query;
    }

    private int countDocsWithClass() throws IOException {
        int docCount = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
        if (docCount == -1) {
            TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.add(new BooleanClause(new WildcardQuery(new Term(this.classFieldName, String.valueOf('*'))), BooleanClause.Occur.MUST));
            if (this.query != null) {
                booleanQuery.add(this.query, BooleanClause.Occur.MUST);
            }
            this.indexSearcher.search(booleanQuery, totalHitCountCollector);
            docCount = totalHitCountCollector.getTotalHits();
        }
        return docCount;
    }

    private String[] tokenizeDoc(String str) throws IOException {
        LinkedList linkedList = new LinkedList();
        for (String str2 : this.textFieldNames) {
            Closeable closeable = this.analyzer.tokenStream(str2, str);
            try {
                CharTermAttribute addAttribute = closeable.addAttribute(CharTermAttribute.class);
                closeable.reset();
                while (closeable.incrementToken()) {
                    linkedList.add(addAttribute.toString());
                }
                closeable.end();
                IOUtils.closeWhileHandlingException(new Closeable[]{closeable});
            } catch (Throwable th) {
                IOUtils.closeWhileHandlingException(new Closeable[]{closeable});
                throw th;
            }
        }
        return (String[]) linkedList.toArray(new String[linkedList.size()]);
    }

    @Override // org.apache.lucene.classification.Classifier
    public ClassificationResult<BytesRef> assignClass(String str) throws IOException {
        if (this.atomicReader == null) {
            throw new IOException("You must first call Classifier#train");
        }
        double d = -1.7976931348623157E308d;
        BytesRef bytesRef = new BytesRef();
        TermsEnum it = MultiFields.getTerms(this.atomicReader, this.classFieldName).iterator((TermsEnum) null);
        String[] strArr = tokenizeDoc(str);
        while (true) {
            BytesRef next = it.next();
            if (next == null) {
                return new ClassificationResult<>(bytesRef, 10.0d / Math.abs(d));
            }
            double calculateLogPrior = calculateLogPrior(next) + calculateLogLikelihood(strArr, next);
            if (calculateLogPrior > d) {
                d = calculateLogPrior;
                bytesRef = BytesRef.deepCopyOf(next);
            }
        }
    }

    private double calculateLogLikelihood(String[] strArr, BytesRef bytesRef) throws IOException {
        double d = 0.0d;
        for (String str : strArr) {
            d += Math.log((getWordFreqForClass(str, bytesRef) + 1) / (getTextTermFreqForClass(bytesRef) + this.docsWithClassSize));
        }
        return d;
    }

    private double getTextTermFreqForClass(BytesRef bytesRef) throws IOException {
        double d = 0.0d;
        for (String str : this.textFieldNames) {
            Terms terms = MultiFields.getTerms(this.atomicReader, str);
            d += terms.getSumDocFreq() / terms.getDocCount();
        }
        return d * this.atomicReader.docFreq(new Term(this.classFieldName, bytesRef));
    }

    private int getWordFreqForClass(String str, BytesRef bytesRef) throws IOException {
        BooleanQuery booleanQuery = new BooleanQuery();
        BooleanQuery booleanQuery2 = new BooleanQuery();
        for (String str2 : this.textFieldNames) {
            booleanQuery2.add(new BooleanClause(new TermQuery(new Term(str2, str)), BooleanClause.Occur.SHOULD));
        }
        booleanQuery.add(new BooleanClause(booleanQuery2, BooleanClause.Occur.MUST));
        booleanQuery.add(new BooleanClause(new TermQuery(new Term(this.classFieldName, bytesRef)), BooleanClause.Occur.MUST));
        if (this.query != null) {
            booleanQuery.add(this.query, BooleanClause.Occur.MUST);
        }
        TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
        this.indexSearcher.search(booleanQuery, totalHitCountCollector);
        return totalHitCountCollector.getTotalHits();
    }

    private double calculateLogPrior(BytesRef bytesRef) throws IOException {
        return Math.log(docCount(bytesRef)) - Math.log(this.docsWithClassSize);
    }

    private int docCount(BytesRef bytesRef) throws IOException {
        return this.atomicReader.docFreq(new Term(this.classFieldName, bytesRef));
    }
}
