package org.codelibs.elasticsearch.extension.kuromoji.index.analysis;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ja.JapaneseTokenizer;
import org.apache.lucene.analysis.ja.dict.UserDictionary;
import org.apache.lucene.analysis.ja.util.CSVUtil;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
import org.elasticsearch.index.analysis.Analysis;

/* loaded from: input_file:org/codelibs/elasticsearch/extension/kuromoji/index/analysis/KuromojiTokenizerFactory.class */
/**
 * Tokenizer factory that builds Lucene {@link JapaneseTokenizer} instances from analysis settings.
 *
 * <p>Settings read by this factory: {@code mode}, {@code user_dictionary},
 * {@code user_dictionary_rules}, {@code discard_punctuation}, {@code nbest_cost} and
 * {@code nbest_examples}.
 */
public class KuromojiTokenizerFactory extends AbstractTokenizerFactory {
    private static final String USER_DICT_PATH_OPTION = "user_dictionary";
    private static final String USER_DICT_RULES_OPTION = "user_dictionary_rules";
    private static final String DISCARD_PUNCTUATION_OPTION = "discard_punctuation";
    private static final String NBEST_COST = "nbest_cost";
    private static final String NBEST_EXAMPLES = "nbest_examples";

    /** User dictionary handed to every tokenizer; {@code null} when none is configured. */
    private final UserDictionary userDictionary;
    private final JapaneseTokenizer.Mode mode;
    /** Raw value of the {@code nbest_examples} setting; {@code null} when not set. */
    private final String nBestExamples;
    /** Value of the {@code nbest_cost} setting; {@code -1} when not set. */
    private final int nBestCost;
    private final boolean discardPunctuation;

    /**
     * Reads the tokenizer configuration from {@code settings}.
     *
     * @param indexSettings index-level settings, passed through to the superclass
     * @param environment   environment used to resolve the user dictionary
     * @param name          tokenizer name, passed through to the superclass
     * @param settings      tokenizer settings
     * @throws IllegalArgumentException if the user dictionary configuration is invalid
     * @throws ElasticsearchException   if the user dictionary cannot be loaded
     */
    public KuromojiTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
        super(indexSettings, settings, name);
        this.mode = getMode(settings);
        this.userDictionary = getUserDictionary(environment, settings);
        this.discardPunctuation = settings.getAsBoolean(DISCARD_PUNCTUATION_OPTION, true);
        this.nBestCost = settings.getAsInt(NBEST_COST, -1);
        this.nBestExamples = settings.get(NBEST_EXAMPLES);
    }

    /**
     * Loads the user dictionary configured through {@code user_dictionary} or
     * {@code user_dictionary_rules}.
     *
     * @param environment environment handed to {@link Analysis#getWordList}
     * @param settings    settings holding the dictionary options
     * @return the parsed dictionary, or {@code null} when no entries are configured
     * @throws IllegalArgumentException if both options are set, if an entry cannot be split
     *                                  into columns, or if the same term appears twice
     * @throws ElasticsearchException   if reading the dictionary fails with an I/O error
     */
    public static UserDictionary getUserDictionary(Environment environment, Settings settings) {
        if (settings.get(USER_DICT_PATH_OPTION) != null && settings.get(USER_DICT_RULES_OPTION) != null) {
            throw new IllegalArgumentException("It is not allowed to use [user_dictionary] in conjunction with [user_dictionary_rules]");
        }
        try {
            List<String> rules = Analysis.getWordList(environment, settings, USER_DICT_PATH_OPTION, USER_DICT_RULES_OPTION, false);
            if (rules == null || rules.isEmpty()) {
                return null;
            }
            checkForDuplicateTerms(rules);

            // UserDictionary.open consumes a Reader, so re-join the entries one per line.
            StringBuilder content = new StringBuilder();
            for (String rule : rules) {
                content.append(rule).append(System.lineSeparator());
            }
            try (StringReader reader = new StringReader(content.toString())) {
                return UserDictionary.open(reader);
            }
        } catch (IOException e) {
            throw new ElasticsearchException("failed to load kuromoji user dictionary", e);
        }
    }

    /**
     * Rejects dictionaries in which the first CSV column (the term) occurs more than once.
     * Entries starting with {@code #} are skipped but still counted for the reported line number,
     * which is 0-based.
     */
    private static void checkForDuplicateTerms(List<String> rules) {
        HashSet<String> seenTerms = new HashSet<>();
        int lineNum = 0;
        for (String rule : rules) {
            if (!rule.startsWith("#")) {
                String[] columns = CSVUtil.parse(rule);
                // Guard the columns[0] access below: without it an entry that yields no columns
                // would surface as a bare ArrayIndexOutOfBoundsException with no line information.
                if (columns.length == 0) {
                    throw new IllegalArgumentException("Malformed entry [" + rule + "] in user dictionary at line [" + lineNum + "]");
                }
                if (!seenTerms.add(columns[0])) {
                    throw new IllegalArgumentException("Found duplicate term [" + columns[0] + "] in user dictionary at line [" + lineNum + "]");
                }
            }
            lineNum++;
        }
    }

    /**
     * Resolves the tokenization mode from the {@code mode} setting.
     *
     * <p>The value is matched case-insensitively against {@code search}, {@code normal} and
     * {@code extended}. A missing or unrecognised value yields
     * {@link JapaneseTokenizer#DEFAULT_MODE}.
     *
     * @param settings settings holding the {@code mode} option
     * @return the selected mode, never {@code null}
     */
    public static JapaneseTokenizer.Mode getMode(Settings settings) {
        String modeSetting = settings.get("mode", (String) null);
        if ("search".equalsIgnoreCase(modeSetting)) {
            return JapaneseTokenizer.Mode.SEARCH;
        }
        if ("normal".equalsIgnoreCase(modeSetting)) {
            return JapaneseTokenizer.Mode.NORMAL;
        }
        if ("extended".equalsIgnoreCase(modeSetting)) {
            return JapaneseTokenizer.Mode.EXTENDED;
        }
        return JapaneseTokenizer.DEFAULT_MODE;
    }

    /**
     * Creates a new tokenizer configured with this factory's dictionary, punctuation flag and mode.
     *
     * <p>The n-best cost applied is the configured {@code nbest_cost}; when {@code nbest_examples}
     * is set, the larger of that value and the cost the tokenizer calculates from the examples
     * is used instead.
     *
     * @return a freshly constructed {@link JapaneseTokenizer}
     */
    public Tokenizer create() {
        JapaneseTokenizer tokenizer = new JapaneseTokenizer(this.userDictionary, this.discardPunctuation, this.mode);
        int effectiveCost = this.nBestCost;
        if (this.nBestExamples != null) {
            effectiveCost = Math.max(effectiveCost, tokenizer.calcNBestCost(this.nBestExamples));
        }
        tokenizer.setNBestCost(effectiveCost);
        return tokenizer;
    }
}
