package org.deeplearning4j.spark.text;

import java.util.List;
import org.apache.spark.api.java.function.Function;
import org.deeplearning4j.berkeley.Pair;
import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;

/* loaded from: input_file:org/deeplearning4j/spark/text/TokenizerFunction.class */
/**
 * Function that tokenizes a sentence and pairs the resulting tokens with their count.
 *
 * <p>The tokenizer factory is identified by its fully-qualified class name. The factory
 * instance itself is held in a {@code transient} field and is created reflectively on the
 * first invocation of {@link #call(String)} (presumably so that only the class name has to
 * travel with the serialized function -- confirm against the Spark deployment).
 */
public class TokenizerFunction implements Function<String, Pair<List<String>, Long>> {
    /** Fully-qualified name of the {@link TokenizerFactory} implementation to instantiate. */
    private String tokenizerFactoryClazz;

    /** Lazily created factory; transient, so it is rebuilt from the class name when absent. */
    private transient TokenizerFactory tokenizerFactory;

    /**
     * Creates a function that uses the given tokenizer factory implementation.
     *
     * @param str fully-qualified class name of a {@link TokenizerFactory} that has a
     *            no-argument constructor
     */
    public TokenizerFunction(String str) {
        this.tokenizerFactoryClazz = str;
    }

    /** Creates a function backed by {@link DefaultTokenizerFactory}. */
    public TokenizerFunction() {
        this(DefaultTokenizerFactory.class.getName());
    }

    /**
     * Tokenizes the given text.
     *
     * @param str the text to tokenize
     * @return a pair holding the list of tokens and the number of tokens in that list
     * @throws IllegalStateException if the configured tokenizer factory cannot be instantiated
     * @throws Exception declared by the implemented function interface
     */
    public Pair<List<String>, Long> call(String str) throws Exception {
        if (this.tokenizerFactory == null) {
            this.tokenizerFactory = getTokenizerFactory();
        }
        // Keep the token list in a local so the count is taken from the same list that is returned.
        List<String> tokens = this.tokenizerFactory.create(str).getTokens();
        return new Pair<>(tokens, Long.valueOf(tokens.size()));
    }

    /**
     * Instantiates the configured tokenizer factory through its no-argument constructor and
     * caches it in {@link #tokenizerFactory}.
     *
     * @return the newly created factory, never {@code null}
     * @throws IllegalStateException if the class cannot be loaded, instantiated, or is not a
     *                               {@link TokenizerFactory}; the original failure is the cause
     */
    private TokenizerFactory getTokenizerFactory() {
        try {
            this.tokenizerFactory = (TokenizerFactory) Class.forName(this.tokenizerFactoryClazz)
                    .getDeclaredConstructor()
                    .newInstance();
        } catch (ReflectiveOperationException | RuntimeException e) {
            // Fail here with the real cause instead of printing it and letting the caller
            // hit a NullPointerException on the missing factory afterwards.
            throw new IllegalStateException(
                    "Unable to instantiate tokenizer factory: " + this.tokenizerFactoryClazz, e);
        }
        return this.tokenizerFactory;
    }
}
