package dev.langchain4j.model.embedding;

import ai.djl.modality.nlp.DefaultVocabulary;
import ai.djl.modality.nlp.bert.BertFullTokenizer;
import dev.langchain4j.agent.tool.ToolSpecification;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.model.Tokenizer;
import java.net.URL;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:dev/langchain4j/model/embedding/BertTokenizer.class */
public class BertTokenizer implements Tokenizer {
    private final BertFullTokenizer tokenizer;

    public BertTokenizer() {
        try {
            this.tokenizer = new BertFullTokenizer(DefaultVocabulary.builder().addFromTextFile(getClass().getResource("/bert-vocabulary.txt")).build(), true);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    public int estimateTokenCountInText(String str) {
        return this.tokenizer.tokenize(str).size();
    }

    public int estimateTokenCountInMessage(ChatMessage chatMessage) {
        return estimateTokenCountInText(chatMessage.text());
    }

    public int estimateTokenCountInMessages(Iterable<ChatMessage> iterable) {
        int i = 0;
        Iterator<ChatMessage> it = iterable.iterator();
        while (it.hasNext()) {
            i += estimateTokenCountInMessage(it.next());
        }
        return i;
    }

    public int estimateTokenCountInToolSpecifications(Iterable<ToolSpecification> iterable) {
        throw new RuntimeException("Not implemented yet");
    }

    public List<String> tokenize(String str) {
        return this.tokenizer.tokenize(str);
    }

    public long tokenId(String str) {
        return this.tokenizer.getVocabulary().getIndex(str);
    }
}
