package org.canova.cli.vectorization;

import java.io.IOException;
import java.util.Collection;
import org.canova.api.writable.Writable;
import org.canova.cli.shuffle.Shuffler;
import org.canova.cli.transforms.text.nlp.TfidfTextVectorizerTransform;

/* loaded from: input_file:org/canova/cli/vectorization/TextVectorizationEngine.class */
/**
 * Vectorization engine that runs a {@link TfidfTextVectorizerTransform} over the
 * configured input in two passes and writes the transformed records to the
 * engine's writer.
 *
 * <p>Pass one feeds every record to the transform's {@code collectStatistics};
 * pass two re-opens the input, applies {@code transform} to each record and
 * writes it out, optionally routing the records through a {@link Shuffler} first.
 */
public class TextVectorizationEngine extends VectorizationEngine {

    /**
     * Runs the two-pass vectorization.
     *
     * <p>The reader and writer held by this engine are closed before this method
     * returns, whether it completes normally or throws.
     *
     * @throws IOException if reading, transforming or writing fails, or if the
     *         thread is interrupted while the reader is being re-created for the
     *         second pass (the interrupt status is restored and the
     *         {@link InterruptedException} is attached as the cause)
     */
    @Override // org.canova.cli.vectorization.VectorizationEngine
    public void execute() throws IOException {
        TfidfTextVectorizerTransform tfidf = new TfidfTextVectorizerTransform();
        // The minimum word frequency is forced to 1 before the transform reads the configuration.
        this.conf.setInt(TfidfTextVectorizerTransform.MIN_WORD_FREQUENCY, 1);
        tfidf.initialize(this.conf);

        try {
            // ---- Pass 1: gather corpus statistics ----
            int recordCount = 0;
            try {
                while (this.reader.hasNext()) {
                    tfidf.collectStatistics(this.reader.next());
                    recordCount++;
                }
                if (this.printStats) {
                    System.out.println("Total Records: " + recordCount);
                    System.out.println("Total Labels: " + tfidf.getNumberOfLabelsSeen());
                    System.out.println("Vocabulary Size of Corpus: " + tfidf.getVocabularySize());
                    tfidf.debugPrintVocabList();
                }
            } finally {
                // Release the first-pass reader even if statistics collection failed.
                this.reader.close();
            }

            // ---- Re-open the input for the second pass ----
            try {
                this.reader = this.inputFormat.createReader(this.split, this.conf);
            } catch (InterruptedException e) {
                // Continuing would iterate the reader that was just closed; restore the
                // interrupt status for callers and fail explicitly instead.
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted while re-creating the record reader for the second pass", e);
            }

            // ---- Pass 2: transform and write ----
            try {
                if (this.shuffleOn) {
                    // Buffer every transformed record in the shuffler, then drain it to the writer.
                    Shuffler shuffler = new Shuffler();
                    while (this.reader.hasNext()) {
                        Collection<Writable> record = this.reader.next();
                        tfidf.transform(record);
                        shuffler.addRecord(record);
                    }
                    while (shuffler.hasNext()) {
                        this.writer.write(shuffler.next());
                    }
                } else {
                    // Stream each transformed record straight to the writer.
                    while (this.reader.hasNext()) {
                        Collection<Writable> record = this.reader.next();
                        tfidf.transform(record);
                        this.writer.write(record);
                    }
                }
            } finally {
                this.reader.close();
            }
        } finally {
            // The writer is always closed, including on failure paths.
            this.writer.close();
        }
    }
}
