package com.github.steveash.jg2p.stress;

import cc.mallet.fst.CRF;
import cc.mallet.fst.CRFTrainerByThreadedLabelLikelihood;
import cc.mallet.fst.TransducerTrainer;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.Target2Label;
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence;
import cc.mallet.pipe.TokenSequenceLowercase;
import cc.mallet.types.Alphabet;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import com.github.steveash.jg2p.align.Alignment;
import com.github.steveash.jg2p.seq.LeadingTrailingFeature;
import com.github.steveash.jg2p.seq.NeighborShapeFeature;
import com.github.steveash.jg2p.seq.NeighborTokenFeature;
import com.github.steveash.jg2p.seq.SurroundingTokenFeature;
import com.github.steveash.jg2p.seq.TokenSequenceToFeature;
import com.github.steveash.jg2p.seq.TokenWindow;
import com.github.steveash.jg2p.syll.SWord;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableList;
import java.util.List;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/github/steveash/jg2p/stress/StressTrainer.class */
/**
 * Trains a linear-chain CRF (via Mallet) over aligned words and wraps the resulting
 * transducer in a {@link StressModel}.
 *
 * <p>Each training {@link Alignment} is labelled from its syllable word with one of:
 * {@code "NONE"}, {@code "LATE"}, or the decimal value returned by
 * {@code SWord.firstSyllableWithStress()} (see {@link #convertStressIndexToLabel(SWord)}).
 */
public class StressTrainer {
    private static final Logger log = LoggerFactory.getLogger(StressTrainer.class);

    /** Label used when {@code firstSyllableWithStress()} returns a negative value. */
    private static final String NONE = "NONE";
    /** Label shared by every stress index at or beyond {@link #LATE_INDEX}. */
    private static final String LATE = "LATE";
    /** Smallest stress index that is collapsed into the {@link #LATE} label. */
    private static final int LATE_INDEX = 4;

    /** Number of worker threads handed to the threaded CRF trainer. */
    private static final int TRAINER_THREADS = 8;
    /** Gaussian prior variance configured on the CRF trainer. */
    private static final double GAUSSIAN_PRIOR_VARIANCE = 2.0d;

    /**
     * Trains a stress model on all of the supplied alignments.
     *
     * @param list training alignments; must not be null, and every element must have a
     *             non-null syllable word
     * @return a model wrapping the trained transducer
     * @throws NullPointerException if {@code list} is null or an alignment has no syllable word
     */
    public StressModel train(List<Alignment> list) {
        Preconditions.checkNotNull(list, "training alignments must not be null");
        log.info("About to train the stress predictor...");
        InstanceList examples = makeExamplesFromAligns(list);
        Pipe pipe = examples.getPipe();
        log.info("Training stress tagger on whole data...");
        TransducerTrainer trainer = trainOnce(pipe, examples);
        return new StressModel(trainer.getTransducer());
    }

    /**
     * Builds an order-1 CRF over the given pipe, trains it on the instances with the
     * threaded label-likelihood trainer, then freezes both alphabets.
     *
     * @param pipe         the feature pipe the instances were produced with
     * @param instanceList the training instances
     * @return the trainer that holds the trained CRF
     */
    private TransducerTrainer trainOnce(Pipe pipe, InstanceList instanceList) {
        Stopwatch watch = Stopwatch.createStarted();
        CRF crf = new CRF(pipe, null);
        crf.addOrderNStates(instanceList, new int[]{1}, null, null, null, null, false);
        crf.addStartState();
        log.info("Starting stress CRF training...");

        CRFTrainerByThreadedLabelLikelihood trainer =
                new CRFTrainerByThreadedLabelLikelihood(crf, TRAINER_THREADS);
        trainer.setGaussianPriorVariance(GAUSSIAN_PRIOR_VARIANCE);

        boolean trained = false;
        try {
            trainer.train(instanceList);
            trained = true;
        } finally {
            // Always release the trainer's worker threads, even when training throws.
            if (trained) {
                trainer.shutdown();
            } else {
                try {
                    trainer.shutdown();
                } catch (RuntimeException shutdownFailure) {
                    // Logged rather than thrown so it cannot mask the training exception.
                    log.warn("Failed to shut down the CRF trainer after a training failure",
                            shutdownFailure);
                }
            }
        }

        watch.stop();
        log.info("Stress CRF training took {}", watch);
        // No new features or labels may be added once the model is trained.
        crf.getInputAlphabet().stopGrowth();
        crf.getOutputAlphabet().stopGrowth();
        return trainer;
    }

    /**
     * Converts every alignment into a Mallet instance whose data is the alignment and
     * whose target is the stress label, pushing each through a freshly built pipe.
     *
     * @param list the alignments to convert
     * @return the populated instance list
     * @throws NullPointerException if an alignment has no syllable word
     */
    private InstanceList makeExamplesFromAligns(List<Alignment> list) {
        InstanceList instances = new InstanceList(makePipe());
        for (Alignment alignment : list) {
            SWord syllWord = Preconditions.checkNotNull(alignment.getSyllWord(),
                    "alignment has no syllable word; cannot derive a stress label");
            String label = convertStressIndexToLabel(syllWord);
            instances.addThruPipe(new Instance(alignment, label, null, null));
        }
        log.info("Read {} instances of training data for stress training ", list.size());
        return instances;
    }

    /**
     * Maps the value of {@code firstSyllableWithStress()} onto a training label.
     *
     * @param sWord the syllable word to label
     * @return {@code "NONE"} for a negative index (presumably "no stressed syllable" —
     *         confirm against {@code SWord}), {@code "LATE"} for an index of
     *         {@link #LATE_INDEX} or more, otherwise the index as a decimal string
     */
    private static String convertStressIndexToLabel(SWord sWord) {
        int stressIndex = sWord.firstSyllableWithStress();
        if (stressIndex < 0) {
            return NONE;
        }
        if (stressIndex >= LATE_INDEX) {
            return LATE;
        }
        return Integer.toString(stressIndex);
    }

    /**
     * Assembles the serial feature pipe. The same data alphabet is shared by the
     * alignment pipe and the feature-vector conversion, and the alignment pipe also
     * receives the target alphabet of the trailing {@link Target2Label} step.
     *
     * @return the pipe used to turn alignments into CRF training instances
     */
    private Pipe makePipe() {
        Alphabet alphabet = new Alphabet();
        Target2Label target2Label = new Target2Label();
        return new SerialPipes(ImmutableList.of(
                new AlignToStressPipe(alphabet, target2Label.getTargetAlphabet(), ImmutableList.of()),
                new TokenSequenceLowercase(),
                new NeighborTokenFeature(true, makeNeighbors()),
                new SurroundingTokenFeature(false),
                new SurroundingTokenFeature(true),
                new NeighborShapeFeature(true, makeShapeNeighs()),
                new LeadingTrailingFeature(),
                new TokenSequenceToFeature(),
                new TokenSequence2FeatureVectorSequence(alphabet, true, false),
                target2Label));
    }

    /**
     * Token windows handed to {@link NeighborShapeFeature}.
     * NOTE(review): the two arguments are presumably (offset, width) — confirm against
     * {@code TokenWindow}. The literal 4 here is a window parameter and is unrelated to
     * {@link #LATE_INDEX}.
     */
    private static List<TokenWindow> makeShapeNeighs() {
        return ImmutableList.of(
                new TokenWindow(-4, 4),
                new TokenWindow(-3, 3),
                new TokenWindow(-2, 2),
                new TokenWindow(-1, 1),
                new TokenWindow(1, 1),
                new TokenWindow(1, 2),
                new TokenWindow(1, 3),
                new TokenWindow(1, 4));
    }

    /**
     * Token windows handed to {@link NeighborTokenFeature}.
     * NOTE(review): the two arguments are presumably (offset, width) — confirm against
     * {@code TokenWindow}. The literal 4 here is a window parameter and is unrelated to
     * {@link #LATE_INDEX}.
     */
    private List<TokenWindow> makeNeighbors() {
        return ImmutableList.of(
                new TokenWindow(1, 1),
                new TokenWindow(1, 2),
                new TokenWindow(2, 1),
                new TokenWindow(1, 3),
                new TokenWindow(4, 1),
                new TokenWindow(-1, 1),
                new TokenWindow(-2, 2),
                new TokenWindow(-3, 3),
                new TokenWindow(-4, 1));
    }
}
