package org.tweetyproject.machinelearning.rl.mdp.algorithms;

import java.util.Map;
import org.tweetyproject.machinelearning.rl.mdp.Action;
import org.tweetyproject.machinelearning.rl.mdp.FixedPolicy;
import org.tweetyproject.machinelearning.rl.mdp.MarkovDecisionProcess;
import org.tweetyproject.machinelearning.rl.mdp.Policy;
import org.tweetyproject.machinelearning.rl.mdp.State;

/* JADX WARN: Classes with same name are omitted:
  input_file:org.tweetyproject.machinelearning-1.27.jar:org/tweetyproject/machinelearning/rl/mdp/algorithms/OfflineAlgorithm.class
 */
/* loaded from: input_file:org.tweetyproject.machinelearning-1.26.jar:org/tweetyproject/machinelearning/rl/mdp/algorithms/OfflineAlgorithm.class */
/**
 * Base class for algorithms that produce a {@link Policy} for a
 * {@link MarkovDecisionProcess}.
 *
 * <p>Subclasses implement {@link #getPolicy(MarkovDecisionProcess, double)}. This class
 * additionally offers a helper that turns a table of per-state values into a greedy policy.
 *
 * @param <S> the type of states
 * @param <A> the type of actions
 */
public abstract class OfflineAlgorithm<S extends State, A extends Action> {
    /**
     * Determines a policy for the given Markov decision process.
     *
     * @param markovDecisionProcess the process for which a policy is computed
     * @param d a numeric parameter of the algorithm; presumably the discount factor, matching
     *          its use in {@link #getPolicy(Map, MarkovDecisionProcess, double)} (to be
     *          confirmed against the concrete implementations)
     * @return the computed policy
     */
    public abstract Policy<S, A> getPolicy(MarkovDecisionProcess<S, A> markovDecisionProcess, double d);

    /**
     * Builds a greedy policy from the given per-state values.
     *
     * <p>For every non-terminal state {@code s} and every action {@code a} the value
     * {@code sum over s' of getProb(s, a, s') * (getReward(s, a, s') + d * map.get(s'))}
     * is computed, and the action with the largest such value is assigned to {@code s}.
     * An action only replaces the current best one if its value is strictly greater, so among
     * equally valued actions the one iterated first is kept. Terminal states receive no entry.
     *
     * <p>If no action reaches a value strictly greater than negative infinity (for instance
     * because there are no actions, or every value is {@code NaN}), {@code null} is set as the
     * action of that state.
     *
     * @param map the value of each state; must contain a non-null entry for every state
     *            returned by {@code markovDecisionProcess.getStates()}
     * @param markovDecisionProcess the process supplying states, actions, transition
     *            probabilities and rewards
     * @param d the factor by which the value of a successor state is weighted
     * @return the greedy policy with respect to {@code map}
     * @throws NullPointerException if {@code map} has no (or a null) value for some state
     */
    public Policy<S, A> getPolicy(Map<S, Double> map, MarkovDecisionProcess<S, A> markovDecisionProcess, double d) {
        // Parameterized (not raw) so that set(...) and the return are type-checked.
        FixedPolicy<S, A> fixedPolicy = new FixedPolicy<S, A>();
        for (S state : markovDecisionProcess.getStates()) {
            if (markovDecisionProcess.isTerminal(state)) {
                // No action is chosen in terminal states.
                continue;
            }
            A bestAction = null;
            double bestValue = Double.NEGATIVE_INFINITY;
            for (A action : markovDecisionProcess.getActions()) {
                // Expected value of taking 'action' in 'state', summed over all successors.
                double expectedValue = 0.0d;
                for (S successor : markovDecisionProcess.getStates()) {
                    double probability = markovDecisionProcess.getProb(state, action, successor);
                    double reward = markovDecisionProcess.getReward(state, action, successor);
                    expectedValue += probability * (reward + (d * map.get(successor).doubleValue()));
                }
                // Strict comparison: the first action encountered wins ties.
                if (expectedValue > bestValue) {
                    bestValue = expectedValue;
                    bestAction = action;
                }
            }
            fixedPolicy.set(state, bestAction);
        }
        return fixedPolicy;
    }
}
