package org.tweetyproject.machinelearning.rl.mdp.algorithms;

import java.util.Iterator;
import java.util.Map;
import org.tweetyproject.machinelearning.rl.mdp.Action;
import org.tweetyproject.machinelearning.rl.mdp.FixedPolicy;
import org.tweetyproject.machinelearning.rl.mdp.MarkovDecisionProcess;
import org.tweetyproject.machinelearning.rl.mdp.Policy;
import org.tweetyproject.machinelearning.rl.mdp.State;

/* JADX WARN: Classes with same name are omitted:
  input_file:org.tweetyproject.machinelearning-1.27.jar:org/tweetyproject/machinelearning/rl/mdp/algorithms/PolicyIteration.class
 */
/* loaded from: input_file:org.tweetyproject.machinelearning-1.26.jar:org/tweetyproject/machinelearning/rl/mdp/algorithms/PolicyIteration.class */
/**
 * Computes a policy for a Markov decision process by repeatedly evaluating the
 * current policy and deriving a new policy from the resulting state utilities,
 * stopping as soon as the derived policy equals the current one.
 *
 * @param <S> the type of states
 * @param <A> the type of actions
 */
public class PolicyIteration<S extends State, A extends Action> extends OfflineAlgorithm<S, A> {
    /** Strategy used to compute the state utilities of a given policy. */
    private final PolicyEvaluation<S, A> pe;

    /**
     * Creates a new policy iteration algorithm.
     *
     * @param policyEvaluation the policy evaluation used in every iteration to
     *                         obtain the utilities of the current policy
     */
    public PolicyIteration(PolicyEvaluation<S, A> policyEvaluation) {
        this.pe = policyEvaluation;
    }

    /**
     * Determines a policy for the given process.
     *
     * <p>The starting policy maps every state to the first action returned by
     * iterating over {@code markovDecisionProcess.getActions()}. Each round
     * evaluates the current policy, derives a new policy from the utilities via
     * the three-argument {@code getPolicy} overload (not defined in this class;
     * presumably inherited from {@code OfflineAlgorithm}), and stops when the
     * new policy {@code equals} the current one.
     *
     * @param markovDecisionProcess the process to compute a policy for
     * @param d value forwarded unchanged to the policy evaluation and to the
     *          policy derivation step (presumably the discount factor; confirm
     *          against {@code OfflineAlgorithm})
     * @return the first policy that is reproduced unchanged by a round of
     *         evaluation and derivation
     */
    @Override // org.tweetyproject.machinelearning.rl.mdp.algorithms.OfflineAlgorithm
    public Policy<S, A> getPolicy(MarkovDecisionProcess<S, A> markovDecisionProcess, double d) {
        // Arbitrary starting point: every state gets the first available action.
        // A typed local avoids the raw type and the cast back to FixedPolicy.
        FixedPolicy<S, A> initial = new FixedPolicy<>();
        for (S state : markovDecisionProcess.getStates()) {
            initial.set(state, markovDecisionProcess.getActions().iterator().next());
        }

        Policy<S, A> current = initial;
        while (true) {
            // NOTE(review): the two println calls look like leftover debugging
            // output; kept so observable behaviour is unchanged. Consider
            // routing them through a logger.
            System.out.println(current);
            Map<S, Double> utilities = this.pe.getUtilities(markovDecisionProcess, current, d);
            System.out.println(utilities);
            Policy<S, A> improved = getPolicy(utilities, markovDecisionProcess, d);
            if (improved.equals(current)) {
                // Nothing changed in this round, so the current policy is final.
                return current;
            }
            current = improved;
        }
    }
}
