/**
 * This is the template of a class that should define a Q-learning
 * agent for an MDP with parameters passed into the constructor.  You
 * will need to fill in three parts: <ol> <li> The constructor to
 * initialize any fields you need.  <li> The exploration function to
 * evaluate the trade-off between exploration and exploitation. <li>
 * The <tt>doStep</tt> function in which you can update any of the
 * Q-learning agent's internal structures based on the state and
 * reward, and return the next action to take.  </ol> You may wish to
 * add other fields with other useful information.
 */


import java.util.*;

/**
 * Tabular Q-learning agent with count-based optimistic exploration.
 *
 * <p>The agent is driven by repeated calls to {@link #doStep(int, double)}:
 * each call reports the state the agent is now in together with the reward
 * signal of that state, and returns the action the agent takes next.  The
 * reward reported with a state is credited to the action subsequently taken
 * from that state, i.e. the update for the previous state-action pair
 * {@code (s, a)} is
 *
 * <pre>
 *   Q(s, a) += alpha(N(s, a)) * (r(s) + discount * max_a' Q(s', a') - Q(s, a))
 * </pre>
 *
 * where {@code s'} is the state passed to the current call.
 *
 * <p>Note the (documented, intentionally preserved) difference in index
 * order between the two public tables: {@code Q[action][state]} but
 * {@code N[state][action]}.
 *
 * <p>Instances are not thread-safe.
 */
public class QLearningAgent {

    /**
     * Number of times a state-action pair must have been tried before the
     * agent stops treating it as unexplored.
     */
    private static final int MIN_TRIES = 5;

    /**
     * Value reported by the exploration function for insufficiently tried
     * pairs.  Infinity keeps the agent independent of the MDP's reward
     * scale: an under-explored action always beats any learned estimate.
     */
    private static final double OPTIMISTIC_VALUE = Double.POSITIVE_INFINITY;

    /** Marker meaning "no previous step has been taken yet". */
    private static final int NONE = -1;

    /** the Q-value table. {@code Q[a][s]} is the action value where
     * {@code a} is the action and {@code s} is the state.
     **/
    public double[][] Q;

    /** the table of frequencies for state-action pairs.
     * {@code N[s][a]} is the frequency of state {@code s} and
     * action {@code a}.
     **/
    public int[][] N;

    private final int numStates;
    private final int numActions;
    private final double discount;

    /** State, action and reward of the previous step ({@code NONE} before the first step). */
    private int prevState = NONE;
    private int prevAction = NONE;
    private double prevReward;

    /**
     * Creates an agent for an MDP with {@code numStates} states and
     * {@code numActions} actions.  All Q-values and visit counts start
     * at zero.
     *
     * @param numStates  number of states; must be positive
     * @param numActions number of actions; must be positive
     * @param discount   reward discount factor; must lie in {@code [0, 1]}
     * @throws IllegalArgumentException if any argument is out of range
     **/
    public QLearningAgent(int numStates, int numActions, double discount) {
        if (numStates <= 0 || numActions <= 0) {
            throw new IllegalArgumentException(
                "numStates and numActions must be positive, got "
                + numStates + " and " + numActions);
        }
        // Written as a negated conjunction so that NaN is rejected as well.
        if (!(discount >= 0.0 && discount <= 1.0)) {
            throw new IllegalArgumentException(
                "discount must be in [0, 1], got " + discount);
        }
        this.numStates = numStates;
        this.numActions = numActions;
        this.discount = discount;
        this.Q = new double[numActions][numStates];
        this.N = new int[numStates][numActions];
    }

    /**
     * Returns the utility of each state, defined as the largest Q-value
     * over all actions in that state.
     *
     * @return a new array of length {@code numStates}
     **/
    public double[] getUtility() {
        double[] utility = new double[numStates];
        for (int s = 0; s < numStates; s++) {
            utility[s] = Q[greedyAction(s)][s];
        }
        return utility;
    }

    /**
     * Returns the greedy policy: for each state, the action with the
     * largest Q-value (the lowest-numbered such action on ties).
     *
     * @return a new array of length {@code numStates}
     **/
    public int[] getPolicy() {
        int[] policy = new int[numStates];
        for (int s = 0; s < numStates; s++) {
            policy[s] = greedyAction(s);
        }
        return policy;
    }

    /**
     * The exploration function, as a function of utility {@code u}
     * and the number of times an action-state pair has been taken,
     * {@code n}.  Pairs tried fewer than {@code MIN_TRIES} times are
     * reported as maximally attractive; afterwards the learned estimate
     * is used unchanged.
     *
     * @param u current utility estimate of the pair
     * @param n number of times the pair has been tried
     * @return the optimistic value if {@code n} is small, otherwise {@code u}
     **/
    public double explorationFunction(double u, int n) {
        return n < MIN_TRIES ? OPTIMISTIC_VALUE : u;
    }

    /**
     * Do a single step of the Q-learning agent.  The inputs to the
     * agent are the current state {@code state} and the reward
     * signal {@code reward} of that state.  The Q-value and visit count of
     * the previous state-action pair (if any) are updated first, then the
     * next action is chosen by maximising the exploration function.
     *
     * @param state  index of the current state, in {@code [0, numStates)}
     * @param reward reward signal observed in {@code state}
     * @return the action taken by the agent, in {@code [0, numActions)}
     * @throws IllegalArgumentException if {@code state} is out of range
     **/
    public int doStep(int state, double reward) {
        if (state < 0 || state >= numStates) {
            throw new IllegalArgumentException(
                "state must be in [0, " + numStates + "), got " + state);
        }

        if (prevState != NONE) {
            int visits = ++N[prevState][prevAction];
            // The target is computed before Q is modified so that a
            // self-transition bootstraps from the old estimate.
            double target = prevReward + discount * Q[greedyAction(state)][state];
            double old = Q[prevAction][prevState];
            Q[prevAction][prevState] = old + learningRate(visits) * (target - old);
        }

        // Strict '>' keeps the lowest-numbered action on ties, which makes
        // the agent's behaviour deterministic.
        int action = 0;
        double best = explorationFunction(Q[0][state], N[state][0]);
        for (int candidate = 1; candidate < numActions; candidate++) {
            double value = explorationFunction(Q[candidate][state], N[state][candidate]);
            if (value > best) {
                best = value;
                action = candidate;
            }
        }

        prevState = state;
        prevAction = action;
        prevReward = reward;
        return action;
    }

    /** Returns the lowest-numbered action with the largest Q-value in {@code state}. */
    private int greedyAction(int state) {
        int bestAction = 0;
        for (int candidate = 1; candidate < numActions; candidate++) {
            if (Q[candidate][state] > Q[bestAction][state]) {
                bestAction = candidate;
            }
        }
        return bestAction;
    }

    /**
     * Learning rate after {@code visits} tries of a pair.  With 1/n the
     * Q-value is the running average of all sampled targets, and the rate
     * decays as required for convergence.
     */
    private static double learningRate(int visits) {
        return 1.0 / visits;
    }
}
