#ifndef _ENVIRONMENT_HPP_
#define _ENVIRONMENT_HPP_

/*
This is the base class for all environments that we implement. This file specifies
the interface of how to interact with any of the environments.
*/

#include <random>           // For std::mt19937_64
#include <string>           // For std::string

#include <Eigen/Dense>      // For Eigen::VectorXd

// Tags naming the available environments. createEnvironment() takes one of
// these to decide which environment to build. The enumerators are unscoped and
// take the default values 0, 1, 2, ... in the order listed.
enum ENVTYPE {
    et_MountainCar,
    et_PuddleWorld,
    et_Acrobot,
    et_CartPole,
    et_Pendulum,
    et_Bicycle
};

// Abstract interface that every environment implements. All interaction with
// an environment (querying its dimensions, reading/setting the state, stepping
// it with an action, and asking for evaluation parameters) goes through these
// pure virtual functions.
class Environment {
public:
    // Virtual so that a derived environment can be destroyed through an
    // Environment pointer.
    virtual ~Environment() = default;

    // Get the number of allowed actions. The actions are integers from 0 to
    // numActions-1.
    virtual int getNumActions() const = 0;

    // Get the number of state variables.
    virtual int getStateDim() const = 0;

    // Get the current state as a vector of *normalized* variables. That is,
    // the state variables should all be in the range [0,1].
    virtual Eigen::VectorXd getState() const = 0;

    // Set the state.
    // NOTE(review): presumably `state` uses the same normalized representation
    // that getState() returns; confirm against the implementations.
    virtual void setState(const Eigen::VectorXd & state,
                          std::mt19937_64 & generator) = 0;

    // Reset the environment for a new episode.
    virtual void newEpisode(std::mt19937_64 & generator) = 0;

    // Is the agent currently in a terminal state? I.e., would the next state
    // always be the absorbing zero-reward state?
    virtual bool terminate() const = 0;

    // Apply the specified action and return the resulting reward.
    virtual double update(int action,
                          std::mt19937_64 & generator) = 0;

    // How should the value function be initialized? To zero?
    virtual double getInitialValue() const = 0;

    // Get the reward discount parameter.
    virtual double getGamma() const = 0;

    // Get the number of episodes that should be sampled when evaluating a
    // policy by Monte Carlo returns. If the environment is deterministic, it
    // should be 1.
    virtual int getNumMCSamplesForPolicyEvaluation() const = 0;

    // Maximum length of an episode when doing Monte Carlo performance
    // estimation.
    virtual int getMaxTForPolicyEvaluation() const = 0;

    // How many samples should be generated from each state in value iteration?
    // This is to sample stochastic state transitions. It should be one if
    // state transitions are deterministic.
    virtual int getNumSamplesPerState() const = 0;

    // For some environments we don't want to plot return (it's hard to
    // visualize). At the end of a trajectory, before newEpisode is called,
    // this function can be called to get a statistic for how good that
    // episode was.
    virtual double getPlottableStatistic() const = 0;

    // A string to say what the plottable statistic encodes.
    virtual std::string getPlottableStatisticName() const = 0;
};

// Factory function: returns a pointer to the environment selected by `type`.
// `generator` is the random number generator handed to the factory.
// NOTE(review): the raw pointer return suggests the caller owns the object and
// must delete it; confirm against the definition, which is not in this header.
Environment * createEnvironment(
    const ENVTYPE & type,
    std::mt19937_64 & generator);

#endif // _ENVIRONMENT_HPP_
