#ifndef _CARTPOLE_H_
#define _CARTPOLE_H_

#include "Environment.hpp"

/*
This is cart pole balancing, using discrete actions.
*/

#define _USE_MATH_DEFINES	// For M_PI in math.h
#include <math.h>
#include <random>

#include "Environment.hpp"

// Cart-pole balancing environment with a discrete action set.
//
// The simulated state consists of the cart position and velocity and the pole
// angle and angular velocity. All randomness is drawn from the generator that
// the caller passes into the individual member functions.
class CartPole : public Environment
{
public:
    // Create the environment.
    // Marked explicit so that a random generator is never implicitly
    // converted into a CartPole object.
    explicit CartPole(std::mt19937_64 & generator);

    // Clean up memory (this class does not actually 'new' anything).
    ~CartPole() override;

    // Get the number of allowed actions. The actions are integers from 0 to
    // numActions-1.
    int getNumActions() const override;

    // Get the number of state variables.
    int getStateDim() const override;

    // Get the current state as a vector of *normalized* variables. That is,
    // the state variables should all be in the range [0,1].
    Eigen::VectorXd getState() const override;

    // Set the state.
    void setState(const Eigen::VectorXd & state,
                  std::mt19937_64 & generator) override;

    // Reset the environment for a new episode.
    void newEpisode(std::mt19937_64 & generator) override;

    // Is the agent currently in a terminal state? I.e., would the next state
    // always be the absorbing zero-reward state?
    bool terminate() const override;

    // Apply the specified action and return the resulting reward.
    double update(int action,
                  std::mt19937_64 & generator) override;

    // How should the value function be initialized? To zero?
    double getInitialValue() const override;

    // Get the reward discount parameter.
    double getGamma() const override;

    // Get the number of episodes that should be sampled when evaluating a
    // policy by Monte Carlo returns. If the environment is deterministic, it
    // should be 1.
    int getNumMCSamplesForPolicyEvaluation() const override;

    // How long should episodes run, at most, when doing Monte Carlo
    // performance estimation?
    int getMaxTForPolicyEvaluation() const override;

    // How many samples should be generated from each state in value
    // iteration? This is to sample stochastic state transitions. It should be
    // one if state transitions are deterministic.
    int getNumSamplesPerState() const override;

    // For some environments we don't want to plot return (it's hard to
    // visualize). At the end of a trajectory, before newEpisode is called,
    // this function can be called to get a statistic for how good that
    // episode was.
    double getPlottableStatistic() const override;

    // A string to say what the plottable statistic encodes.
    std::string getPlottableStatisticName() const override;

private:
    // ---- Dynamic state ----
    double x;       // Cart position
    double v;       // Cart velocity
    double theta;   // Pole angle
    double omega;   // Pole angular velocity

    // ---- Simulation parameters ----
    double dt;      // Time step duration
    int simSteps;   // Number of forward Euler steps to perform per dt
    double uMax;    // Max force on cart
    double l;       // Pole length
    double g;       // Gravity
    double m;       // Mass of pole
    double mc;      // Mass of cart
    double muc;     // Coefficient of friction, cart to ground
    double mup;     // Coefficient of friction, cart to pole
    double xMax;    // Maximum horizontal position of cart

    // Time step.
    // NOTE(review): presumably the current time within the episode, as
    // opposed to the step duration `dt` - confirm against the implementation.
    double t;

    // Returns a pair containing the minimum and maximum continuous state
    // variable values. This is for normalizing the states when using function
    // approximation.
    static std::pair<Eigen::VectorXd,Eigen::VectorXd> getStateRange();

    // NOTE(review): judging by the name, presumably wraps an angle into the
    // range [-pi, pi]; the definition is not visible here - confirm.
    double WrapPosNegPI(const double & theta);

    // NOTE(review): presumably returns the sign of x; the value returned for
    // x == 0 depends on the implementation - confirm.
    double sign(const double & x);
};

#endif

