#ifndef _PENDULUM_HPP_
#define _PENDULUM_HPP_

#define _USE_MATH_DEFINES	// For M_PI in math.h

#include <math.h>			// For M_PI, and trig functions
#include <stdlib.h>			// For rand()

#include "Environment.hpp"

/*
The Pendulum class implements the pendulum from Kenji Doya's paper. However, we implement
it as in his source code, where theta=0 means the pendulum is hanging straight down, not upright!

Also, we remove the over-rotation special case (of termination and reward). In some old tests,
it never overrotates, and his source code did not have a test implemented either.

We also change actions to be discrete. This code has been used for many things now, and there
may now be other differences from Kenji's original.
*/
// Pendulum environment with discrete actions. Angles follow the convention
// theta = 0 when hanging down and theta = pi when upright.
class Pendulum : public Environment
{
public:
	// Create the pendulum. Explicit, so that a random number generator is
	// never silently converted into a Pendulum.
	explicit Pendulum(std::mt19937_64 & generator);

	// Clean up memory (nothing is actually allocated with 'new').
	~Pendulum() override;

	// Get the number of allowed actions. The actions are integers from 0 to
	// numActions-1.
	int getNumActions() const override;

	// Get the number of state variables.
	int getStateDim() const override;

	// Get the current state as a vector of *normalized* variables. That is,
	// the state variables should all be in the range [0,1].
	Eigen::VectorXd getState() const override;

	// Set the state.
	void setState(const Eigen::VectorXd & state,
				  std::mt19937_64 & generator) override;

	// Reset the environment for a new episode.
	void newEpisode(std::mt19937_64 & generator) override;

	// Is the agent in a terminal state currently? I.e., would the next state
	// always be the absorbing zero-reward state?
	bool terminate() const override;

	// Apply the specified action and return the resulting reward.
	double update(int action,
				  std::mt19937_64 & generator) override;

	// How should the value function be initialized? To zero?
	double getInitialValue() const override;

	// Get the reward discount parameter.
	double getGamma() const override;

	// Get the number of episodes that should be sampled when evaluating a
	// policy by Monte Carlo returns. If the environment is deterministic, it
	// should be 1.
	int getNumMCSamplesForPolicyEvaluation() const override;

	// How long should episodes run, maximum, when doing Monte Carlo
	// performance estimation?
	int getMaxTForPolicyEvaluation() const override;

	// How many samples should be generated from each state in value
	// iteration? This is to sample stochastic state transitions. It should be
	// one if state transitions are deterministic.
	int getNumSamplesPerState() const override;

	// For some environments we don't want to plot return (it's hard to
	// visualize). At the end of a trajectory, before newEpisode is called,
	// this function can be called to get a statistic for how good that
	// episode was.
	double getPlottableStatistic() const override;

	// A string to say what the plottable statistic encodes.
	std::string getPlottableStatisticName() const override;

private:
	// NOTE(review): presumably returns the (lower, upper) bounds of the raw
	// state variables used for normalization -- confirm against the
	// definition in the implementation file.
	static std::pair<Eigen::VectorXd,Eigen::VectorXd> getStateRange();

	double theta;	// Angle of pendulum (0 = down, pi = up)
	double omega;	// Time derivative of theta
	double dt;		// Time step size
	double uMax;	// Maximum torque in either direction
	int simSteps;	// Dynamics simulated at dt/simSteps
	double m;		// Mass
	double l;		// Length
	double g;		// Gravity
	double mu;		// Friction constant
	double c;		// Weighting of reward function bang-bang-ness: control cost, 'cc' in Kenji's code

	// NOTE(review): undocumented in the original; presumably accumulates how
	// long the pendulum has been held up during the episode -- confirm
	// against the implementation file.
	double timeUp;

	// Forces 'x' to be between -pi and pi.
	static double angleWrap(double x);

	// NOTE(review): presumably wraps 'x' into a range of width 2*pi starting
	// at 0 -- confirm against the implementation file.
	static double angleWrap2Pi(double x);
};

#endif
