#ifndef _ACROBOT_H_
#define _ACROBOT_H_

#include "Environment.hpp"

// Based on http://www.cse.iitm.ac.in/~cs670/book/node110.html
// Updated URL: https://webdocs.cs.ualberta.ca/~sutton/book/ebook/node110.html
// The old, outdated link is kept in case there are discrepancies between the two.

// Acrobot environment: a two-link system whose state consists of the two link
// angles and their angular velocities (see the private members below).
// Implements the Environment interface; every public member function except
// the constructor overrides a virtual declared there.
class Acrobot : public Environment
{
public:
    // Construct the environment. `generator` is a 64-bit Mersenne Twister
    // engine; how it is used is defined in the implementation file.
    Acrobot(std::mt19937_64 & generator);

    ~Acrobot() override;

    // Get the number of allowed actions.
    // The actions are integers from 0 to numActions-1.
    int getNumActions() const override;

    // Get the number of state variables.
    int getStateDim() const override;

    // Get the current state as a vector of *normalized* variables.
    // That is, the state variables should all be in the range [0,1].
    Eigen::VectorXd getState() const override;

    // Set the state.
    // NOTE(review): presumably `state` uses the same normalized encoding that
    // getState() returns -- confirm against the implementation.
    void setState(const Eigen::VectorXd & state, std::mt19937_64 & generator) override;

    // Reset the environment for a new episode.
    void newEpisode(std::mt19937_64 & generator) override;

    // Is the agent currently in a terminal state? I.e., would the next state
    // always be the absorbing zero-reward state?
    bool terminate() const override;

    // Apply the specified action and return the resulting reward.
    double update(int action, std::mt19937_64 & generator) override;

    // How should the value function be initialized? To zero?
    double getInitialValue() const override;

    // Get the reward discount parameter.
    double getGamma() const override;

    // Get the number of episodes that should be sampled when evaluating a
    // policy by Monte Carlo returns. If the environment is deterministic,
    // it should be 1.
    int getNumMCSamplesForPolicyEvaluation() const override;

    // How long should episodes run, at most, when doing Monte Carlo
    // performance estimation?
    int getMaxTForPolicyEvaluation() const override;

    // How many samples should be generated from each state in value
    // iteration? This is to sample stochastic state transitions. It should be
    // one if state transitions are deterministic.
    int getNumSamplesPerState() const override;

    // For some environments we don't want to plot return (it's hard to
    // visualize). At the end of a trajectory, before newEpisode is called,
    // this function can be called to get a statistic for how good that
    // episode was.
    double getPlottableStatistic() const override;

    // A string to say what the plottable statistic encodes.
    std::string getPlottableStatisticName() const override;

private:
    // ---- Physical parameters of the two links ----

    // Mass of first link
    double m1;
    // Mass of second link
    double m2;
    // Length of first link
    double l1;
    // Length of second link
    double l2;
    // Length to center of mass of first link
    double lc1;
    // Length to center of mass of second link
    double lc2;
    // Moment of inertia of first link
    double i1;
    // Moment of inertia of second link
    double i2;
    // Gravity (negative?)
    double g;
    // Maximum torque caused by action. Not included in the source document,
    // but added so the domain can be tweaked.
    double fmax;

    // ---- Integration settings ----

    // Time step in seconds
    double dt;
    // Number of forward Euler steps for approximating dynamics.
    // NOTE(review): this is a count but is stored as a double -- confirm
    // whether that is intentional before changing the type.
    double numEulerSteps;

    // ---- Dynamic state ----

    // First link angle
    double theta1;
    // Second link angle
    double theta2;
    // First link angular velocity
    double theta1Dot;
    // Second link angular velocity
    double theta2Dot;

    // Time that has passed (used as the plottable statistic)
    double t;

    // ---- Helpers ----

    // Used to keep angles in [-pi,pi]
    double mod2pi(double x);

    // Get the min and max possible state vectors for normalization during
    // getState(...).
    // NOTE(review): presumably returned as (min, max) in that order -- confirm
    // against the implementation.
    static std::pair<Eigen::VectorXd, Eigen::VectorXd> getStateRange();
};

#endif
