#ifndef _BICYCLE_H_
#define _BICYCLE_H_

#include <stdlib.h>
#include <math.h>
#include <random>

#include "Environment.hpp"

/*
The canonical bicycle domain introduced by:

Jette Randlov and Preben Alstrom.
Learning to drive a bicycle using reinforcement learning and shaping.
ICML 1998

It was also used in the popular JMLR paper on LSPI:

Least-Squares Policy Iteration
Michail G. Lagoudakis, Ronald Parr
Journal of Machine Learning Research 4(Dec):1107-1149, 2003.

This implementation is an amalgam of the two. The goal is to drive the bicycle 1km north to a goal that has a 10 meter radius.
The bicycle begins each episode facing west.
*/

// Bicycle: an Environment implementation of the bicycle riding domain.
// The public section overrides the Environment interface; the private section
// holds the bicycle's physical state, cached tire positions, and the model
// parameters. Only declarations live here; definitions are in the implementation file.
class Bicycle : public Environment
{
public:
	// Construct the environment. Takes a random number generator by reference.
	Bicycle(std::mt19937_64 & generator);
	~Bicycle() override;
	int getNumActions() const override;                      	// Get the number of allowed actions. The actions are integers from 0 to numActions-1.
    int getStateDim() const override;                        	// Get the number of state variables.
    Eigen::VectorXd getState() const override;               	// Get the current state as a vector of *normalized* variables. That is, the state variables should all be in the range [0,1].
    void setState(const Eigen::VectorXd & state,
				  std::mt19937_64 & generator) override;		// Set the state.
    void newEpisode(std::mt19937_64 & generator) override;      // Reset the environment for a new episode.
    bool terminate() const override;                         	// Is the agent in a terminal state currently? I.e., would the next state always be the absorbing zero-reward state?
    double update(int action,
				  std::mt19937_64 & generator) override;		// Apply the specified action and return the resulting reward.
    double getInitialValue() const override;                 	// How should the value function be initialized? To zero?
    double getGamma() const override;                        	// Get the reward discount parameter.
    int getNumMCSamplesForPolicyEvaluation() const override; 	// Get the number of episodes that should be sampled when evaluating a policy by Monte Carlo returns. If the environment is deterministic, it should be 1.
    int getMaxTForPolicyEvaluation() const override;         	// How long should episodes run, at most, when doing Monte Carlo performance estimation?
    int getNumSamplesPerState() const override;              	// How many samples should be generated from each state in value iteration? This is to sample stochastic state transitions. It should be one if state transitions are deterministic.
    double getPlottableStatistic() const override;           	// For some environments we don't want to plot return (it's hard to visualize). At the end of a trajectory, before newEpisode is called, this function can be called to get a statistic for how good that episode was.
    std::string getPlottableStatisticName() const override;		// A string to say what the plottable statistic encodes.

private:
    std::pair<Eigen::VectorXd,Eigen::VectorXd> getStateRange() const; // Get the min and max possible state vectors, used for normalization.

	// State
	double theta;		// Angle of the front tire and handlebar. Zero = straight in line with the bike.
	double theta_dot;	// Angular velocity of the front tire --- the time derivative of theta.
	double theta_d_dot;	// Second time derivative of theta --- the angular acceleration of the front tire.
	double omega;		// Sideways tilt of the bicycle. If this is too far from vertical (which is omega = 0), then the bike fell over.
	double omega_dot;	// First time derivative of omega --- the angular velocity of the tilt, i.e., how fast the bike is falling sideways.
	double omega_d_dot;	// Second time derivative of omega --- the angular acceleration of the tilt.
	double psi;			// Angle from front tire to the goal. I don't think this is defined in the paper.

	// Some internal variables that are stored (e.g. current position of the bicycle)
	double lastdtg; 	// Last distance to the goal - only compute it once and keep it around. Part of the state.
	double xb;			// Back tire position on x-axis
	double yb;			// Back tire position on y-axis
	double xf;			// Front tire position on x-axis. If plotting agent position, use this.
	double yf;			// Front tire position on y-axis. If plotting agent position, use this.
	double psi_goal;	// The angle to the goal from the front tire. This is the output of calc_angle_to_goal.

	// Parameters defined by Jette Randlov and Preben Alstrom.
	double dt;			// Time step
	double v;			// Velocity of the bicycle, 10 km / hour
	double g;			// Acceleration due to gravity, 9.8 m/s^2
	double dCM;			// Vertical distance between bike and rider centers of mass
	double c;			// Horizontal distance between where front wheel touches the ground and the center of mass of the bike and rider
	double h;			// Height of center of mass of bike and rider
	double Mc;			// Mass of bicycle
	double Md;			// Mass of tire
	double Mp;			// Mass of cyclist
	double M;			// Mass of bicycle, but not cyclist --- Mc + Md. NOTE(review): the paper appears to define M as Mc + Mp (bicycle plus cyclist) --- confirm against the constructor.
	double R;			// Radius of a tire. Lowercase in the paper.
	double sigma_dot;	// Angular velocity of a tire
	double I_bike;		// Moment of inertia of the bike
	double I_dc;		// One component of moment of inertia of a tire
	double I_dv;		// One component of moment of inertia of a tire
	double I_dl;		// One component of moment of inertia of a tire
	double l;			// Distance between front and back tires at the points where they touch the ground
	double x_goal;		// Position of goal on x-axis
	double y_goal;		// Position of goal on y-axis
	double radius_goal; // How close the front tire must be to the goal for the episode to end
	double default_angle;	// Initial direction of the bicycle (west)
	double rCM;			// Defined in Eq 6 of appendix of Randlov & Alstrom.
	double rf;			// Radius of front tire. NOTE(review): R above is already the tire radius; in the paper r_f looks like the radius of the path traced by the front tire --- confirm.
	double rb;			// Radius of back tire. NOTE(review): likewise possibly the radius of the back tire's path --- confirm.
	double dtg;			// Distance to goal
	double maxNoise;	// The biggest noise that can be added
	double phi;         // Bike's angle to the y-axis. This is for the center of mass, not the physical bike (the bike's angle is omega).

	bool shouldTerminate;	// Termination flag. Presumably what terminate() reports --- confirm in the implementation.

	// Helper functions. Note that the parameters xf, xb, yf, yb below shadow the members of the same names.
	double calc_dist_to_goal(double xf, double xb, double yf, double yb) const;		// Distance of the front tire to the goal
	double calc_angle_to_goal(double xf, double xb, double yf, double yb) const;	// Angle from the front tire to the goal
	double sign(double x) const;		// +1, 0, or -1 depending on sign of x
	double angleWrap(double x) const;	// Wrap an angle to 0 -- 2*pi
	double angleWrapPi(double x) const;	// Wrap an angle to -pi -- pi
};

#endif
