#include "Bicycle.hpp"
#include <vector>
#include <cstdlib>

using namespace std;
using namespace Eigen;

Bicycle::Bicycle(mt19937_64 & generator) {
    // Set arguments
	dt = 0.01;
	v = (10.0/3.6);	// 10 km/h in m/s
	g = 9.8;
	dCM = 0.3;
	c = 0.66;
	h = 0.94;
	Mc = 15.0;
	Md = 1.7;
	Mp = 60.0;
	M = (Mc + Mp);
	R = 0.34;
	sigma_dot = (((double)v) / R);
	I_bike = (13.0/3.0)*Mc*h*h + Mp*(h+dCM)*(h + dCM);

	I_dc = (Md * R * R);
	I_dv = (3.0/2.0) * Md * R*R;
	I_dl = (1.0/2.0) * Md * R*R;
	l = 1.11;
	maxNoise = 0.02; // 2 cm
	default_angle = M_PI;
	x_goal = 0;
	y_goal = 1000;
	radius_goal = 10;

	newEpisode(generator);
}

Bicycle::~Bicycle() {}

int Bicycle::getNumActions() const {
    return 9;
}

int Bicycle::getStateDim() const {
    return 6;
}

VectorXd Bicycle::getState() const {
    VectorXd result(getStateDim());
    result[0] = theta;
	result[1] = theta_dot;
	result[2] = omega;
	result[3] = omega_dot;
	result[4] = angleWrapPi(psi - psi_goal);
    result[5] = lastdtg;
    // Normalize to be in the range 0,1
	pair<VectorXd,VectorXd> stateRange = getStateRange();
	result = (result - stateRange.first).array() / (stateRange.second - stateRange.first).array();
	// Threshold to really stay in that range
	for (int i = 0; i < getStateDim(); i++)
        result[i] = min(1.0, max(0.0, result[i]));
	return result;
}

void Bicycle::setState(const VectorXd & state, mt19937_64 & generator) {
	pair<VectorXd,VectorXd> stateRange = getStateRange();
    VectorXd unnormalizedState = state.array() * (stateRange.second - stateRange.first).array() + stateRange.first.array();
    theta = unnormalizedState[0];
    theta_dot = unnormalizedState[1];
    omega = unnormalizedState[2];
    omega_dot = unnormalizedState[3];
    lastdtg = unnormalizedState[5];

	// We don't know psi_goal---the state provided to the agent isn't really Markovian. So, sample possible states that we could be in.
    uniform_real_distribution<double> distribution(-M_PI, M_PI);
    psi_goal = distribution(generator);
    psi = angleWrapPi(unnormalizedState[4] + psi_goal);

    // Based on psi_goal and and lastdtg, reconstruct position
    xf = x_goal - lastdtg * cos(psi_goal);
    yf = y_goal - lastdtg * sin(psi_goal);
    xb = xf - l * cos(psi);
    yb = yf - l * sin(psi);

    shouldTerminate = false;
}

void Bicycle::newEpisode(mt19937_64 & generator) {
    theta = theta_dot = theta_d_dot = 0.0;
	omega = omega_dot = omega_d_dot = 0.0;
	xb = 0.0;
	yb = 0.0;
	xf = l * cos(default_angle);
	yf = l * sin(default_angle);
    psi = atan2(yf - yb, xf - xb);
	psi_goal = calc_angle_to_goal(xf, xb, yf, yb);
    lastdtg = calc_dist_to_goal(xf, xb, yf, yb);
	shouldTerminate = false;
}

bool Bicycle::terminate() const {
    return shouldTerminate;
}

double Bicycle::update(int action, mt19937_64 & generator) {
	// Convert the action to a two continuous values, T and d.
	double discreteActions[][2] = {
        { 0, 0 },     { -2, 0 },     { 2, 0 },
        { 0, -0.02 }, { -2, -0.02 }, { 2, -0.02 },
        { 0, 0.02 },  { -2, 0.02 },  { 2, 0.02 } };
	double T, d;
	T = discreteActions[action][0];
	d = discreteActions[action][1];	// Displacement of rider

	uniform_real_distribution<double> distribution(-maxNoise, maxNoise);
	double noise = distribution(generator);	// Noise in displacement
	d = d + maxNoise * noise;
	if (theta == 0)
		rCM = rf = rb = 9999999; // Just a large number
	else {
		rCM = sqrt(pow(l-c,2) + l*l/(pow(tan(theta),2)));
		rf = l / fabs(sin(theta));
		rb = l / fabs(tan(theta));
	}
	phi = omega + atan(d/h);
	omega_d_dot = ( h*M*g*sin(phi)
					- cos(phi)*(I_dc*sigma_dot*theta_dot
					+ sign(theta)*v*v*(Md*R*(1.0/rf + 1.0/rb)
					+  M*h/rCM) )
				  ) / I_bike;
	theta_d_dot = (T - I_dv*omega_dot*sigma_dot) /  I_dl;
	omega_dot += omega_d_dot * dt;
	omega += omega_dot * dt;
	theta_dot += theta_d_dot * dt;
	theta += theta_dot * dt;
	if (fabs(theta) > 1.3963)	// handlebars cannot turn more than 80 degrees
		theta = sign(theta) * 1.3963;

	// Compute new position of front tire
	double temp = v*dt/(2*rf);
	double sgn = sign(angleWrapPi(psi + theta));
    if (temp > 1)
		temp = sgn * M_PI/2;
	else
		temp = sgn * asin(temp);
	xf += v * dt * cos(psi + theta + temp);
	yf += v * dt * sin(psi + theta + temp);

	// Compute new position of back tire
	temp = v*dt/(2*rb);
	if (temp > 1)
		temp = sign(psi) * M_PI/2;
	else
		temp = sign(psi) * asin(temp);
	xb += v * dt * (cos(psi + temp));
	yb += v * dt * (sin(psi + temp));

	// Round off errors accumulate so the length of the bike changes over many iterations. The following take care of that by correcting the back wheel:
	temp = sqrt((xf-xb)*(xf-xb)+(yf-yb)*(yf-yb));
	if (fabs(temp - l) > 0.001) {
		xb += (xb-xf)*(l-temp)/temp;
		yb += (yb-yf)*(l-temp)/temp;
	}

    psi = atan2(yf - yb, xf - xb);
	psi_goal = calc_angle_to_goal(xf, xb, yf, yb);
	dtg = calc_dist_to_goal(xf, xb, yf, yb);

	double reward;
	if (fabs(omega) > (M_PI/15)) {
		// The bike has fallen over - it is more than 12 degrees from vertical
		shouldTerminate = true;
		reward = ((-1.0 - M_PI*M_PI * 3.0 / 4.0) * 0.001) / (1.0 - 0.99);
    }
    else if (dtg <= radius_goal) {
    	// The bike reached the goal
        reward = 1.0;
        shouldTerminate = true;
    }
	else {
		// The bike is not at the goal, nor has it fallen. Give a shaped reward based on how far away from the goal the bike is *pointed* (actual distance doesn't matter)
		shouldTerminate = false; // Should already be set this way... may be slightly redundant, but that's ok
        double delta_angle = angleWrapPi(psi - psi_goal);
        reward = -0.001 + (M_PI*M_PI / 4 - delta_angle * delta_angle) * 0.001;
    }
	lastdtg = dtg;

	return reward;
}

double Bicycle::getInitialValue() const {
    return 0;
}

double Bicycle::getGamma() const {
    return 0.99;	// If you change this, you need to change the rewards. See appendix if Marc's AAAI paper.
}

int Bicycle::getNumMCSamplesForPolicyEvaluation() const {
    return 16;		// Once the bike stays up, these episodes take a long time. This is the number of cores on my machine.
}

int Bicycle::getMaxTForPolicyEvaluation() const {
    return 200000;
}

int Bicycle::getNumSamplesPerState() const {
    return 100;
}

double Bicycle::getPlottableStatistic() const {
	// Check if the bike fell over. If it did, then return 0, otherwise 1
	if (fabs(omega) > (M_PI/15))
		return 0;
	return 1;
}

string Bicycle::getPlottableStatisticName() const {
	return "Goal reached (0-1 Boolean)";
}

/////////////////////////////////
// Private functions:
/////////////////////////////////

pair<VectorXd,VectorXd> Bicycle::getStateRange() const {
    pair<VectorXd,VectorXd> result;
    result.first.resize(getStateDim());
    result.second.resize(getStateDim());

    result.first[0]  = -M_PI * 4/9;
    result.second[0] = M_PI * 4/9;	// theta
    result.first[1]  = -2;
    result.second[1] = 2;			// theta_dot
    result.first[2]  = -M_PI / 15;
    result.second[2] = M_PI / 15;	// omega
    result.first[3]  = -0.5;
    result.second[3] = 0.5;			// omega_dot
    result.first[4]  = -M_PI;
    result.second[4] = M_PI;		// psi - psi_goal
    result.first[5]  = radius_goal;
    result.second[5] = y_goal + 200.0;		// distance to goal
    return result;
}

double Bicycle::sign(double x) const {
  if (x == 0)
    return 0;
  else if (x > 0)
    return 1;
  else
    return -1;
}

double Bicycle::angleWrap(double x) const {
	while (x < 0)
		x += 2.0*M_PI;
	while (x > 2.0*M_PI)
		x -= 2.0*M_PI;
	return x;
}

double Bicycle::angleWrapPi(double x) const {
	while (x < -M_PI)
		x += 2.0*M_PI;
	while (x > M_PI)
		x -= 2.0*M_PI;
	return x;
}

double Bicycle::calc_dist_to_goal(double xf, double xb, double yf, double yb) const {
	return sqrt((x_goal-xf)*(x_goal-xf) + (y_goal-yf)*(y_goal-yf));
}

double Bicycle::calc_angle_to_goal(double xf, double xb, double yf, double yb) const {
    return atan2(y_goal - yf, x_goal - xf);
}

