#include "MountainCar.hpp"

using namespace Eigen;
using namespace std;

// Constructs the environment: fixes the state-space limits and then starts the
// first episode.
//   generator - random number generator, passed straight through to newEpisode().
MountainCar::MountainCar(mt19937_64 & generator) {
    // The limits below are the standard Mountain Car values. They are hard-coded
    // deliberately and should not be changed.

    // Velocity limits.
    minXDot = -0.07;
    maxXDot = 0.07;

    // Position limits.
    minX = -1.2;
    maxX = 0.5;

    // Put the agent in its starting state.
    newEpisode(generator);
}

// The destructor has no work of its own to do, so the compiler-generated one is used.
MountainCar::~MountainCar() = default;

// Returns the number of discrete actions available to the agent.
int MountainCar::getNumActions() const {
    // The three actions are {reverse, neutral, forwards}.
    const int actionCount = 3;
    return actionCount;
}

// Returns the dimensionality of the state vector produced by getState().
int MountainCar::getStateDim() const {
    // One entry for the position and one for the velocity.
    const int stateDimension = 2;
    return stateDimension;
}

// Returns the current state as a 2-vector [position, velocity].
// Each component is rescaled linearly so that its lower limit maps to 0 and its
// upper limit maps to 1.
VectorXd MountainCar::getState() const {
    const auto positionRange = maxX - minX;
    const auto velocityRange = maxXDot - minXDot;

    VectorXd normalized(2);
    normalized[0] = (x - minX) / positionRange;        // Position relative to [minX, maxX]
    normalized[1] = (xDot - minXDot) / velocityRange;  // Velocity relative to [minXDot, maxXDot]
    return normalized;
}

// Overwrites the current position and velocity from a normalized state vector,
// i.e. the inverse of the scaling applied in getState().
//   state     - state[0] is the normalized position, state[1] the normalized velocity.
//   generator - not used by this function.
void MountainCar::setState(const VectorXd & state, mt19937_64 & generator) {
    const auto positionRange = maxX - minX;
    const auto velocityRange = maxXDot - minXDot;

    // Map each normalized component back onto its original interval.
    x = state[0] * positionRange + minX;
    xDot = state[1] * velocityRange + minXDot;
}

// Resets the environment to the start of an episode.
// The start state is fixed; the generator argument is not used here.
void MountainCar::newEpisode(mt19937_64 & generator) {
    t = 0;          // No time steps taken yet in this episode
    xDot = 0.0;     // The car starts at rest...
    x = -0.5;       // ...at position -0.5
}

// Reports whether the episode is over, which happens once the position has
// reached the upper position limit maxX.
bool MountainCar::terminate() const {
    const bool reachedUpperLimit = (maxX <= x);
    return reachedUpperLimit;
}

double MountainCar::update(int action, mt19937_64 & generator) {
    double a = (double)action - 1.0;                    // Convert the action to a double. Left = -1, neutral = 0, right = 1
	xDot = xDot + 0.001*a - 0.0025*cos(3.0*x);          // Update xDot first
	xDot = min(maxXDot, max(minXDot, xDot));            // Enforce the bounds on the allowed velocities
	x = x + xDot;                                       // Update x
	x = min(maxX, x);                                   // Enforce upper bound
	if (x < minX) {                                     // Implement the inelastic wall at minX
		x = minX;
		xDot = 0;
	}
	t++;
	return -1;                                          // A reward of -1 at every time step.
}

// Returns the value used to initialize value estimates.
double MountainCar::getInitialValue() const {
    // Every step is rewarded with -1, so zero is an optimistic starting value
    // for Mountain Car.
    const double optimisticValue = 0.0;
    return optimisticValue;
}

// Returns the discount factor gamma for this domain, which is 1.
double MountainCar::getGamma() const {
    const double gamma = 1.0;
    return gamma;
}

// Returns the number of Monte Carlo samples to use for policy evaluation.
int MountainCar::getNumMCSamplesForPolicyEvaluation() const {
    // One sample is enough: the policy is greedy and deterministic, and the
    // environment is deterministic as well.
    const int sampleCount = 1;
    return sampleCount;
}

// Returns the maximum number of time steps to use for policy evaluation.
int MountainCar::getMaxTForPolicyEvaluation() const {
    // An episode can really take tens of thousands of time steps, but any policy
    // that needs more than 1000 isn't great, so this limit works for our purposes.
    const int maxTimeSteps = 1000;
    return maxTimeSteps;
}

// Returns the number of samples per state, which is always 1 for this domain.
int MountainCar::getNumSamplesPerState() const {
    const int samplesPerState = 1;
    return samplesPerState;
}

// Returns the statistic to plot for this domain: the time-step counter t, which
// newEpisode() resets to zero and update() increments once per call.
double MountainCar::getPlottableStatistic() const {
    return static_cast<double>(t);
}

// Returns the human-readable label for the value reported by getPlottableStatistic().
string MountainCar::getPlottableStatisticName() const {
    return string("Timesteps in episode");
}
