#include "PuddleWorld.hpp"

using namespace Eigen;
using namespace std;

// Builds the environment: allocates the two random distributions, sizes the
// two-component position vector, and starts the first episode.
//   generator - random engine forwarded to newEpisode().
PuddleWorld::PuddleWorld(mt19937_64 & generator) {
    // Parameters of the Gaussian noise added to every move in update().
    const double noiseMean = 0.0;
    const double noiseStdDev = 0.01;
    distribution = new normal_distribution<double>(noiseMean, noiseStdDev);

    // Uniform distribution over [0,1) intended for initial agent placement.
    const double placementLow = 0.0;
    const double placementHigh = 1.0;
    initialDistribution = new uniform_real_distribution<double>(placementLow, placementHigh);

    // The position holds (x, y); it must be sized before newEpisode() writes to it.
    pos.resize(2);
    newEpisode(generator);
}

// Frees the two distribution objects allocated by the constructor.
PuddleWorld::~PuddleWorld() {
    // Released in reverse order of allocation; the two objects are independent.
    delete initialDistribution;
    delete distribution;
}

// Returns the number of discrete actions.
// Action encoding: 0 = up, 1 = down, 2 = right, 3 = left.
int PuddleWorld::getNumActions() const {
    const int numActions = 4;
    return numActions;
}

// Returns the dimensionality of the state: the Cartesian coordinates (x, y).
int PuddleWorld::getStateDim() const {
    const int stateDim = 2;
    return stateDim;
}

// Returns a copy of the agent's current position vector.
Eigen::VectorXd PuddleWorld::getState() const {
    return Eigen::VectorXd(pos);
}

// Overwrites the agent's position with the given state.
//   state     - new position; copied into pos as-is (no size or bounds check).
//   generator - unused by this environment.
// The accumulated reward rSum is left untouched.
void PuddleWorld::setState(const Eigen::VectorXd & state, mt19937_64 & generator) {
    (void)generator;  // No randomness is needed to set the state.
    pos = state;
}

// Starts a new episode: clears the accumulated reward and places the agent at
// the fixed start position (0.3, 0.6).
//   generator - currently unused, because the random start is disabled.
void PuddleWorld::newEpisode(mt19937_64 & generator) {
    (void)generator;
    rSum = 0;

    // A random start is disabled; it would draw each coordinate with
    // (*initialDistribution)(generator).
    const double startX = 0.3;
    const double startY = 0.6;
    pos[0] = startX;
    pos[1] = startY;
}

// Reports whether the episode is over: true once the sum of the position
// coordinates reaches at least 1.9.
bool PuddleWorld::terminate() const {
    const double coordinateSum = pos.sum();
    return coordinateSum >= 1.9;
}

// Advances the environment by one time step.
//   action    - 0 = up, 1 = down, 2 = right; any other value moves left.
//   generator - random engine used to draw the movement noise.
// Returns the reward for this step, which is also added to rSum.
double PuddleWorld::update(int action, mt19937_64 & generator) {
    // Deterministic part of the move.
    const double stepSize = 0.05;
    switch (action) {
        case 0:  pos[1] += stepSize; break;  // up
        case 1:  pos[1] -= stepSize; break;  // down
        case 2:  pos[0] += stepSize; break;  // right
        default: pos[0] -= stepSize; break;  // left
    }

    // Perturb each coordinate with noise, then force it back into [0, 1].
    // The x coordinate is processed first so the generator is consumed in
    // the order x, y.
    for (int axis = 0; axis < 2; ++axis) {
        const double noisy = pos[axis] + (*distribution)(generator);
        pos[axis] = std::min(1.0, std::max(0.0, noisy));
    }

    // Distance from the agent to the centre segment of each puddle.
    const double horizontalPuddleDist = distToSegment(pos[0], pos[1], 0.1, 0.75, 0.45, 0.75);
    const double verticalPuddleDist = distToSegment(pos[0], pos[1], 0.45, 0.4, 0.45, 0.8);

    // Per-step cost, as specified in "Kernel-Based Models for Reinforcement
    // Learning" by Jong and Stone.
    double stepReward = -1;

    // Being closer than the puddle radius to a segment means the agent is
    // inside that puddle; the punishment grows with the depth, and the
    // punishments of both puddles add up.
    const double puddleRadius = 0.1;
    const double punishmentScale = 400.0;
    if (horizontalPuddleDist < puddleRadius)
        stepReward -= punishmentScale*(puddleRadius - horizontalPuddleDist);
    if (verticalPuddleDist < puddleRadius)
        stepReward -= punishmentScale*(puddleRadius - verticalPuddleDist);

    rSum += stepReward;
    return stepReward;
}

// Returns the initial value reported by this environment, which is zero.
double PuddleWorld::getInitialValue() const {
    const double initialValue = 0;
    return initialValue;
}

// Returns gamma for this environment; 1.0 means rewards are not discounted.
double PuddleWorld::getGamma() const {
    const double gamma = 1.0;
    return gamma;
}

// Returns the Euclidean distance from the point (px,py) to the line segment
// with endpoints (x1,y1) and (x2,y2).
// Based on the code here: http://stackoverflow.com/questions/849211/shortest-distance-between-a-point-and-a-line-segment
double PuddleWorld::distToSegment(const double & px, const double & py, const double & x1, const double & y1, const double & x2, const double & y2) {
    // Distance from (px,py) to an arbitrary point (qx,qy).
    auto distanceTo = [&px, &py](double qx, double qy) {
        return sqrt((px - qx)*(px - qx) + (py - qy)*(py - qy));
    };

    // Squared length of the segment; zero means both endpoints coincide.
    const double squaredLength = (x2-x1)*(x2-x1) + (y2-y1)*(y2-y1);
    if (squaredLength == 0.0)
        return distanceTo(x1, y1);

    // Position of the projection of the point onto the segment's line:
    // 0 corresponds to (x1,y1) and 1 to (x2,y2).
    const double t = ((px-x1)*(x2-x1) + (py-y1)*(y2-y1))/squaredLength;
    if (t < 0.0)
        return distanceTo(x1, y1);  // Projection falls before the first endpoint
    if (t > 1.0)
        return distanceTo(x2, y2);  // Projection falls past the second endpoint

    // Projection lies on the segment itself.
    return distanceTo(x1 + t*(x2 - x1), y1 + t*(y2 - y1));
}

// Returns the number of Monte Carlo samples to use for policy evaluation.
// State transitions are noisy, so many samples are needed for an accurate
// estimate even when the policy itself is deterministic.
int PuddleWorld::getNumMCSamplesForPolicyEvaluation() const {
    const int numSamples = 1000;
    return numSamples;
}

// Returns the maximum T to use for policy evaluation.
int PuddleWorld::getMaxTForPolicyEvaluation() const {
    const int maxT = 1000;
    return maxT;
}

// Returns the number of samples to take per state. Transitions are
// stochastic, so this should be greater than 1; larger values are better but
// more computationally demanding.
int PuddleWorld::getNumSamplesPerState() const {
    const int samplesPerState = 10;
    return samplesPerState;
}

// Returns the statistic to plot: rSum, the sum of the rewards handed out by
// update() since the last call to newEpisode().
double PuddleWorld::getPlottableStatistic() const {
    const double accumulatedReward = rSum;
    return accumulatedReward;
}

// Returns the human-readable label for the value reported by
// getPlottableStatistic().
string PuddleWorld::getPlottableStatisticName() const {
    const string label("Undiscounted return");
    return label;
}
