#ifndef STATISTICS_H
#define STATISTICS_H

#include <vector>
#include <limits>

/*
Use these classes to accumulate running statistics values easily.
reset() - zero the internal variables
update() - add a new data value, updating current average
get_n, mean - return the current values
*/

// Accumulates running statistics for a stream of double values in one pass.
// Tracks the count, the sum, the sum of squares, and the smallest and largest
// values passed to update() since construction or the last reset().
class Mean_accumulator{
public:
	Mean_accumulator()
		{
			reset();
		}
	// Return to the empty state: zero count and sums, and min/max set to
	// sentinels that the first finite data value will replace.
	void reset()
		{
			n = 0;
			total = 0.0;
			total2 = 0.0;
			min_value = std::numeric_limits<double>::max();
			// lowest() is the most negative finite double. numeric_limits<double>::min()
			// is the smallest POSITIVE normalized value, so seeding the maximum with it
			// would hide the true maximum whenever every data value is <= 0.
			max_value = std::numeric_limits<double>::lowest();
		}
	// Number of values supplied so far.
	long get_n() const
		{return n;}
	// Arithmetic mean of the values supplied so far, or 0.0 if there are none.
	double get_mean() const
		{return (n > 0) ? (total / n) : 0.0;}
	// Sum of the values supplied so far, converted to long.
	// NOTE(review): the sum is kept as a long double, so this conversion drops any
	// fractional part; the return type is left as long for interface compatibility.
	long get_total() const
		{return static_cast<long>(total);}
	// The following statistics are only declared here; their definitions are
	// not in this header.
	double get_sample_var() const;
	double get_sample_sd() const;
	double get_est_var() const;
	double get_est_sd() const;
	double get_sdm() const;
	double get_half_95_ci() const;
	// Smallest value supplied so far; numeric_limits<double>::max() if none.
	double get_min() const {return min_value;}
	// Largest value supplied so far; numeric_limits<double>::lowest() if none.
	double get_max() const {return max_value;}
	// Add one data value to the running statistics.
	void update(double x)
		{
			total += x;
			total2 += x*x;
			n++;
			if(x < min_value) min_value = x;
			if(x > max_value) max_value = x;
		}
	// Function-call form of update(), so the object can be used as a functor.
	void operator() (double x)
		{update(x);}

private:
	long n;					// number of values supplied
	long double total;		// sum of the values
	long double total2;		// sum of the squared values
	double min_value;
	double max_value;
};


// This class provides a proportion of the calls to update with a true argument.
// reset() - zero the internal variables
// update() - add a new data value, updating current statistics
// get_count, _n, _proportion - return the current values

// Counts how many times update() has been called and how many of those calls
// were passed true, and reports the ratio of the two.
class Proportion_accumulator{
public:
	Proportion_accumulator()
		{reset();}

	// Forget everything recorded so far.
	void reset()
		{
			n = 0;
			count = 0;
		}

	// Number of update() calls whose argument was true.
	long get_count() const
		{return count;}
	// Total number of update() calls.
	long get_n() const
		{return n;}
	// count / n, or 0 when nothing has been recorded yet.
	double get_proportion() const
		{
			if(n <= 0)
				return 0.;
			return double(count) / n;
		}

	// Record one case; it adds to the count only when count_it is true.
	void update(bool count_it)
		{
			++n;
			if(count_it)
				++count;
		}
	// Function-call form of update().
	void operator() (bool count_it)
		{update(count_it);}
private:
	long count;
	long n;
};

// Accumulate data values into bins and provide the proportion in each bin.
// Initialize with the number of bins, the width of each bin, and optionally a baseline.
// The first bin starts at the baseline (0 by default).  Values below the first bin
// or above the last bin are accumulated in the first or last bin, respectively.
// Bin i covers values >= baseline + i*bin_size and < baseline + (i+1)*bin_size,
// so with bin_size 25 and baseline 0, bin 0 is >= 0 and < 25 (plus anything smaller),
// bin 1 is >= 25 and < 50, and so on.
// Histogram of double values over n_bins equal-width bins.
// Bin i covers [baseline + i*bin_size, baseline + (i+1)*bin_size); values that
// fall below the first bin or above the last bin are counted in the first or
// last bin. The smallest and largest values seen are tracked as well.
class Distribution_accumulator{
public:
	// n_bins_   - number of bins
	// bin_size_ - width of each bin
	// baseline_ - lower bound of the first bin (defaults to 0)
	Distribution_accumulator(int n_bins_, double bin_size_, double baseline_ = 0.0) :
		n_bins(n_bins_), bin_size(bin_size_), baseline(baseline_)
		{
			reset();
		}
	// Discard all data: zero the total and every bin, and set min/max to
	// sentinels that the first finite data value will replace.
	void reset()
		{
			n = 0;
			bins.clear();
			bins.resize(n_bins);
			min_value = std::numeric_limits<double>::max();
			// lowest() is the most negative finite double. numeric_limits<double>::min()
			// is the smallest POSITIVE normalized value, so seeding the maximum with it
			// would hide the true maximum whenever every data value is <= 0.
			max_value = std::numeric_limits<double>::lowest();
		}

	// These accessors provide the configuration, individual bins, or the whole distribution
	int get_n_bins() const
		{return n_bins;}
	double get_bin_size() const
		{return bin_size;}
	double get_baseline() const
		{return baseline;}
	// Total number of values supplied to update(double).
	long get_n() const
		{return n;}
	// Inclusive lower bound of the values that belong in the given bin.
	double get_bin_lower_bound(int bin) const
		{return bin_size * bin + baseline;}
	// Exclusive upper bound of the values that belong in the given bin.
	double get_bin_upper_bound(int bin) const
		{return bin_size * (bin + 1) + baseline;}
	// Smallest value supplied so far; numeric_limits<double>::max() if none.
	double get_min() const
		{return min_value;}
	// Largest value supplied so far; numeric_limits<double>::lowest() if none.
	double get_max() const
		{return max_value;}
	// Raw count in the given bin. The bin index is not range-checked.
	int get_bin_count(int bin) const
		{
			return bins[bin];
		}
	// Count in the given bin divided by the total n, or 0.0 when n is zero.
	double get_bin_proportion(int bin) const
		{
			return (n) ? double(bins[bin]) / n : 0.0;
		}
	// subscript operator returns the proportion
	double operator[] (int bin) const
		{
			return get_bin_proportion(bin);
		}
	// Proportions for all bins, in bin order.
	std::vector<double> get_distribution() const
		{
			std::vector<double> result(n_bins);
			for(int i = 0; i < n_bins; i++)
				result[i] = get_bin_proportion(i);
			return result;
		}

	// Add one data value: count it in its bin and update min/max.
	void update(double x)
		{
			n++;
			if(!bins.empty()) {
				// Clamp while still in floating point: converting a double that does
				// not fit in an int (huge magnitude, infinity, NaN) is undefined.
				const double position = (x - baseline) / bin_size;
				int i;
				if(position >= n_bins)
					i = n_bins - 1;		// above the last bin
				else if(position >= 0.)
					i = int(position);	// in range; truncation picks the bin
				else
					i = 0;				// below the first bin (or NaN)
				bins[i]++;
				}

			if(x < min_value)
				min_value = x;
			if(x > max_value)
				max_value = x;
		}

	// Function-call form of update(double).
	void operator() (double x)
		{update(x);}

	// add the other's counts to this accumulator, update min/max values
	// (n_bins, bin_size must be the same)
	// Declared here; the definition is not in this header.
	void add_counts(const Distribution_accumulator& other);
	// Same as add_counts(other).
	void update(const Distribution_accumulator& other)
		{add_counts(other);}

private:
	int n_bins;
	double bin_size;
	double baseline;
	long n;
	double min_value;
	double max_value;
	std::vector<int> bins;
};

// Accumulate integer data values into bins and provide the proportion in each bin.
// Each integer value corresponds to one bin.
// Initialize with the number of bins and the first bin value (defaults to 0).
// The number of bins = last bin value - first bin value + 1.
// The value of the last bin = first bin value + number of bins - 1.
// If count_out_of_range_into_bins is true, values too big or too small are accumulated in the smallest or largest bin;
// if false, they are not included in the bin but are still counted as part of the total n for the distribution.
// Output for( i = 0; i < n_bins; i++) access the number or proportion in each bin

// Histogram of integer values with one bin per integer value.
// Bin i counts the value first_bin_value + i. Out-of-range values always add to
// the total n and to min/max; whether they are also counted in the first or
// last bin is controlled by count_out_of_range_into_bins.
class Discrete_distribution_accumulator{
public:
	// Declared here; the definition is not in this header.
	Discrete_distribution_accumulator(int n_bins_, int first_bin_value_ = 0, bool count_out_of_range_into_bins = false);

	// Discard all data: zero the total and every bin, and set min/max to
	// sentinels that the first data value will replace.
	void reset()
		{
			max_value = std::numeric_limits<int>::min();
			min_value = std::numeric_limits<int>::max();
			bins.assign(n_bins, 0);
			n = 0;
		}

	// Configuration accessors
	int get_n_bins() const
		{return n_bins;}
	int get_bin_size() const
		{return bin_size;}
	int get_first_bin_value() const
		{return first_bin_value;}
	int get_last_bin_value() const
		{return last_bin_value;}

	// Total number of values supplied to update(int), in range or not.
	long get_n() const
		{return n;}
	// Smallest and largest values supplied so far.
	int get_min() const
		{return min_value;}
	int get_max() const
		{return max_value;}

	// Raw count in the given bin. The bin index is not range-checked.
	int get_bin_count(int bin) const
		{return bins[bin];}
	// Count in the given bin divided by the total n, or 0.0 when n is zero.
	double get_bin_proportion(int bin) const
		{
			if(!n)
				return 0.0;
			return double(bins[bin]) / n;
		}
	// subscript operator returns the proportion
	double operator[] (int bin) const
		{return get_bin_proportion(bin);}
	// Proportions for all bins, in bin order.
	std::vector<double> get_distribution() const
		{
			std::vector<double> proportions(n_bins);
			for(int bin = 0; bin < n_bins; ++bin)
				proportions[bin] = get_bin_proportion(bin);
			return proportions;
		}

	// Add one data value: update the total and min/max, then count it in its
	// bin if it is in range, or in an end bin if out-of-range counting is on.
	void update(int x)
		{
			++n;
			if(x > max_value)
				max_value = x;
			if(x < min_value)
				min_value = x;

			// position of x relative to the first bin
			const int offset = x - first_bin_value;
			if(offset >= 0 && offset < n_bins) {
				++bins[offset];
				return;
				}
			if(!count_out_of_range_into_bins)
				return;
			// too small goes to the first bin, too large to the last
			++bins[(offset < 0) ? 0 : n_bins - 1];
		}

	// Function-call form of update(int); the double argument is converted to int.
	void operator() (double x)
		{update(static_cast<int>(x));}

	// add the other's counts to this accumulator, update min/max values
	// (n_bins, bin_size must be the same)
	// Declared here; the definition is not in this header.
	void add_counts(const Discrete_distribution_accumulator& other);
	// Same as add_counts(other).
	void update(const Discrete_distribution_accumulator& other)
		{add_counts(other);}

private:
	int n_bins;
	int first_bin_value;
	int last_bin_value;
	bool count_out_of_range_into_bins;
	int bin_size = 1; // always one
	long n;
	int min_value;
	int max_value;
	std::vector<int> bins;
};

// This class manages a vector of Proportion_accumulators; get_bin(int bin) returns
// a reference to the Proportion_accumulator bin.
// Given data pairs (x, v), where v is true or false, accumulate the
// proportion of true values for different values of x classified into bins
// This is different from a distribution because each bin contains Proportion_accumulator,
// not just a simple count. The total number of data pairs is given by the total number of
// true/false cases counted in all of the bins.
class Binned_proportion_accumulators {
public:
	Binned_proportion_accumulators(int n_bins_, double bin_size_) :
        n_bins(n_bins_), bin_size(bin_size_)
		{
			reset();
		}
	void reset()
		{
			n = 0;
            bins.clear();
            bins.resize(n_bins);
		}

    int get_n_bins() const
        {return n_bins;}
    double get_bin_size() const
        {return bin_size;}
	// get the total count in all bins - number of cases supplied
	int get_n() const
		{
			int sum = 0;
			for(const auto& bin : bins) {sum += bin.get_n();}
			return sum;
		}
    // These accessors provide access to individual proportion accumulators
	const Proportion_accumulator& get_bin(int bin) const
		{return bins[bin];}
	const Proportion_accumulator& operator[] (int bin) const
		{return bins[bin];}
	void update(double x, bool v)
		{
			int i = int(x / bin_size);
            if(i < 0)
                bins[0].update(v);
            else if(i >= n_bins)
                bins[n_bins - 1].update(v);
            else
                bins[i].update(v);;
		}
     void operator() (double x, bool v)
        {update(x, v);}
    
private:
    int n_bins;
    double bin_size;
	long n;
    std::vector<Proportion_accumulator> bins;
};

// This class manages a vector of Mean_accumulators; get_bin(int bin) returns
// a reference to the Mean_accumulators bin.
// Given data pairs (x, v), accumulate the mean, etc, for the value v
// for different values of x classified into bins
// This is different from a distribution because each bin contains Mean_accumulator,
// not just a simple count. The total number of data pairs is given by the total number of
// data values supplied to all of the bins.

// A row of Mean_accumulators indexed by x. Each (x, v) pair is routed to the
// bin int(x / bin_size), clamped to the first or last bin, and v is added to
// that bin's Mean_accumulator.
class Binned_mean_accumulators {
public:
	// n_bins_   - number of bins
	// bin_size_ - width of each bin in x
	Binned_mean_accumulators(int n_bins_, double bin_size_) :
		n_bins(n_bins_), bin_size(bin_size_)
		{
			reset();
		}
	// Replace every bin with a fresh, empty Mean_accumulator.
	void reset()
		{
			bins.clear();
			bins.resize(n_bins);
		}

	// These accessors provide the configuration or individual bins
	int get_n_bins() const
		{return n_bins;}
	double get_bin_size() const
		{return bin_size;}
	// Exclusive upper bound of the x values that belong in the given bin.
	// update() puts x in bin int(x / bin_size), so bin i ends at (i + 1) * bin_size.
	// (The parentheses matter: bin_size * bin + 1 is the bin's lower bound plus one.)
	double get_bin_upper_bound(int bin) const
		{return bin_size * (bin + 1);}
	// get the total count in all bins - number of cases supplied
	int get_n() const
		{
			int sum = 0;
			for(const auto& bin : bins) {sum += bin.get_n();}
			return sum;
		}
	// get the minimum value over all cells
	// Empty bins are skipped because they report a sentinel rather than data.
	// Returns numeric_limits<double>::max() when no data has been supplied.
	double get_min() const
		{
			double min_value = std::numeric_limits<double>::max();
			for(const auto& bin : bins) {
				if(bin.get_n() > 0 && bin.get_min() < min_value) min_value = bin.get_min();
				}
			return min_value;
		}
	// get the maximum value over all cells
	// Empty bins are skipped because they report a sentinel rather than data.
	// Returns numeric_limits<double>::lowest() when no data has been supplied.
	double get_max() const
		{
			// lowest() is the most negative finite double; numeric_limits<double>::min()
			// is the smallest POSITIVE value and would hide an all-negative maximum.
			double max_value = std::numeric_limits<double>::lowest();
			for(const auto& bin : bins) {
				if(bin.get_n() > 0 && bin.get_max() > max_value) max_value = bin.get_max();
				}
			return max_value;
		}
	// Read-only access to one bin's accumulator. The index is not range-checked.
	const Mean_accumulator& get_bin(int bin) const
		{return bins[bin];}
	const Mean_accumulator& operator[] (int bin) const
		{return bins[bin];}
	// Add v to the bin selected by x; out-of-range x goes to the first or last bin.
	void update(double x, double v)
		{
			int i = int(x / bin_size);
			if(i < 0)
				bins[0].update(v);
			else if(i >= n_bins)
				bins[n_bins - 1].update(v);
			else
				bins[i].update(v);
		}
	// Function-call form of update().
	void operator() (double x, double v)
		{update(x, v);}

	// Declared here; the definitions are not in this header.
	void update_with_bin_means(const Binned_mean_accumulators& other);
	void update_with_bin_proportions(const Binned_proportion_accumulators& other);

private:
	int n_bins;
	double bin_size;
	std::vector<Mean_accumulator> bins;
};

// Accumulate data for a correlation coefficient and regression line
// Like the others, this class uses the one-pass approach which
// can be numerically unreliable under some conditions
// Keeps the running sums (n, sum x, sum y, sum xy, sum x^2, sum y^2) of a set
// of (x, y) pairs and derives the least-squares line of y on x from them.
class Correl_accumulator {
public:
	Correl_accumulator()
		{reset();}

	// Zero the count and all running sums.
	void reset()
		{
			n = 0;
			sumx = sumy = 0.;
			sumx2 = sumy2 = 0.;
			sumxy = 0.;
		}

	// Add one (x, y) pair to the running sums.
	void update(double x, double y)
		{
			++n;
			sumx += x;
			sumx2 += x*x;
			sumy += y;
			sumy2 += y*y;
			sumxy += x*y;
		}

	// Function-call form of update().
	void operator() (double x, double y)
		{update(x, y);}

	// Number of pairs supplied so far.
	int get_n() const
		{return n;}

	// Declared here; the definition is not in this header.
	double get_r() const;

	// Slope of the regression line, or 0.0 when the denominator
	// n*sum(x^2) - (sum x)^2 is not positive (no data, or no spread in x).
	double get_slope() const
		{
			const double denominator = n * sumx2 - sumx * sumx;
			if(!(denominator > 0.))
				return 0.0;
			const double numerator = n * sumxy - sumx * sumy;
			return numerator / denominator;
		}

	// Intercept of the regression line, or 0.0 when there is no data.
	double get_intercept() const
		{
			if(!n)
				return 0.0;
			return (sumy - get_slope() * sumx) / n;
		}

	// Square of get_r().
	double get_rsq() const
		{
			const double r = get_r();
			return r*r;
		}

private:
	int n;
	long double sumx;
	long double sumy;
	long double sumxy;
	long double sumx2;
	long double sumy2;
};

// Give this class object a series of predicted and observed values,
// and then get the goodness-of-fit metrics for them
// using regression fit and simple average absolute error
// Collects (predicted, observed) pairs and reports goodness-of-fit metrics:
// the regression fit from an internal Correl_accumulator plus averages of the
// error sums maintained by update().
class PredObs_accumulator {
public:
	PredObs_accumulator() : sum_abs_error(0.), sum_error_prop(0.), total_error2(0.)
		{}
	// Discard all data: reset the regression sums and zero the error sums.
	void reset()
		{
			corr.reset();
			sum_abs_error = 0.;
			sum_error_prop = 0.;
			total_error2 = 0.;
		}

	// Add one (predicted, observed) pair.
	// Declared here; the definition is not in this header.
	void update(double predicted, double observed);
	// Function-call form of update().
	void operator() (double predicted, double observed)
		{update(predicted, observed);}

	// Number of pairs, as counted by the internal Correl_accumulator.
	int get_n() const
		{return corr.get_n();}

	// Regression results, forwarded from the internal Correl_accumulator.
	double get_rsq() const
		{return corr.get_rsq();}

	double get_slope() const
		{return corr.get_slope();}

	double get_intercept() const
		{return corr.get_intercept();}

	// sum_abs_error divided by the number of pairs, or 0.0 when there are no
	// pairs (avoids dividing by zero, matching the other accessors in this file).
	double get_avg_abs_error() const
		{
			const int count = corr.get_n();
			return (count > 0) ? (sum_abs_error / count) : 0.0;
		}

	// sum_error_prop divided by the number of pairs, times 100 (presumably a
	// percentage - confirm against update()); 0.0 when there are no pairs.
	double get_avg_abs_rel_error() const
		{
			const int count = corr.get_n();
			return (count > 0) ? (sum_error_prop / count) * 100. : 0.0;
		}

	// Declared here; the definition is not in this header.
	double get_rmse() const;

private:
	Correl_accumulator corr;
	long double sum_abs_error;
	long double sum_error_prop;
	long double total_error2;
};



#endif
