/**
 * Composition Profiler (calculates confidence intervals for residue
 * frequencies using bootstrapping)
 * 
 * Vladimir Vacic, University of California, Riverside
 * Vladimir N. Uversky, Indiana University School of Medicine, Indianapolis 
 * A. Keith Dunker, Indiana University School of Medicine, Indianapolis
 * Stefano Lonardi, University of California, Riverside
 *
 * Sep-20-2006
 */


#include <climits>
#include <cstdlib>
#include <ctime>
#include <math.h>
using namespace std;

#include "sequence.h"


/**
 * Returns the mean and the standard deviation of a vector of numbers.
 *
 * The standard deviation is the population form (sum of squared deviations
 * divided by N, not N-1).
 *
 * @param val  Samples to summarize.
 * @return     pair(mean, standard deviation). An empty input yields (0, 0)
 *             instead of dividing by zero.
 */
std::pair<double, double> mean_std(std::vector<double> val)  {
    typedef std::vector<double>::size_type size_type;

    const size_type n = val.size();
    if (n == 0)
        return std::pair<double, double>(0.0, 0.0);

    // Accumulate in double: a single-precision accumulator keeps only ~7
    // significant digits, which shifts the mean and inflates the deviation.
    double sum = 0.0;
    for (size_type i = 0; i < n; ++i)
        sum += val[i];

    const double mean = sum / static_cast<double>(n);

    // Second pass: sum of squared deviations from the mean.
    double sq_dev = 0.0;
    for (size_type i = 0; i < n; ++i)  {
        const double d = val[i] - mean;
        sq_dev += d * d;
    }

    const double std_dev = sqrt(sq_dev / static_cast<double>(n));

    return std::pair<double, double>(mean, std_dev);
}

int main(int argc, char** argv)  {
    if (argc != 3)  {
        cerr << "Usage: " << argv[0] << " FASTA_file bootstrap_iterations" << endl;
        exit(1);
    } 

    // Count AA
    vector<vector<int> > counts = count_aa(argv[1]);
    int seqnum = counts.size();

    int iterations = atoi(argv[2]);
    vector<double> stats[20];

    vector<int> index(seqnum, 0);

    double over = (double) seqnum / (1.0 + RAND_MAX);

    srand((unsigned)time(0)); 

    for (int i=0; i<iterations; i++)  {
        // Resample P on the sequence level, with remplacement
        for (int j=0; j<seqnum; j++)
            index[j] = (int) floor(over * rand());

        vector<double> freq = freq_aa(index, counts);

        for (int j=0; j<20; j++)
            stats[j].push_back(freq[j]);
    }

    // Output means and standard deviations for every residue 
    for(int i=0; i<20; i++)  {
        pair<double, double> ret = mean_std(stats[i]);
        cout << ret.first * 100 << "\t" << ret.second * 100 << endl;
    }

    exit(0);
}
