/**
 * Auxiliary header file for reading FASTA files.
 *
 * Vladimir Vacic
 * Vladimir@vacic.org
 *
 * University of California, Riverside
 * Sep-20-2006
 */


#include <cctype>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;


#ifndef _SEQUENCE_H_
#define _SEQUENCE_H_

// One-letter codes of the 20 standard amino acids, in alphabetical order of
// their names (Alanine, Arginine, Asparagine, ...).
//
// The pointee is const because a string literal must not be bound to a
// non-const char* (ill-formed since C++11). The pointer itself is const and
// static so that every translation unit including this header gets its own
// internal-linkage copy instead of a duplicate external definition.
static const char *const AA = "ARNDCQEGHILKMFPSTWYV";


/**
 * One FASTA record: the header text and the sequence text stored with it.
 * Both members are plain strings copied from the constructor arguments.
 */
struct Sequence {
    std::string header;
    std::string sequence;

    /** Builds a record from a header string and a sequence string. */
    Sequence(const std::string &header_text, const std::string &sequence_text)
        : header(header_text),
          sequence(sequence_text)
    {}
};


vector<Sequence> read_fasta(const char *filename)  {
    ifstream input_file(filename, ios::in);

    if (!input_file)  {
        cerr << "ERROR: " << filename  << " could not be opened." << endl;
        exit(1);
    }

    vector<Sequence> temp;
    string line;

    while(getline(input_file, line))  {
        if (line.empty()) continue;

        if (line[0]=='>')
            temp.push_back(Sequence(line.substr(1), ""));
        else  {
            temp.back().sequence.append(line.substr(0,line.length()));
	}
    }

    return temp;
}

/** Per sequence aa counts in the FASTA file */
vector<vector<int> > count_aa(const char *file)  {
    vector<Sequence> S = read_fasta(file);

    vector<vector<int> > counts(S.size(), vector<int>(25,0));

    for (int i=0; i<S.size(); i++)  {
        for (int j=0; j<S[i].sequence.size(); j++)  {
            char c = toupper(S[i].sequence[j]);

            if (isalpha(c) && c!='B' && c!='J' && c!='O' && c!='Z')
      	        counts[i][c-'A']++;
        }
    }
    return counts;
}

/**
 * Relative frequencies of the 20 amino acids in AA, pooled over a subset of
 * sequences.
 *
 * The rows of `counts` selected by `indices` are summed slot by slot. Each
 * returned value is the pooled count of one AA letter divided by the pooled
 * total over all 25 slots. The denominator therefore also includes counted
 * letters that are not in AA, so the 20 frequencies need not sum to 1.
 *
 * If the pooled total is zero (no indices, or only empty rows), all 20
 * frequencies are returned as 0 instead of dividing by zero.
 *
 * Every value in `indices` must be a valid row index of `counts`, and every
 * selected row must have at least 25 entries; neither is checked here.
 *
 * Declared inline because the definition lives in a header.
 *
 * @param indices  rows of `counts` to pool
 * @param counts   per-sequence letter counts, indexed by (letter - 'A')
 * @return 20 frequencies, in the order of the letters in AA
 */
inline std::vector<double> freq_aa(const std::vector<int> &indices, const std::vector<std::vector<int> > &counts)  {
    std::vector<double> pooled(25, 0);

    for (std::size_t i = 0; i < indices.size(); i++)  {
        const std::vector<int> &row = counts[indices[i]];
        for (int j = 0; j < 25; j++)
            pooled[j] += row[j];
    }

    double total(0);
    for (int i = 0; i < 25; i++)  total += pooled[i];

    std::vector<double> freq(20, 0);

    // total is a sum of integer counts, so comparing with 0 is exact.
    // Without this guard 1 / total is infinite and every result is NaN.
    if (total == 0)
        return freq;

    const double scale = 1 / total;
    for (int i = 0; i < 20; i++)
        freq[i] = pooled[AA[i] - 'A'] * scale;

    return freq;
}

#endif
