#!/usr/bin/ruby -w
#
# Composition Profiler
#
# Vladimir Vacic, University of California, Riverside
# Predrag Radivojac, Indiana University, Bloomington
# A. Keith Dunker, Indiana University School of Medicine, Indianapolis
# Stefano Lonardi, University of California, Riverside
#
# Nov-21-2006


require "./AminoAcid.rb"
require "./DrawChart.rb"
require "./Fasta.rb"
require "./Util.rb"



# Reports a fatal error and terminates the program.
#
# Writes "ERROR: <message>" followed by the complete usage text to standard
# error and then exits with status 1, so that shells and calling scripts can
# tell the run failed.
#
# message:: human-readable description of what went wrong.
#
# Never returns normally; raises SystemExit.
def error(message)
    $stderr.print "ERROR: #{message}\n"

    # Plain (non-indented) heredoc: the text below is printed verbatim.
    usage = <<END

Usage: cprofile.rb -Q <query file> -O <output file> [options]
Creates a composition profile for the input FastA file.

Mandatory arguments:
  -Q <query file>

  -O <output file>           Output file name.

Optional arguments:
  -B <background file>
     or
  -D <known distribution>    One of the following:
                             disprot       Disordered regions from DisProt 3.4
                             pdbs25        PDB Select 25
                             sprot         Proteins from SwissProt
                             surface       Surface residues of monomers from PDB
                             Defaults to sprot.

  -C <color scheme>          One of the following:
                             alpha_n       Alpha helix frequency (N)
                             amino         Amino color scheme
                             aromatics     Aromatics
                             beta_n        Beta structure frequency (N)
                             bw            Black and white
                             bulkiness_z   Bulkiness (Z)
                             charge        Charge
                             coil_n        Coil propensity (N)
                             discolor_d    Discolor propensity (D)
                             flex_v        Flexibility
                             hydro_e       Hydrophobicity (E)
                             hydro_kd      Hydrophobicity (K-D)
                             hydro_fp      Hydrophobicity (F-P)
                             interface_jt  Interface propensity (J-T)
                             linker_gh     Linker propensity (G-H)
                             polarity_z    Polarity (Z)
                             shapley       Shapley color scheme
                             size_d        Size (D)
                             surface_j     Surface exposure (J)
                             solvation_jt  Solvation potential (J-T)
                             weblogo       Weblogo color scheme
                             Defaults to bw.

  -F <format>                Format of output (EPS, GIF, PDF, PNG, TXT).
                             Defaults to PNG.

  -H <image height>          Height of output image.
                             Defaults to 3.5".

  -I <iterations>            Number of bootstrap iterations.
                             Defaults to 10000.

  -R <resolution>            Bitmap resolution.
                             Defaults to 96.

  -S <order>                 Sorts residues in the increasing order of
                             one of the physico-chemical or structural properties:
                             alpha        Alphabetical order
                             alpha_n      Alpha helix frequency (Nagano)
                             diff         By observed differences
                             beta_n       Beta structure frequency (Nagano)
                             bulikness_z  Bulkiness (Zimmerman)
                             coil_n       Coil propensity (Nagano)
                             flex_v       Flexibility (Vihinen)
                             hydro_e      Hydrophobicity (Eisenberg)
                             hydro_kd     Hydrophobicity (Kyte-Doolittle)
                             hydro_fp     Hydrophobicity (Fauchere-Pliska)
                             interface_jt Interface propensity (Jones-Thornton)
                             linker_gh    Linker propensity (George-Heringa)
                             polarity_z   Polarity (Zimmerman)
                             size_d       Size (Dawson)
                             surface_j    Surface exposure (Janin)
                             solvation_jt Solvation potential (Jones-Thornton)
                             Defaults to alphabetical order.

  -U <units>                 Chart dimensions units (cm, inch, pixel, point).
                             Defaults to inch.

  -W <image width>           Width of output image. Defaults to 5".

  -X <res units>             Resolution units when bitmap resolution is
                             specified (ppi, ppc, ppp). Defaults to ppi.

  -Y <label>                 Y-axis label.


Optional toggles (no values associated):
  -a                         Toggle antialiasing

END
    $stderr.print usage

    # A bare `exit` would report success (status 0) to the caller.
    exit 1
end


# Parses and validates the command-line options.
#
# Reads the options through Util.getopts, fills in a default for every
# optional value that was not supplied, and converts the image height and
# width from the requested units into points (1/72 inch), rounded to the
# nearest integer. Every invalid or missing option is reported through
# error().
#
# Returns the options hash. On return:
#   'Q', 'O'   are present (and the file named by 'Q' exists)
#   'D'        defaults to 'sprot'
#   'I'        defaults to 10000
#   'C'        defaults to 'bw'
#   'S'        defaults to 'alpha'
#   'F'        defaults to 'PNG'
#   'H', 'W'   are Integers, the image size in points
#   'R'        defaults to 96; a user-supplied value is left as given
#   'X'        defaults to 'ppi'
#   'U'        defaults to 'inch'
#   'a'        defaults to false
#
# NOTE(review): assumes Util.getopts returns user-supplied values as Strings
# (the existing `.to_f` / `.to_i` checks suggest so) - confirm against Util.rb.
# The option spec also accepts -T and -V, which are not used in this function.
def init
    opts = Util.getopts('Q:B:D:O:I:C:F:H:R:S:T:U:V:W:X:Y:a')

    # Query sample file
    if opts['Q'].nil?
        error("Query file not specified, terminating.")
    elsif !File.exist?(opts['Q'])
        error("Could not open query sample file #{opts['Q']}.")
    end

    # Output file
    error("Output file not specified, terminating.") if opts['O'].nil?

    # The background comes either from a file or from a known distribution
    if !opts['B'].nil? && !opts['D'].nil?
        error("Both background file and background distribution specified.")
    end

    # Background sample file
    if !opts['B'].nil? && !File.exist?(opts['B'])
        error("Could not open background distribution file #{opts['B']}.")
    end

    # Background distribution
    if opts['D'].nil?
        opts['D'] = 'sprot'
    elsif !AminoAcid::COMPOSITION.key?(opts['D'])
        error("Unknown background distribution #{opts['D']}.")
    end

    # Number of bootstrap iterations
    if opts['I'].nil?
        opts['I'] = 10000
    elsif opts['I'].to_i < 1
        error("Number of bootstrap iterations has to be a positive integer.")
    end

    # Amino acid color-coding
    opts['C'] = "bw" if opts['C'].nil?

    # Amino acid ordering ("diff" is computed by the caller, so it has no
    # entry in AminoAcid::ORDER)
    if opts['S'].nil?
        opts['S'] = 'alpha'
    elsif "diff" != opts['S'] && !AminoAcid::ORDER.key?(opts['S'])
        error("Unknown amino-acid ordering #{opts['S']}.")
    end

    # Output format
    if opts['F'].nil?
        opts['F'] = 'PNG'
    elsif !%w[GIF PNG EPS PDF TXT].include?(opts['F'].upcase)
        error("Supported output formats are PNG, GIF, EPS, PDF and TXT. Found #{opts['F']}.")
    end

    # Image height
    if opts['H'].nil?
        opts['H'] = 3.5
    elsif opts['H'].to_f <= 0
        error("Option H must be a positive real, but is #{opts['H']}.")
    end

    # Image width
    if opts['W'].nil?
        opts['W'] = 5
    elsif opts['W'].to_f <= 0
        error("Option W must be a positive real, but is #{opts['W']}.")
    end

    # Bitmap resolution and resolution units. The resolution is used as a
    # divisor below, so it has to be strictly positive.
    if opts['R'].nil?
        opts['R'] = 96
    elsif opts['R'].to_f <= 0
        error("Option R must be a positive number, but is #{opts['R']}.")
    end
    opts['X'] = 'ppi' if opts['X'].nil?

    # Units of size: how many of each unit make up one inch.
    # NOTE(review): the pixel conversion uses the resolution as pixels per
    # inch regardless of the -X resolution units - confirm this is intended.
    opts['U'] = "inch" if opts['U'].nil?

    units_per_inch = {
        "inch"  => 1.0,
        "cm"    => 2.54,
        "pixel" => opts['R'].to_f,
        "point" => 72.0
    }

    unless units_per_inch.key?(opts['U'])
        error("Unknown units #{opts['U']}. Supported units are cm, inch, pixel and point.")
    end

    # Convert to numbers before doing arithmetic: user-supplied values may
    # arrive as Strings, for which `/` is undefined and `*` means repetition.
    per_inch = units_per_inch[opts['U']]
    opts['H'] = (opts['H'].to_f / per_inch * 72).round
    opts['W'] = (opts['W'].to_f / per_inch * 72).round

    # Antialiasing
    opts['a'] = false if opts['a'].nil?

    opts
end



############################################################

# Runs the external ./frequency program on a FastA file and parses its output.
#
# The program is started without a shell (argument-array form of IO.popen),
# so file names containing spaces or shell metacharacters are passed through
# verbatim. The first two whitespace-separated fields of every non-blank
# output line are read as a mean and a standard deviation.
# NOTE(review): presumably ./frequency prints one line per amino acid, in
# the order of AminoAcid::ALL_1 - confirm against the frequency program.
#
# fasta_file::    path of the FastA file to analyse
# iterations::    number of bootstrap iterations, passed as second argument
# expected_rows:: minimum number of data lines the output must contain
#
# Returns [means, stdevs], two Arrays of Floats. Calls error() when the
# program cannot be started or its output is malformed or too short.
def run_frequency(fasta_file, iterations, expected_rows)
    begin
        output = IO.popen(['./frequency', fasta_file.to_s, iterations.to_s]) { |io| io.read }
    rescue SystemCallError => e
        error("Could not run ./frequency on #{fasta_file}: #{e.message}")
    end

    means  = []
    stdevs = []

    output.each_line do |line|
        fields = line.split
        next if fields.empty?

        if fields.length < 2
            error("Malformed output from ./frequency for #{fasta_file}: #{line.strip}")
        end

        means  << fields[0].to_f
        stdevs << fields[1].to_f
    end

    if means.length < expected_rows
        error("Expected #{expected_rows} frequencies for #{fasta_file}, but ./frequency produced #{means.length}.")
    end

    [means, stdevs]
end


opts = init()

# Number of amino acids in the profile.
residue_count = 20

# Composition of the query sample
mean_q, std_q = run_frequency(opts['Q'], opts['I'], residue_count)

# Composition of the background: a known distribution unless a background
# sample file was given.
if opts['B'].nil?
    mean_b = AminoAcid::ALL_1.map { |aa| AminoAcid::COMPOSITION[opts['D']][aa] }
    std_b  = AminoAcid::ALL_1.map { |aa| AminoAcid::STDEV[opts['D']][aa] }
else
    mean_b, std_b = run_frequency(opts['B'], opts['I'], residue_count)
end

# Relative difference of query versus background for every residue, with an
# error term that carries the sign of the difference.
# NOTE(review): a background frequency of zero yields Infinity/NaN here.
mean  = {}
stdev = {}

residue_count.times do |i|
    diff = (mean_q[i] - mean_b[i]) / mean_b[i]
    err  = ((mean_q[i] + std_q[i] - mean_b[i] - std_b[i]) / (mean_b[i] + std_b[i]) - diff).abs
    err *= -1 if diff < 0

    aa = AminoAcid::ALL_1[i]
    mean[aa]  = diff
    stdev[aa] = err
end

# Order in which the residues are reported
if "diff" == opts['S']
    # Increasing order of the observed difference
    residues = (0...residue_count).map { |i| AminoAcid::ALL_1[i] }
    order = residues.sort { |x, y| mean[x] <=> mean[y] }
else
    # One-character slices of the selected ordering
    ordering = AminoAcid::ORDER[opts['S']]
    order = (0...residue_count).map { |i| ordering[i, 1] }
end

bars   = order.map { |aa| mean[aa] }
errors = order.map { |aa| stdev[aa] }

if 'TXT' == opts['F'].upcase
    # Block form closes the file even if a write fails.
    File.open(opts['O'], 'w') do |out|
        order.each do |aa|
            out.print "#{aa}\t#{mean[aa]}\t#{stdev[aa]}\n"
        end
    end
else
    options = {
        'HEIGHT'       => opts['H'],
        'WIDTH'        => opts['W'],
        'UNITS'        => opts['U'],
        'COLOR_SCHEME' => opts['C'],
        'RES'          => opts['R'],
        'RES_UNITS'    => opts['X'],
        'FORMAT'       => opts['F'].upcase,
        'YAXIS'        => opts['Y'],
        'ANTIALIAS'    => opts['a'],
    }

    DrawChart.create(order, bars, errors, options, opts['O'], "")
end

