# Amino acid properties.
#
# Vladimir Vacic, University of California, Riverside
# Vladimir N. Uversky, Indiana University School of Medicine, Indianapolis
# A. Keith Dunker, Indiana University School of Medicine, Indianapolis
# Stefano Lonardi, University of California, Riverside
# 
# Nov-28-2006

# Lookup tables and small helpers describing the 20 standard amino acids.
#
# Residues are identified by one-letter code strings ('A', 'R', ...) unless
# noted otherwise. The helper methods at the bottom can be used either by
# including the module or by calling them on the module itself
# (e.g. AminoAcid.charge('K')).
module AminoAcid
    # Expose the helper methods below as AminoAcid.<method> in addition to
    # making them available through `include AminoAcid`.
    extend self

    # One-letter and three-letter codes; the two arrays are index-aligned
    # (ALL_1[i] and ALL_3[i] name the same residue).
    ALL_1 = %w{A R N D C Q E G H I L K M F P S T W Y V}
    ALL_3 = %w{Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val}

    # Bidirectional code translation: one-letter => three-letter and
    # three-letter => one-letter, all in a single hash.
    CODE = {
        'A' => 'Ala', 'Ala' => 'A',
        'C' => 'Cys', 'Arg' => 'R',
        'D' => 'Asp', 'Asn' => 'N',
        'E' => 'Glu', 'Asp' => 'D',
        'F' => 'Phe', 'Cys' => 'C',
        'G' => 'Gly', 'Gln' => 'Q',
        'H' => 'His', 'Glu' => 'E',
        'I' => 'Ile', 'Gly' => 'G',
        'K' => 'Lys', 'His' => 'H',
        'L' => 'Leu', 'Ile' => 'I',
        'M' => 'Met', 'Leu' => 'L',
        'N' => 'Asn', 'Lys' => 'K',
        'P' => 'Pro', 'Met' => 'M',
        'Q' => 'Gln', 'Phe' => 'F',
        'R' => 'Arg', 'Pro' => 'P',
        'S' => 'Ser', 'Ser' => 'S',
        'T' => 'Thr', 'Thr' => 'T',
        'V' => 'Val', 'Trp' => 'W',
        'W' => 'Trp', 'Tyr' => 'Y',
        'Y' => 'Tyr', 'Val' => 'V'
    }

    # Residue orderings. 'alpha' is alphabetical; every other entry lists the
    # 20 residues in ascending order of the corresponding scale table defined
    # further down in this module (e.g. 'hydro_fp' follows HYDROPHOBICITY_FP).
    ORDER = {
        'alpha'        => 'ACDEFGHIKLMNPQRSTVWY',
        'hydro_e'      => 'RKDQNEHSTPYCGAMWLVFI',
        'hydro_kd'     => 'RKDENQHPYWSTGAMCFLVI',
        # M (1.23) sorts between V (1.22) and C (1.54) on the F-P scale.
        'hydro_fp'     => 'RKDENQSGHTAPYVMCLFIW',
        'surface_j'    => 'KREQDNYPTHSAGWMLFVIC',
        'flex_v'       => 'WCFIYVLHMATRGQSNPDEK',
        'interface_jt' => 'DKSPTAEQGNRVLHCIMYFW',
        'solvation_jt' => 'WFILMYVCHAGSTPNRQDEK',
        'bulkiness_z'  => 'GSADNCEHRQKTMPYFILVW',
        'polarity_z'   => 'AGILVFMCPYTSWNQKDEHR',
        'linker_gh'    => 'CGWDINKSVAYHTMQELFRP',
        'alpha_n'      => 'YPGNSRTCIVDWQLKMFAHE',
        'beta_n'       => 'ERNPSKHDGAYCWQTLMFVI',
        'coil_n'       => 'FMLAEHIQVKWCTDRSGYNP',
        'size_d'       => 'GADCSNETVILPQHMFKWYR'
    }

    # Residue subsets: for each property key, the string of one-letter codes
    # that belong to that group. Most scale-based groups are the high-value
    # tail of the matching ORDER string.
    SPLIT = {
        'aromatics'         => 'FWY',
        'charge'            => 'KRDE',
        'pos_charge'        => 'KR',
        'neg_charge'        => 'DE',
        'hydro_e'           => 'PYCGAMWLVFI',
        'hydro_kd'          => 'AMCFLVI',
        'hydro_fp'          => 'HTAPYVMCLFIW',
        'surface_exposed_j' => 'DEHKNPQRSTY',
        'high_flex_v'       => 'RGQSNPDEK',
        'high_interface_jt' => 'NRVLHCIMYFW',
        'high_solvation_jt' => 'AGSTPNRQDEK',
        'disorder_d'        => 'ARSQEGKP',
        'order_d'           => 'NCILFWYV',
        'bulkiness_z'       => 'ILVW',
        'polarity_z'        => 'KDEHR',
        'alpha_n'           => 'LKMFAHE',
        'beta_n'            => 'YCWQTLMFVI',
        'coil_n'            => 'CTDRSGYNP',
        'linker_gh'         => 'YHTMQELFRP',
        'size_d'            => 'NETVILPQHMFKWYR'
    }

    # Human-readable label for each SPLIT key.
    SPLIT_NAME = {
        'aromatics'         => 'Aromatic content',
        'charge'            => 'Charged residues',
        'pos_charge'        => 'Positively charged',
        'neg_charge'        => 'Negatively charged',
        'polarity_z'        => 'Polar (Zimmerman)',
        'hydro_e'           => 'Hydrophobic (Eisenberg)',
        'hydro_kd'          => 'Hydrophobic (K-D)',
        'hydro_fp'          => 'Hydrophobic (F-P)',
        'surface_exposed_j' => 'Exposed (Janin)',
        'high_flex_v'       => 'Flexible (Vihinen)',
        'high_interface_jt' => 'High interface prop. (J-T)',
        'high_solvation_jt' => 'High solvation poten. (J-T)',
        'alpha_n'           => 'Frequent in alpha hel. (N)',
        'beta_n'            => 'Frequent in beta struc. (N)',
        'coil_n'            => 'Frequent in coils (N)',
        'linker_gh'         => 'High linker propensity (G-H)',
        'disorder_d'        => 'Disorder promoting (Dunker)',
        'order_d'           => 'Order promoting (Dunker)',
        'bulkiness_z'       => 'Bulky (Zimmerman)',
        'size_d'            => 'Large (Dawson)'
    }

    # The SPLIT keys in a fixed sequence (same sequence as SPLIT_NAME above);
    # presumably the order in which groups are presented to the user.
    SPLIT_ORDER = %w{aromatics charge pos_charge neg_charge polarity_z hydro_e hydro_kd hydro_fp surface_exposed_j high_flex_v high_interface_jt high_solvation_jt alpha_n beta_n coil_n linker_gh disorder_d order_d bulkiness_z size_d}

    # Eisenberg D, Schwarz E, Komaromy M, and Wall R. (1984)
    # "Analysis of membrane and surface protein sequences with the hydrophobic moment plot."
    # J. Mol. Biol. 179:125-142.
    HYDROPHOBICITY_EISENBERG = {
        'R' => -2.53,
        'K' => -1.50,
        'D' => -0.90,
        'Q' => -0.85,
        'N' => -0.78,
        'E' => -0.74,
        'H' => -0.40,
        'S' => -0.18,
        'T' => -0.05,
        'P' =>  0.12,
        'Y' =>  0.26,
        'C' =>  0.29,
        'G' =>  0.48,
        'A' =>  0.62,
        'M' =>  0.64,
        'W' =>  0.81,
        'L' =>  1.06,
        'V' =>  1.08,
        'F' =>  1.19,
        'I' =>  1.38
    }

    # Kyte J, and Doolittle RF. (1982)
    # "A simple method for displaying the hydropathic character of a protein."
    # J. Mol. Biol. 157:105-132.
    HYDROPHOBICITY_KD = {
        'R' => -4.5,
        'K' => -3.9,
        'D' => -3.5,
        'E' => -3.5,
        'N' => -3.5,
        'Q' => -3.5,
        'H' => -3.2,
        'P' => -1.6,
        'Y' => -1.3,
        'W' => -0.9,
        'S' => -0.8,
        'T' => -0.7,
        'G' => -0.4,
        'A' =>  1.8,
        'M' =>  1.9,
        'C' =>  2.5,
        'F' =>  2.8,
        'L' =>  3.8,
        'V' =>  4.2,
        'I' =>  4.5
    }


    # Fauchere J.-L. and Pliska V.E. (1983)
    # "Hydrophobic parameters pi of amino acid side chains from partitioning of N-acetyl-amino-acid amides."
    # Eur. J. Med. Chem. 18:369-375.
    HYDROPHOBICITY_FP = {
        'R' => -1.01,
        'K' => -0.99,
        'D' => -0.77,
        'E' => -0.64,
        'N' => -0.60,
        'Q' => -0.22,
        'S' => -0.04,
        'G' =>  0.00,
        'H' =>  0.13,
        'T' =>  0.26,
        'A' =>  0.31,
        'P' =>  0.72,
        'Y' =>  0.96,
        'V' =>  1.22,
        'M' =>  1.23,
        'C' =>  1.54,
        'L' =>  1.70,
        'F' =>  1.79,
        'I' =>  1.80,
        'W' =>  2.25
    }


    # Janin J. (1979)
    # "Surface and inside volumes in globular proteins."
    # Nature, 277(5696):491-2.
    SURFACE_JANIN = {
        'K' => -1.8,
        'R' => -1.4,
        'E' => -0.7,
        'Q' => -0.7,
        'D' => -0.6,
        'N' => -0.5,
        'Y' => -0.4,
        'P' => -0.3,
        'T' => -0.2,
        'H' => -0.1,
        'S' => -0.1,
        'A' =>  0.3,
        'G' =>  0.3,
        'W' =>  0.3,
        'M' =>  0.4,
        'L' =>  0.5,
        'F' =>  0.5,
        'V' =>  0.6,
        'I' =>  0.7,
        'C' =>  0.9
    }


    # Vihinen M, Torkkila E. and Riikonen P. (1994)
    # "Accuracy of protein flexibility predictions."
    # Proteins, 19, 141-149.
    FLEXIBILITY_VIHINEN = {
        'W' => 0.904,
        'C' => 0.906,
        'F' => 0.915,
        'I' => 0.927,
        'Y' => 0.929,
        'V' => 0.931,
        'L' => 0.935,
        'H' => 0.950,
        'M' => 0.952,
        'A' => 0.984,
        'T' => 0.997,
        'R' => 1.008,
        'G' => 1.031,
        'Q' => 1.037,
        'S' => 1.046,
        'N' => 1.048,
        'P' => 1.049,
        'D' => 1.068,
        'E' => 1.094,
        'K' => 1.102
    }

    # Jones S. and Thornton J.M. (1996)
    # "Principles of protein-protein interactions."
    # PNAS USA, 93:13-20.
    INTERFACE_PROPENSITY_JT = {
        'D' => -0.38,
        'K' => -0.36,
        'S' => -0.33,
        'P' => -0.25,
        'T' => -0.18,
        'A' => -0.17,
        'E' => -0.13,
        'Q' => -0.11,
        'G' => -0.07,
        'N' =>  0.12,
        'R' =>  0.27,
        'V' =>  0.27,
        'L' =>  0.40,
        'H' =>  0.41,
        'C' =>  0.43,
        'I' =>  0.44,
        'M' =>  0.66,
        'Y' =>  0.66,
        'F' =>  0.82,
        'W' =>  0.83
    }

    # Jones S. and Thornton J.M. (1997)
    # "Analysis of protein-protein interaction sites using surface patches."
    # J. Mol. Biol. 272:121-132.
    SOLVATION_POTENTIAL_JT = {
        'W' => -0.68,
        'F' => -0.55,
        'I' => -0.49,
        'L' => -0.49,
        'M' => -0.40,
        'Y' => -0.32,
        'V' => -0.31,
        'C' => -0.30,
        'H' => -0.06,
        'A' =>  0.05,
        'G' =>  0.08,
        'S' =>  0.15,
        'T' =>  0.16,
        'P' =>  0.19,
        'N' =>  0.22,
        'R' =>  0.41,
        'Q' =>  0.45,
        'D' =>  0.64,
        'E' =>  0.77,
        'K' =>  1.61
    }

    # Bulkiness scale (labelled "Zimmerman" in SPLIT_NAME; the full citation
    # is not recorded in this file).
    BULKINESS_ZIMMERMAN = {
        'G' => 3.4,
        'S' => 9.47,
        'A' => 11.5,
        'D' => 11.68,
        'N' => 12.82,
        'C' => 13.46,
        'E' => 13.57,
        'H' => 13.69,
        'R' => 14.28,
        'Q' => 14.45,
        'K' => 15.71,
        'T' => 15.77,
        'M' => 16.25,
        'P' => 17.43,
        'Y' => 18.03,
        'F' => 19.8,
        'I' => 21.4,
        'L' => 21.4,
        'V' => 21.57,
        'W' => 21.67
    }

    # Polarity scale (labelled "Zimmerman" in SPLIT_NAME; the full citation
    # is not recorded in this file).
    POLARITY_ZIMMERMAN = {
        'A' => 0.0,
        'G' => 0.0,
        'I' => 0.13,
        'L' => 0.13,
        'V' => 0.13,
        'F' => 0.35,
        'M' => 1.43,
        'C' => 1.48,
        'P' => 1.58,
        'Y' => 1.61,
        'T' => 1.66,
        'S' => 1.67,
        'W' => 2.1,
        'N' => 3.38,
        'Q' => 3.53,
        'K' => 49.5,
        'D' => 49.7,
        'E' => 49.9,
        'H' => 51.6,
        'R' => 52.0
    }

    # Linker propensity scale (labelled "G-H" in SPLIT_NAME; the full
    # citation is not recorded in this file).
    LINKER_GH = {
        'C' =>  0.778,
        'G' =>  0.835,
        'W' =>  0.895,
        'D' =>  0.916,
        'I' =>  0.922,
        'N' =>  0.944,
        'K' =>  0.944,
        'S' =>  0.947,
        'V' =>  0.955,
        'A' =>  0.964,
        'Y' =>  1.0,
        'H' =>  1.014,
        'T' =>  1.017,
        'M' =>  1.032,
        'Q' =>  1.047,
        'E' =>  1.051,
        'L' =>  1.085,
        'F' =>  1.119,
        'R' =>  1.143,
        'P' =>  1.299
    }

    # Alpha-helix frequency scale (labelled "N" in SPLIT_NAME; the full
    # citation is not recorded in this file).
    ALPHA_NAGANO = {
        'Y' =>  0.63,
        'P' =>  0.7,
        'G' =>  0.72,
        'N' =>  0.77,
        'S' =>  0.78,
        'R' =>  0.83,
        'T' =>  0.87,
        'C' =>  0.94,
        'I' =>  0.94,
        'V' =>  0.97,
        'D' =>  1.0,
        'W' =>  1.06,
        'Q' =>  1.1,
        'L' =>  1.23,
        'K' =>  1.23,
        'M' =>  1.23,
        'F' =>  1.23,
        'A' =>  1.29,
        'H' =>  1.29,
        'E' =>  1.54
    }

    # Beta-structure frequency scale (labelled "N" in SPLIT_NAME; the full
    # citation is not recorded in this file).
    BETA_NAGANO = {
        'E' =>  0.33,
        'R' =>  0.67,
        'N' =>  0.72,
        'P' =>  0.75,
        'S' =>  0.77,
        'K' =>  0.81,
        'H' =>  0.87,
        'D' =>  0.9,
        'G' =>  0.9,
        'A' =>  0.96,
        'Y' =>  1.07,
        'C' =>  1.13,
        'W' =>  1.13,
        'Q' =>  1.18,
        'T' =>  1.23,
        'L' =>  1.26,
        'M' =>  1.29,
        'F' =>  1.37,
        'V' =>  1.41,
        'I' =>  1.54
    }

    # Coil frequency scale (labelled "N" in SPLIT_NAME; the full citation is
    # not recorded in this file).
    COIL_NAGANO = {
        'F' =>  0.58,
        'M' =>  0.62,
        'L' =>  0.63,
        'A' =>  0.72,
        'E' =>  0.75,
        'H' =>  0.76,
        'I' =>  0.8,
        'Q' =>  0.81,
        'V' =>  0.83,
        'K' =>  0.84,
        'W' =>  0.87,
        'C' =>  1.01,
        'T' =>  1.03,
        'D' =>  1.04,
        'R' =>  1.33,
        'S' =>  1.34,
        'G' =>  1.35,
        'Y' =>  1.35,
        'N' =>  1.38,
        'P' =>  1.43
    }

    # Residue size scale (labelled "Dawson" in SPLIT_NAME; the full citation
    # is not recorded in this file).
    SIZE_DAWSON = {
        'G' =>  0.5,
        'A' =>  2.5,
        'D' =>  2.5,
        'C' =>  3.0,
        'S' =>  3.0,
        'N' =>  5.0,
        'E' =>  5.0,
        'T' =>  5.0,
        'V' =>  5.0,
        'I' =>  5.5,
        'L' =>  5.5,
        'P' =>  5.5,
        'Q' =>  6.0,
        'H' =>  6.0,
        'M' =>  6.0,
        'F' =>  6.5,
        'K' =>  7.0,
        'W' =>  7.0,
        'Y' =>  7.0,
        'R' =>  7.5
    }


    # Per-dataset residue composition, keyed by dataset name and then by
    # one-letter code. Each dataset's values sum to roughly 100, so they
    # appear to be percentages. NOTE(review): the dataset keys presumably
    # refer to Swiss-Prot, PDB Select 25, surface residues and DisProt --
    # confirm against the code that consumes them.
    COMPOSITION = {
        'sprot' => {
            'A' => 7.89,
            'R' => 5.40,
            'N' => 4.13,
            'D' => 5.35,
            'C' => 1.50,
            'Q' => 3.95,
            'E' => 6.67,
            'G' => 6.96,
            'H' => 2.29,
            'I' => 5.90,
            'L' => 9.65,
            'K' => 5.92,
            'M' => 2.38,
            'F' => 3.96,
            'P' => 4.83,
            'S' => 6.83,
            'T' => 5.41,
            'W' => 1.13,
            'Y' => 3.03,
            'V' => 6.73
        },
        'pdbs25' => {
            'A' => 7.70,
            'R' => 4.93,
            'N' => 4.58,
            'D' => 5.83,
            'C' => 1.74,
            'Q' => 3.95,
            'E' => 6.65,
            'G' => 7.16,
            'H' => 2.41,
            'I' => 5.61,
            'L' => 8.68,
            'K' => 6.37,
            'M' => 2.22,
            'F' => 3.98,
            'P' => 4.57,
            'S' => 6.19,
            'T' => 5.63,
            'W' => 1.44,
            'Y' => 3.50,
            'V' => 6.72
        },
        'surface' => {
            'A' => 6.03,
            'R' => 6.56,
            'N' => 6.23,
            'D' => 8.18,
            'C' => 0.78,
            'Q' => 5.21,
            'E' => 8.70,
            'G' => 7.06,
            'H' => 2.60,
            'I' => 2.77,
            'L' => 5.11,
            'K' => 9.75,
            'M' => 1.13,
            'F' => 2.38,
            'P' => 5.63,
            'S' => 6.87,
            'T' => 6.08,
            'W' => 1.33,
            'Y' => 3.58,
            'V' => 4.01
        },
        'disprot' => {
            'A' => 8.10,
            'R' => 4.82,
            'N' => 3.82,
            'D' => 5.80,
            'C' => 0.80,
            'Q' => 5.27,
            'E' => 9.89,
            'G' => 7.41,
            'H' => 1.93,
            'I' => 3.24,
            'L' => 6.22,
            'K' => 7.85,
            'M' => 1.87,
            'F' => 2.44,
            'P' => 8.11,
            'S' => 8.65,
            'T' => 5.56,
            'W' => 0.67,
            'Y' => 2.13,
            'V' => 5.41
        }
    }


    # Per-dataset, per-residue spread values with the same keys as
    # COMPOSITION; presumably the standard deviations of those composition
    # figures (how they were estimated is not shown in this file).
    STDEV = {
        'sprot' => {
            'A' => 0.05,
            'R' => 0.04,
            'N' => 0.04,
            'D' => 0.03,
            'C' => 0.02,
            'Q' => 0.03,
            'E' => 0.04,
            'G' => 0.04,
            'H' => 0.02,
            'I' => 0.04,
            'L' => 0.04,
            'K' => 0.05,
            'M' => 0.02,
            'F' => 0.03,
            'P' => 0.03,
            'S' => 0.04,
            'T' => 0.02,
            'W' => 0.01,
            'Y' => 0.02,
            'V' => 0.03
        },
        'pdbs25' => {
            'A' => 0.08,
            'R' => 0.06,
            'N' => 0.06,
            'D' => 0.05,
            'C' => 0.05,
            'Q' => 0.05,
            'E' => 0.07,
            'G' => 0.07,
            'H' => 0.04,
            'I' => 0.06,
            'L' => 0.08,
            'K' => 0.08,
            'M' => 0.04,
            'F' => 0.04,
            'P' => 0.05,
            'S' => 0.06,
            'T' => 0.05,
            'W' => 0.03,
            'Y' => 0.04,
            'V' => 0.06
        },
        'surface' => {
            'A' => 0.13,
            'R' => 0.13,
            'N' => 0.15,
            'D' => 0.10,
            'C' => 0.04,
            'Q' => 0.09,
            'E' => 0.17,
            'G' => 0.11,
            'H' => 0.06,
            'I' => 0.07,
            'L' => 0.08,
            'K' => 0.16,
            'M' => 0.04,
            'F' => 0.05,
            'P' => 0.10,
            'S' => 0.13,
            'T' => 0.11,
            'W' => 0.05,
            'Y' => 0.08,
            'V' => 0.06
        },
        'disprot' => {
            'A' => 0.35,
            'R' => 0.23,
            'N' => 0.27,
            'D' => 0.30,
            'C' => 0.08,
            'Q' => 0.37,
            'E' => 0.61,
            'G' => 0.40,
            'H' => 0.11,
            'I' => 0.13,
            'L' => 0.25,
            'K' => 0.45,
            'M' => 0.10,
            'F' => 0.13,
            'P' => 0.63,
            'S' => 0.43,
            'T' => 0.24,
            'W' => 0.06,
            'Y' => 0.15,
            'V' => 0.44
        }
    }

    # Per-dataset integer counts for each residue, with the same keys as
    # COMPOSITION; presumably the raw residue totals behind those figures.
    COUNTS = {
        'sprot' => {
            'A' => 6990248,
            'R' => 4784200,
            'N' => 3659027,
            'D' => 4739902,
            'C' => 1328945,
            'Q' => 3499554,
            'E' => 5909373,
            'G' => 6166303,
            'H' => 2028855,
            'I' => 5227182,
            'L' => 8549543,
            'K' => 5244901,
            'M' => 2108592,
            'F' => 3508414,
            'P' => 4279201,
            'S' => 6051127,
            'T' => 4793060,
            'W' => 1001138,
            'Y' => 2684468,
            'V' => 5962531
        },
        'pdbs25' => {
            'A' => 23428,
            'R' => 14998,
            'N' => 13928,
            'D' => 17742,
            'C' => 5306,
            'Q' => 12031,
            'E' => 20225,
            'G' => 21785,
            'H' => 7338,
            'I' => 17066,
            'L' => 26402,
            'K' => 19384,
            'M' => 6767,
            'F' => 12119,
            'P' => 13906,
            'S' => 18824,
            'T' => 17137,
            'W' => 4380,
            'Y' => 10646,
            'V' => 20441
        },
        'surface' => {
            'A' => 6623,
            'R' => 7204,
            'N' => 6845,
            'D' => 8983,
            'C' => 851,
            'Q' => 5717,
            'E' => 9560,
            'G' => 7756,
            'H' => 2855,
            'I' => 3046,
            'L' => 5610,
            'K' => 10708,
            'M' => 1243,
            'F' => 2610,
            'P' => 6187,
            'S' => 7548,
            'T' => 6671,
            'W' => 1457,
            'Y' => 3931,
            'V' => 4407
        },
        'disprot' => {
            'A' => 4204,
            'R' => 2502,
            'N' => 1978,
            'D' => 3009,
            'C' => 416,
            'Q' => 2733,
            'E' => 5151,
            'G' => 3839,
            'H' => 1000,
            'I' => 1683,
            'L' => 3230,
            'K' => 4085,
            'M' => 971,
            'F' => 1267,
            'P' => 4225,
            'S' => 4490,
            'T' => 2889,
            'W' => 345,
            'Y' => 1108,
            'V' => 2820
        }
    }

    # Returns true when +aa+ equals one of the one-letter codes 'F', 'W' or
    # 'Y'; false for anything else.
    def aromatic?(aa)
        aa == 'F' || aa == 'W' || aa == 'Y'
    end

    # Returns true when +aa+ equals one of the one-letter codes 'K', 'R',
    # 'D' or 'E'; false for anything else.
    def charged?(aa)
        aa == 'K' || aa == 'R' || aa == 'D' || aa == 'E'
    end

    # Returns the integer charge assigned to +aa+: 1 for 'K' or 'R',
    # -1 for 'D' or 'E', and 0 for every other value.
    def charge(aa)
        return 1  if aa == 'K' || aa == 'R'
        return -1 if aa == 'D' || aa == 'E'
        0
    end
end

