Spaces:
Sleeping
Sleeping
| """ | |
| Amino Acid Properties from AAindex Database | |
| Auto-generated by AAindexDownloader | |
| Total features: 20 | |
| """ | |
| import numpy as np | |
# Raw values from AAindex.
# AAindex feature codes, in the column order used by every row of
# _AAINDEX_ROWS below (descriptions live in FEATURE_DESCRIPTIONS).
_AAINDEX_CODES = (
    "BIGC670101", "CHAM820101", "CHOP780201", "CHOP780202", "CHOP780203",
    "EISD860101", "FASG760101", "FAUJ830101", "GRAR740102", "GRAR740103",
    "GUYH850101", "HOPT810101", "JANJ780101", "KARP850101", "KYTJ820101",
    "ROSM880101", "VINM940101", "WERD780101", "ZIMJ680101", "ZIMJ680104",
)

# One (single-letter code, values) pair per amino acid; each values tuple is
# aligned position-by-position with _AAINDEX_CODES.
_AAINDEX_ROWS = (
    ("A", (
        52.6, 0.046, 1.42, 0.83, 0.74,
        0.67, 89.09, 0.31, 8.1, 31.0,
        0.1, -0.5, 27.8, 1.041, 1.8,
        -0.67, 0.984, 0.52, 0.83, 6.0,
    )),
    ("R", (
        109.1, 0.291, 0.98, 0.93, 1.01,
        -2.1, 174.2, -1.01, 10.5, 124.0,
        1.91, 3.0, 94.7, 1.038, -4.5,
        12.1, 1.008, 0.49, 0.83, 10.76,
    )),
    ("N", (
        75.7, 0.134, 0.67, 0.89, 1.46,
        -0.6, 132.12, -0.6, 11.6, 56.0,
        0.48, 0.2, 60.1, 1.117, -3.5,
        7.23, 1.048, 0.42, 0.09, 5.41,
    )),
    ("D", (
        68.4, 0.105, 1.01, 0.54, 1.52,
        -1.2, 133.1, -0.77, 13.0, 54.0,
        0.78, 3.0, 60.6, 1.033, -3.5,
        8.72, 1.068, 0.37, 0.64, 2.77,
    )),
    ("C", (
        68.3, 0.128, 0.7, 1.19, 0.96,
        0.38, 121.15, 1.54, 5.5, 55.0,
        -1.42, -1.0, 15.5, 0.96, 2.5,
        -0.34, 0.906, 0.83, 1.48, 5.05,
    )),
    ("Q", (
        89.7, 0.18, 1.11, 1.1, 0.96,
        -0.22, 146.15, -0.22, 10.5, 85.0,
        0.95, 0.2, 68.7, 1.165, -3.5,
        6.39, 1.037, 0.35, 0.0, 5.65,
    )),
    ("E", (
        84.7, 0.151, 1.51, 0.37, 0.95,
        -0.76, 147.13, -0.64, 12.3, 83.0,
        0.83, 3.0, 68.2, 1.094, -3.5,
        7.35, 1.094, 0.38, 0.65, 3.22,
    )),
    ("G", (
        36.3, 0.0, 0.57, 0.75, 1.56,
        0.0, 75.07, 0.0, 9.0, 3.0,
        0.33, 0.0, 24.5, 1.142, -0.4,
        0.0, 1.031, 0.41, 0.1, 5.97,
    )),
    ("H", (
        91.9, 0.23, 1.0, 0.87, 0.95,
        0.64, 155.16, 0.13, 10.4, 96.0,
        -0.5, -0.5, 50.7, 0.982, -3.2,
        3.82, 0.95, 0.7, 1.1, 7.59,
    )),
    ("I", (
        102.0, 0.186, 1.08, 1.6, 0.47,
        1.9, 131.17, 1.8, 5.2, 111.0,
        -1.13, -1.8, 22.8, 1.002, 4.5,
        -3.02, 0.927, 0.79, 3.07, 6.02,
    )),
    ("L", (
        102.0, 0.186, 1.21, 1.3, 0.5,
        1.9, 131.17, 1.7, 4.9, 111.0,
        -1.18, -1.8, 27.6, 0.967, 3.8,
        -3.02, 0.935, 0.77, 2.52, 5.98,
    )),
    ("K", (
        105.1, 0.219, 1.16, 0.74, 1.19,
        -0.57, 146.19, -0.99, 11.3, 119.0,
        1.4, 3.0, 103.0, 1.093, -3.9,
        6.13, 1.102, 0.31, 1.6, 9.74,
    )),
    ("M", (
        97.7, 0.221, 1.45, 1.05, 0.6,
        2.4, 149.21, 1.23, 5.7, 105.0,
        -1.59, -1.3, 33.5, 0.947, 1.9,
        -1.3, 0.952, 0.76, 1.4, 5.74,
    )),
    ("F", (
        113.9, 0.29, 1.13, 1.38, 0.66,
        2.3, 165.19, 1.79, 5.2, 132.0,
        -2.12, -2.5, 25.5, 0.93, 2.8,
        -3.24, 0.915, 0.87, 2.75, 5.48,
    )),
    ("P", (
        73.6, 0.131, 0.57, 0.55, 1.56,
        1.2, 115.13, 0.72, 8.0, 32.5,
        0.73, 0.0, 51.5, 1.055, -1.6,
        -1.75, 1.049, 0.35, 2.7, 6.3,
    )),
    ("S", (
        54.9, 0.062, 0.77, 0.75, 1.43,
        0.01, 105.09, -0.04, 9.2, 32.0,
        0.52, 0.3, 42.0, 1.169, -0.8,
        4.35, 1.046, 0.49, 0.14, 5.68,
    )),
    ("T", (
        71.2, 0.108, 0.83, 1.19, 0.98,
        0.52, 119.12, 0.26, 8.6, 61.0,
        0.07, -0.4, 45.0, 1.073, -0.7,
        3.86, 0.997, 0.38, 0.54, 5.66,
    )),
    ("W", (
        135.4, 0.409, 1.08, 1.37, 0.6,
        2.6, 204.24, 2.25, 5.4, 170.0,
        -0.51, -3.4, 34.7, 0.925, -0.9,
        -2.86, 0.904, 0.86, 0.31, 5.89,
    )),
    ("Y", (
        116.2, 0.298, 0.69, 1.47, 1.14,
        1.6, 181.19, 0.96, 6.2, 136.0,
        -0.21, -2.3, 55.2, 0.961, -1.3,
        0.98, 0.929, 0.64, 2.97, 5.66,
    )),
    ("V", (
        85.1, 0.14, 1.06, 1.7, 0.59,
        1.5, 117.15, 1.22, 5.9, 84.0,
        -1.27, -1.5, 23.7, 0.982, 4.2,
        -2.18, 0.931, 0.72, 1.79, 5.96,
    )),
)

# Nested lookup: amino acid letter -> {feature code -> raw value}.
AA_PROPERTIES_AAINDEX = {
    residue: dict(zip(_AAINDEX_CODES, values))
    for residue, values in _AAINDEX_ROWS
}
# Human-readable label for each AAindex feature code used as an inner key of
# AA_PROPERTIES_AAINDEX (label text followed by the cited source in brackets).
FEATURE_DESCRIPTIONS = {
    "BIGC670101": "Residue volume (Bigelow, 1967)",
    "CHAM820101": "Polarizability parameter (Charton-Charton, 1982)",
    "CHOP780201": "Normalized frequency of alpha-helix (Chou-Fasman, 1978b)",
    "CHOP780202": "Normalized frequency of beta-sheet (Chou-Fasman, 1978b)",
    "CHOP780203": "Normalized frequency of beta-turn (Chou-Fasman, 1978b)",
    "EISD860101": "Solvation free energy (Eisenberg-McLachlan, 1986)",
    "FASG760101": "Molecular weight (Fasman, 1976)",
    "FAUJ830101": "Hydrophobic parameter pi (Fauchere-Pliska, 1983)",
    "GRAR740102": "Polarity (Grantham, 1974)",
    "GRAR740103": "Volume (Grantham, 1974)",
    "GUYH850101": "Partition energy (Guy, 1985)",
    "HOPT810101": "Hydrophilicity value (Hopp-Woods, 1981)",
    "JANJ780101": "Average accessible surface area (Janin et al., 1978)",
    "KARP850101": "Flexibility parameter for no rigid neighbors (Karplus-Schulz, 1985)",
    "KYTJ820101": "Hydropathy index (Kyte-Doolittle, 1982)",
    "ROSM880101": "Side chain hydropathy, uncorrected for solvation (Roseman, 1988)",
    "VINM940101": "Normalized flexibility parameters (B-values), average (Vihinen et al., 1994)",
    "WERD780101": "Propensity to be buried inside (Wertz-Scheraga, 1978)",
    "ZIMJ680101": "Hydrophobicity (Zimmerman et al., 1968)",
    "ZIMJ680104": "Isoelectric point (Zimmerman et al., 1968)",
}
# Per-residue lookup, returned as a numpy array
def get_feature_vector(aa, feature_list=None):
    """
    Look up the AAindex property values of one amino acid.

    Args:
        aa: Amino acid single letter code; any value that is not a key of
            AA_PROPERTIES_AAINDEX is treated as Alanine ("A")
        feature_list: Feature codes to return, in that order
            (None = every feature, sorted by code)

    Returns:
        numpy array with one value per requested feature
    """
    props = AA_PROPERTIES_AAINDEX.get(aa)
    if props is None:
        # Unknown residue: default to Alanine
        props = AA_PROPERTIES_AAINDEX["A"]
    codes = sorted(props) if feature_list is None else feature_list
    return np.array([props[code] for code in codes])
def get_sequence_features(sequence, feature_list=None):
    """Get feature matrix for a sequence [L, N_features].

    Args:
        sequence: Iterable of amino acid single letter codes; each element is
            passed to get_feature_vector.
        feature_list: Feature codes to include, in output column order
            (None = all features, as selected by get_feature_vector).

    Returns:
        2-D numpy array with one row per residue and one column per feature.
        An empty sequence yields shape (0, N_features) rather than (0,).
    """
    if feature_list is not None:
        # Materialize once: a one-shot iterable would otherwise be exhausted
        # after the first residue and leave later rows empty.
        feature_list = list(feature_list)

    rows = [get_feature_vector(aa, feature_list) for aa in sequence]
    if not rows:
        # np.array([]) is 1-D; probe one residue to learn the column count so
        # the result keeps the documented [L, N_features] layout.
        n_features = len(get_feature_vector("A", feature_list))
        return np.empty((0, n_features))
    return np.array(rows)
# Smoke test, executed only when the module is run as a script
if __name__ == "__main__":
    print("Loaded 20 features for 20 amino acids")

    # Show the first five properties stored for Alanine with their labels
    # (labels are cut to 50 characters).
    print("\nExample: Alanine (A)")
    alanine = AA_PROPERTIES_AAINDEX["A"]
    for key in list(alanine)[:5]:
        value = alanine[key]
        print(f" {key}: {value:.4f} - {FEATURE_DESCRIPTIONS[key][:50]}")

    # Build the feature matrix for a short peptide and report its shape.
    print("\nTest sequence features:")
    seq = "ARNDCQEG"
    features = get_sequence_features(seq)
    print(f" Sequence: {seq}")
    print(f" Feature matrix shape: {features.shape}")