StriMap / src /aa_properties_aaindex.py
cao
Add model and predictor files
78f28d5
raw
history blame
16.1 kB
"""
Amino Acid Properties from AAindex Database
Auto-generated by AAindexDownloader
Total features: 20
"""
import numpy as np
# Raw values from AAindex
# Layout: {amino acid single letter code: {AAindex feature code: value}}.
# The table holds 20 residues, and every residue carries the same 20 feature
# codes; a one-line description of each code is kept in FEATURE_DESCRIPTIONS.
AA_PROPERTIES_AAINDEX = {
'A': {
'BIGC670101': 52.600000,
'CHAM820101': 0.046000,
'CHOP780201': 1.420000,
'CHOP780202': 0.830000,
'CHOP780203': 0.740000,
'EISD860101': 0.670000,
'FASG760101': 89.090000,
'FAUJ830101': 0.310000,
'GRAR740102': 8.100000,
'GRAR740103': 31.000000,
'GUYH850101': 0.100000,
'HOPT810101': -0.500000,
'JANJ780101': 27.800000,
'KARP850101': 1.041000,
'KYTJ820101': 1.800000,
'ROSM880101': -0.670000,
'VINM940101': 0.984000,
'WERD780101': 0.520000,
'ZIMJ680101': 0.830000,
'ZIMJ680104': 6.000000,
},
'R': {
'BIGC670101': 109.100000,
'CHAM820101': 0.291000,
'CHOP780201': 0.980000,
'CHOP780202': 0.930000,
'CHOP780203': 1.010000,
'EISD860101': -2.100000,
'FASG760101': 174.200000,
'FAUJ830101': -1.010000,
'GRAR740102': 10.500000,
'GRAR740103': 124.000000,
'GUYH850101': 1.910000,
'HOPT810101': 3.000000,
'JANJ780101': 94.700000,
'KARP850101': 1.038000,
'KYTJ820101': -4.500000,
'ROSM880101': 12.100000,
'VINM940101': 1.008000,
'WERD780101': 0.490000,
'ZIMJ680101': 0.830000,
'ZIMJ680104': 10.760000,
},
'N': {
'BIGC670101': 75.700000,
'CHAM820101': 0.134000,
'CHOP780201': 0.670000,
'CHOP780202': 0.890000,
'CHOP780203': 1.460000,
'EISD860101': -0.600000,
'FASG760101': 132.120000,
'FAUJ830101': -0.600000,
'GRAR740102': 11.600000,
'GRAR740103': 56.000000,
'GUYH850101': 0.480000,
'HOPT810101': 0.200000,
'JANJ780101': 60.100000,
'KARP850101': 1.117000,
'KYTJ820101': -3.500000,
'ROSM880101': 7.230000,
'VINM940101': 1.048000,
'WERD780101': 0.420000,
'ZIMJ680101': 0.090000,
'ZIMJ680104': 5.410000,
},
'D': {
'BIGC670101': 68.400000,
'CHAM820101': 0.105000,
'CHOP780201': 1.010000,
'CHOP780202': 0.540000,
'CHOP780203': 1.520000,
'EISD860101': -1.200000,
'FASG760101': 133.100000,
'FAUJ830101': -0.770000,
'GRAR740102': 13.000000,
'GRAR740103': 54.000000,
'GUYH850101': 0.780000,
'HOPT810101': 3.000000,
'JANJ780101': 60.600000,
'KARP850101': 1.033000,
'KYTJ820101': -3.500000,
'ROSM880101': 8.720000,
'VINM940101': 1.068000,
'WERD780101': 0.370000,
'ZIMJ680101': 0.640000,
'ZIMJ680104': 2.770000,
},
'C': {
'BIGC670101': 68.300000,
'CHAM820101': 0.128000,
'CHOP780201': 0.700000,
'CHOP780202': 1.190000,
'CHOP780203': 0.960000,
'EISD860101': 0.380000,
'FASG760101': 121.150000,
'FAUJ830101': 1.540000,
'GRAR740102': 5.500000,
'GRAR740103': 55.000000,
'GUYH850101': -1.420000,
'HOPT810101': -1.000000,
'JANJ780101': 15.500000,
'KARP850101': 0.960000,
'KYTJ820101': 2.500000,
'ROSM880101': -0.340000,
'VINM940101': 0.906000,
'WERD780101': 0.830000,
'ZIMJ680101': 1.480000,
'ZIMJ680104': 5.050000,
},
'Q': {
'BIGC670101': 89.700000,
'CHAM820101': 0.180000,
'CHOP780201': 1.110000,
'CHOP780202': 1.100000,
'CHOP780203': 0.960000,
'EISD860101': -0.220000,
'FASG760101': 146.150000,
'FAUJ830101': -0.220000,
'GRAR740102': 10.500000,
'GRAR740103': 85.000000,
'GUYH850101': 0.950000,
'HOPT810101': 0.200000,
'JANJ780101': 68.700000,
'KARP850101': 1.165000,
'KYTJ820101': -3.500000,
'ROSM880101': 6.390000,
'VINM940101': 1.037000,
'WERD780101': 0.350000,
'ZIMJ680101': 0.000000,
'ZIMJ680104': 5.650000,
},
'E': {
'BIGC670101': 84.700000,
'CHAM820101': 0.151000,
'CHOP780201': 1.510000,
'CHOP780202': 0.370000,
'CHOP780203': 0.950000,
'EISD860101': -0.760000,
'FASG760101': 147.130000,
'FAUJ830101': -0.640000,
'GRAR740102': 12.300000,
'GRAR740103': 83.000000,
'GUYH850101': 0.830000,
'HOPT810101': 3.000000,
'JANJ780101': 68.200000,
'KARP850101': 1.094000,
'KYTJ820101': -3.500000,
'ROSM880101': 7.350000,
'VINM940101': 1.094000,
'WERD780101': 0.380000,
'ZIMJ680101': 0.650000,
'ZIMJ680104': 3.220000,
},
'G': {
'BIGC670101': 36.300000,
'CHAM820101': 0.000000,
'CHOP780201': 0.570000,
'CHOP780202': 0.750000,
'CHOP780203': 1.560000,
'EISD860101': 0.000000,
'FASG760101': 75.070000,
'FAUJ830101': 0.000000,
'GRAR740102': 9.000000,
'GRAR740103': 3.000000,
'GUYH850101': 0.330000,
'HOPT810101': 0.000000,
'JANJ780101': 24.500000,
'KARP850101': 1.142000,
'KYTJ820101': -0.400000,
'ROSM880101': 0.000000,
'VINM940101': 1.031000,
'WERD780101': 0.410000,
'ZIMJ680101': 0.100000,
'ZIMJ680104': 5.970000,
},
'H': {
'BIGC670101': 91.900000,
'CHAM820101': 0.230000,
'CHOP780201': 1.000000,
'CHOP780202': 0.870000,
'CHOP780203': 0.950000,
'EISD860101': 0.640000,
'FASG760101': 155.160000,
'FAUJ830101': 0.130000,
'GRAR740102': 10.400000,
'GRAR740103': 96.000000,
'GUYH850101': -0.500000,
'HOPT810101': -0.500000,
'JANJ780101': 50.700000,
'KARP850101': 0.982000,
'KYTJ820101': -3.200000,
'ROSM880101': 3.820000,
'VINM940101': 0.950000,
'WERD780101': 0.700000,
'ZIMJ680101': 1.100000,
'ZIMJ680104': 7.590000,
},
'I': {
'BIGC670101': 102.000000,
'CHAM820101': 0.186000,
'CHOP780201': 1.080000,
'CHOP780202': 1.600000,
'CHOP780203': 0.470000,
'EISD860101': 1.900000,
'FASG760101': 131.170000,
'FAUJ830101': 1.800000,
'GRAR740102': 5.200000,
'GRAR740103': 111.000000,
'GUYH850101': -1.130000,
'HOPT810101': -1.800000,
'JANJ780101': 22.800000,
'KARP850101': 1.002000,
'KYTJ820101': 4.500000,
'ROSM880101': -3.020000,
'VINM940101': 0.927000,
'WERD780101': 0.790000,
'ZIMJ680101': 3.070000,
'ZIMJ680104': 6.020000,
},
'L': {
'BIGC670101': 102.000000,
'CHAM820101': 0.186000,
'CHOP780201': 1.210000,
'CHOP780202': 1.300000,
'CHOP780203': 0.500000,
'EISD860101': 1.900000,
'FASG760101': 131.170000,
'FAUJ830101': 1.700000,
'GRAR740102': 4.900000,
'GRAR740103': 111.000000,
'GUYH850101': -1.180000,
'HOPT810101': -1.800000,
'JANJ780101': 27.600000,
'KARP850101': 0.967000,
'KYTJ820101': 3.800000,
'ROSM880101': -3.020000,
'VINM940101': 0.935000,
'WERD780101': 0.770000,
'ZIMJ680101': 2.520000,
'ZIMJ680104': 5.980000,
},
'K': {
'BIGC670101': 105.100000,
'CHAM820101': 0.219000,
'CHOP780201': 1.160000,
'CHOP780202': 0.740000,
'CHOP780203': 1.190000,
'EISD860101': -0.570000,
'FASG760101': 146.190000,
'FAUJ830101': -0.990000,
'GRAR740102': 11.300000,
'GRAR740103': 119.000000,
'GUYH850101': 1.400000,
'HOPT810101': 3.000000,
'JANJ780101': 103.000000,
'KARP850101': 1.093000,
'KYTJ820101': -3.900000,
'ROSM880101': 6.130000,
'VINM940101': 1.102000,
'WERD780101': 0.310000,
'ZIMJ680101': 1.600000,
'ZIMJ680104': 9.740000,
},
'M': {
'BIGC670101': 97.700000,
'CHAM820101': 0.221000,
'CHOP780201': 1.450000,
'CHOP780202': 1.050000,
'CHOP780203': 0.600000,
'EISD860101': 2.400000,
'FASG760101': 149.210000,
'FAUJ830101': 1.230000,
'GRAR740102': 5.700000,
'GRAR740103': 105.000000,
'GUYH850101': -1.590000,
'HOPT810101': -1.300000,
'JANJ780101': 33.500000,
'KARP850101': 0.947000,
'KYTJ820101': 1.900000,
'ROSM880101': -1.300000,
'VINM940101': 0.952000,
'WERD780101': 0.760000,
'ZIMJ680101': 1.400000,
'ZIMJ680104': 5.740000,
},
'F': {
'BIGC670101': 113.900000,
'CHAM820101': 0.290000,
'CHOP780201': 1.130000,
'CHOP780202': 1.380000,
'CHOP780203': 0.660000,
'EISD860101': 2.300000,
'FASG760101': 165.190000,
'FAUJ830101': 1.790000,
'GRAR740102': 5.200000,
'GRAR740103': 132.000000,
'GUYH850101': -2.120000,
'HOPT810101': -2.500000,
'JANJ780101': 25.500000,
'KARP850101': 0.930000,
'KYTJ820101': 2.800000,
'ROSM880101': -3.240000,
'VINM940101': 0.915000,
'WERD780101': 0.870000,
'ZIMJ680101': 2.750000,
'ZIMJ680104': 5.480000,
},
'P': {
'BIGC670101': 73.600000,
'CHAM820101': 0.131000,
'CHOP780201': 0.570000,
'CHOP780202': 0.550000,
'CHOP780203': 1.560000,
'EISD860101': 1.200000,
'FASG760101': 115.130000,
'FAUJ830101': 0.720000,
'GRAR740102': 8.000000,
'GRAR740103': 32.500000,
'GUYH850101': 0.730000,
'HOPT810101': 0.000000,
'JANJ780101': 51.500000,
'KARP850101': 1.055000,
'KYTJ820101': -1.600000,
'ROSM880101': -1.750000,
'VINM940101': 1.049000,
'WERD780101': 0.350000,
'ZIMJ680101': 2.700000,
'ZIMJ680104': 6.300000,
},
'S': {
'BIGC670101': 54.900000,
'CHAM820101': 0.062000,
'CHOP780201': 0.770000,
'CHOP780202': 0.750000,
'CHOP780203': 1.430000,
'EISD860101': 0.010000,
'FASG760101': 105.090000,
'FAUJ830101': -0.040000,
'GRAR740102': 9.200000,
'GRAR740103': 32.000000,
'GUYH850101': 0.520000,
'HOPT810101': 0.300000,
'JANJ780101': 42.000000,
'KARP850101': 1.169000,
'KYTJ820101': -0.800000,
'ROSM880101': 4.350000,
'VINM940101': 1.046000,
'WERD780101': 0.490000,
'ZIMJ680101': 0.140000,
'ZIMJ680104': 5.680000,
},
'T': {
'BIGC670101': 71.200000,
'CHAM820101': 0.108000,
'CHOP780201': 0.830000,
'CHOP780202': 1.190000,
'CHOP780203': 0.980000,
'EISD860101': 0.520000,
'FASG760101': 119.120000,
'FAUJ830101': 0.260000,
'GRAR740102': 8.600000,
'GRAR740103': 61.000000,
'GUYH850101': 0.070000,
'HOPT810101': -0.400000,
'JANJ780101': 45.000000,
'KARP850101': 1.073000,
'KYTJ820101': -0.700000,
'ROSM880101': 3.860000,
'VINM940101': 0.997000,
'WERD780101': 0.380000,
'ZIMJ680101': 0.540000,
'ZIMJ680104': 5.660000,
},
'W': {
'BIGC670101': 135.400000,
'CHAM820101': 0.409000,
'CHOP780201': 1.080000,
'CHOP780202': 1.370000,
'CHOP780203': 0.600000,
'EISD860101': 2.600000,
'FASG760101': 204.240000,
'FAUJ830101': 2.250000,
'GRAR740102': 5.400000,
'GRAR740103': 170.000000,
'GUYH850101': -0.510000,
'HOPT810101': -3.400000,
'JANJ780101': 34.700000,
'KARP850101': 0.925000,
'KYTJ820101': -0.900000,
'ROSM880101': -2.860000,
'VINM940101': 0.904000,
'WERD780101': 0.860000,
'ZIMJ680101': 0.310000,
'ZIMJ680104': 5.890000,
},
'Y': {
'BIGC670101': 116.200000,
'CHAM820101': 0.298000,
'CHOP780201': 0.690000,
'CHOP780202': 1.470000,
'CHOP780203': 1.140000,
'EISD860101': 1.600000,
'FASG760101': 181.190000,
'FAUJ830101': 0.960000,
'GRAR740102': 6.200000,
'GRAR740103': 136.000000,
'GUYH850101': -0.210000,
'HOPT810101': -2.300000,
'JANJ780101': 55.200000,
'KARP850101': 0.961000,
'KYTJ820101': -1.300000,
'ROSM880101': 0.980000,
'VINM940101': 0.929000,
'WERD780101': 0.640000,
'ZIMJ680101': 2.970000,
'ZIMJ680104': 5.660000,
},
'V': {
'BIGC670101': 85.100000,
'CHAM820101': 0.140000,
'CHOP780201': 1.060000,
'CHOP780202': 1.700000,
'CHOP780203': 0.590000,
'EISD860101': 1.500000,
'FASG760101': 117.150000,
'FAUJ830101': 1.220000,
'GRAR740102': 5.900000,
'GRAR740103': 84.000000,
'GUYH850101': -1.270000,
'HOPT810101': -1.500000,
'JANJ780101': 23.700000,
'KARP850101': 0.982000,
'KYTJ820101': 4.200000,
'ROSM880101': -2.180000,
'VINM940101': 0.931000,
'WERD780101': 0.720000,
'ZIMJ680101': 1.790000,
'ZIMJ680104': 5.960000,
},
}
# Human-readable description for each AAindex feature code, keyed by code
FEATURE_DESCRIPTIONS = {
    "BIGC670101": "Residue volume (Bigelow, 1967)",
    "CHAM820101": "Polarizability parameter (Charton-Charton, 1982)",
    "CHOP780201": "Normalized frequency of alpha-helix (Chou-Fasman, 1978b)",
    "CHOP780202": "Normalized frequency of beta-sheet (Chou-Fasman, 1978b)",
    "CHOP780203": "Normalized frequency of beta-turn (Chou-Fasman, 1978b)",
    "EISD860101": "Solvation free energy (Eisenberg-McLachlan, 1986)",
    "FASG760101": "Molecular weight (Fasman, 1976)",
    "FAUJ830101": "Hydrophobic parameter pi (Fauchere-Pliska, 1983)",
    "GRAR740102": "Polarity (Grantham, 1974)",
    "GRAR740103": "Volume (Grantham, 1974)",
    "GUYH850101": "Partition energy (Guy, 1985)",
    "HOPT810101": "Hydrophilicity value (Hopp-Woods, 1981)",
    "JANJ780101": "Average accessible surface area (Janin et al., 1978)",
    "KARP850101": "Flexibility parameter for no rigid neighbors (Karplus-Schulz, 1985)",
    "KYTJ820101": "Hydropathy index (Kyte-Doolittle, 1982)",
    "ROSM880101": "Side chain hydropathy, uncorrected for solvation (Roseman, 1988)",
    "VINM940101": "Normalized flexibility parameters (B-values), average (Vihinen et al., 1994)",
    "WERD780101": "Propensity to be buried inside (Wertz-Scheraga, 1978)",
    "ZIMJ680101": "Hydrophobicity (Zimmerman et al., 1968)",
    "ZIMJ680104": "Isoelectric point (Zimmerman et al., 1968)",
}
# Per-residue lookup returning the selected properties as a numpy array
def get_feature_vector(aa, feature_list=None):
    """
    Build the feature vector for a single amino acid.

    Args:
        aa: Amino acid single letter code. A code that is not a key of
            AA_PROPERTIES_AAINDEX is treated as Alanine ("A").
        feature_list: Feature codes to include, in output order
            (None = every code stored for the residue, in sorted order).

    Returns:
        numpy array holding one value per requested feature code.
    """
    # Unknown residues fall back to the Alanine entry instead of raising
    alanine_props = AA_PROPERTIES_AAINDEX["A"]
    residue_props = AA_PROPERTIES_AAINDEX.get(aa, alanine_props)
    codes = sorted(residue_props) if feature_list is None else feature_list
    return np.array([residue_props[code] for code in codes])
def get_sequence_features(sequence, feature_list=None):
    """
    Get feature matrix for a sequence [L, N_features].

    Args:
        sequence: Iterable of amino acid single letter codes (e.g. a string).
        feature_list: Feature codes to include, in column order
            (None = all codes, in sorted order).

    Returns:
        2-D numpy array with one row per residue and one column per
        feature code. An empty sequence yields shape (0, N_features).
    """
    if feature_list is None:
        # Resolve the default column order once instead of re-sorting it for
        # every residue; every residue in the table carries the same codes.
        feature_list = sorted(AA_PROPERTIES_AAINDEX["A"])
    else:
        # Materialize so a one-shot iterable is not exhausted by the first
        # residue, and so the column count is known for the empty case.
        feature_list = list(feature_list)

    rows = [get_feature_vector(aa, feature_list) for aa in sequence]
    if not rows:
        # np.array([]) would be 1-D with shape (0,); keep the documented
        # [L, N_features] layout even when L == 0.
        return np.empty((0, len(feature_list)))
    return np.array(rows)
# Smoke test: print a short summary when the module is run as a script
if __name__ == "__main__":
    print("Loaded 20 features for 20 amino acids")

    # Show the first five properties stored for Alanine
    print("\nExample: Alanine (A)")
    alanine_props = AA_PROPERTIES_AAINDEX["A"]
    for key in list(alanine_props)[:5]:
        value = alanine_props[key]
        print(f" {key}: {value:.4f} - {FEATURE_DESCRIPTIONS[key][:50]}")

    # Build the feature matrix for a short example sequence
    print("\nTest sequence features:")
    seq = "ARNDCQEG"
    features = get_sequence_features(seq)
    print(f" Sequence: {seq}")
    print(f" Feature matrix shape: {features.shape}")