1 |
package proteinstructure; |
2 |
|
3 |
import java.io.BufferedReader; |
4 |
import java.io.IOException; |
5 |
import java.io.InputStream; |
6 |
import java.io.InputStreamReader; |
7 |
import java.util.HashMap; |
8 |
import java.util.Map; |
9 |
import java.util.Set; |
10 |
import java.util.TreeMap; |
11 |
import java.util.TreeSet; |
12 |
import java.util.regex.Matcher; |
13 |
import java.util.regex.Pattern; |
14 |
|
15 |
/** |
16 |
* Class with static methods to get aminoacids and contact type information |
17 |
* example usage: |
18 |
* AAinfo.isValidContactType("Cg"); // returns true |
19 |
* AAinfo.three2oneletter("ALA"); // returns "A" |
20 |
* |
21 |
* The contact types and aas2atoms data are defined in separate text file contactTypes.dat |
22 |
* New contact types can be added simply by editing the file |
23 |
* |
24 |
* Beware that everything is static in this file. The JVM will initialise the static |
25 |
* variables when they are first called and keep them as if if the static class itself |
26 |
* was a global instantiated object |
27 |
*/ |
28 |
public class AAinfo { |
29 |
|
30 |
/*--------------------------- constants ------------------------------*/ |
31 |
// file with contact type definitions |
32 |
// refers to root of the aglappe package |
33 |
private static final String CT_DEFS_FILE = "/proteinstructure/contactTypes.dat"; |
34 |
|
35 |
// lower bound distances used for our ConstraintsMaker class |
36 |
// from our "empirical" calculations |
37 |
public static final double BB_DIAMETER_GYRATION=4.6; |
38 |
public static final double DIST_MIN_CA=2.8; |
39 |
// "guessed" general min distance from hydrogen the hydrogen bond length (we used it in Cb and Cg) |
40 |
public static final double DIST_MIN=2.6; |
41 |
|
42 |
|
43 |
/*----------------------- member variables ---------------------------*/ |
44 |
private final static Map<String,Double> lowerBoundDistances = initialiseLowerBoundDistances(); |
45 |
|
46 |
private final static Map<String,String> one2threeletter = initialiseOne2threeletter(); |
47 |
private final static Map<String,String> three2oneletter = initialiseThree2oneletter(); |
48 |
private final static Set<String> aas = initialiseAAs(); |
49 |
|
50 |
private final static Map<String,ContactType> cts = initialiseCTsFromFile(); |
51 |
|
52 |
private final static Map<String,Set<String>> aas2atoms = initialiseAas2atoms(); // depends on cts |
53 |
|
54 |
private final static Map<String,String> fullname2threeletter = initialiseFullNames2Threeletter(); |
55 |
|
56 |
|
57 |
/*----------------------- private methods ----------------------------*/ |
58 |
private static Map<String,Double> initialiseLowerBoundDistances() { |
59 |
Map<String,Double> lowerBoundDistances = new HashMap<String, Double>(); |
60 |
lowerBoundDistances.put("Ca", DIST_MIN_CA); |
61 |
lowerBoundDistances.put("Cb", DIST_MIN); |
62 |
lowerBoundDistances.put("Cg", DIST_MIN); |
63 |
lowerBoundDistances.put("C", DIST_MIN_CA); |
64 |
return lowerBoundDistances; |
65 |
} |
66 |
|
67 |
private static Map<String,String> initialiseOne2threeletter() { |
68 |
Map<String,String> one2threeletter = new HashMap<String,String>(); |
69 |
one2threeletter.put("C", "CYS"); |
70 |
one2threeletter.put("D", "ASP"); |
71 |
one2threeletter.put("S", "SER"); |
72 |
one2threeletter.put("Q", "GLN"); |
73 |
one2threeletter.put("K", "LYS"); |
74 |
one2threeletter.put("I", "ILE"); |
75 |
one2threeletter.put("P", "PRO"); |
76 |
one2threeletter.put("T", "THR"); |
77 |
one2threeletter.put("F", "PHE"); |
78 |
one2threeletter.put("A", "ALA"); |
79 |
one2threeletter.put("G", "GLY"); |
80 |
one2threeletter.put("H", "HIS"); |
81 |
one2threeletter.put("E", "GLU"); |
82 |
one2threeletter.put("L", "LEU"); |
83 |
one2threeletter.put("R", "ARG"); |
84 |
one2threeletter.put("W", "TRP"); |
85 |
one2threeletter.put("V", "VAL"); |
86 |
one2threeletter.put("N", "ASN"); |
87 |
one2threeletter.put("Y", "TYR"); |
88 |
one2threeletter.put("M", "MET"); |
89 |
return one2threeletter; |
90 |
} |
91 |
|
92 |
private static Map<String,String> initialiseThree2oneletter() { |
93 |
Map<String,String> three2oneletter = new HashMap<String,String>(); |
94 |
three2oneletter.put("CYS", "C"); |
95 |
three2oneletter.put("ASP", "D"); |
96 |
three2oneletter.put("SER", "S"); |
97 |
three2oneletter.put("GLN", "Q"); |
98 |
three2oneletter.put("LYS", "K"); |
99 |
three2oneletter.put("ILE", "I"); |
100 |
three2oneletter.put("PRO", "P"); |
101 |
three2oneletter.put("THR", "T"); |
102 |
three2oneletter.put("PHE", "F"); |
103 |
three2oneletter.put("ALA", "A"); |
104 |
three2oneletter.put("GLY", "G"); |
105 |
three2oneletter.put("HIS", "H"); |
106 |
three2oneletter.put("GLU", "E"); |
107 |
three2oneletter.put("LEU", "L"); |
108 |
three2oneletter.put("ARG", "R"); |
109 |
three2oneletter.put("TRP", "W"); |
110 |
three2oneletter.put("VAL", "V"); |
111 |
three2oneletter.put("ASN", "N"); |
112 |
three2oneletter.put("TYR", "Y"); |
113 |
three2oneletter.put("MET", "M"); |
114 |
return three2oneletter; |
115 |
} |
116 |
|
117 |
private static Map<String,String> initialiseFullNames2Threeletter() { |
118 |
Map<String,String> fullnames2threeletter = new HashMap<String,String>(); |
119 |
fullnames2threeletter.put("Alanine","ALA"); |
120 |
fullnames2threeletter.put("Arginine","ARG"); |
121 |
fullnames2threeletter.put("Asparagine","ASN"); |
122 |
fullnames2threeletter.put("Aspartic Acid","ASP"); |
123 |
fullnames2threeletter.put("Cysteine","CYS"); |
124 |
fullnames2threeletter.put("Glutamic Acid","GLU"); |
125 |
fullnames2threeletter.put("Glutamine","GLN"); |
126 |
fullnames2threeletter.put("Glycine","GLY"); |
127 |
fullnames2threeletter.put("Histidine","HIS"); |
128 |
fullnames2threeletter.put("Isoleucine","ILE"); |
129 |
fullnames2threeletter.put("Leucine","LEU"); |
130 |
fullnames2threeletter.put("Lysine","LYS"); |
131 |
fullnames2threeletter.put("Methionine","MET"); |
132 |
fullnames2threeletter.put("Phenylalanine","PHE"); |
133 |
fullnames2threeletter.put("Proline","PRO"); |
134 |
fullnames2threeletter.put("Serine","SER"); |
135 |
fullnames2threeletter.put("Threonine","THR"); |
136 |
fullnames2threeletter.put("Tryptophan","TRP"); |
137 |
fullnames2threeletter.put("Tyrosine","TYR"); |
138 |
fullnames2threeletter.put("Valine","VAL"); |
139 |
return fullnames2threeletter; |
140 |
} |
141 |
|
142 |
private static Set<String> initialiseAAs() { |
143 |
Set<String> aas = new TreeSet<String>(); |
144 |
aas.add("TRP"); |
145 |
aas.add("CYS"); |
146 |
aas.add("GLN"); |
147 |
aas.add("ALA"); |
148 |
aas.add("VAL"); |
149 |
aas.add("LEU"); |
150 |
aas.add("ASP"); |
151 |
aas.add("SER"); |
152 |
aas.add("PRO"); |
153 |
aas.add("THR"); |
154 |
aas.add("PHE"); |
155 |
aas.add("ARG"); |
156 |
aas.add("LYS"); |
157 |
aas.add("MET"); |
158 |
aas.add("HIS"); |
159 |
aas.add("GLY"); |
160 |
aas.add("ILE"); |
161 |
aas.add("ASN"); |
162 |
aas.add("GLU"); |
163 |
aas.add("TYR"); |
164 |
return aas; |
165 |
} |
166 |
|
167 |
private static Map<String,Set<String>> initialiseAas2atoms() { |
168 |
Map<String,Set<String>> aas2atoms = new HashMap<String, Set<String>>(); |
169 |
aas2atoms = cts.get("ALL"); |
170 |
return aas2atoms; |
171 |
} |
172 |
|
173 |
private static Map<String,ContactType> initialiseCTsFromFile() { |
174 |
Map<String,ContactType> cts = new TreeMap<String,ContactType>(); |
175 |
|
176 |
InputStream inp = Runtime.getRuntime().getClass().getResourceAsStream(CT_DEFS_FILE); |
177 |
BufferedReader br = new BufferedReader(new InputStreamReader(inp)); |
178 |
String line; |
179 |
try { |
180 |
ContactType contactType = null; |
181 |
String ct = ""; |
182 |
boolean multiAtom = false; |
183 |
while ((line = br.readLine())!= null) { |
184 |
// skip comments and empty lines |
185 |
if (line.startsWith("#")) continue; |
186 |
if (line.trim().equals("")) continue; |
187 |
if (line.startsWith(">")){ |
188 |
if (!ct.equals("")) { // except for first ct put last res2atoms HashMap for the last ct |
189 |
cts.put(ct, contactType); |
190 |
} |
191 |
Pattern p = Pattern.compile("^>\\s(\\w+)\\s(\\w+)$"); |
192 |
Matcher m = p.matcher(line); |
193 |
if (m.matches()){ |
194 |
ct = m.group(1); |
195 |
String type = m.group(2); |
196 |
if (type.equals("multi")) { |
197 |
multiAtom = true ; |
198 |
} else { |
199 |
multiAtom = false; |
200 |
} |
201 |
} |
202 |
contactType = new ContactType(ct,multiAtom); |
203 |
} else { // for all other lines |
204 |
String aa = line.substring(0,3); |
205 |
String atomsStr = line.substring(4).trim(); |
206 |
String[] atomsArray = new String[0]; // initialisation to empty array |
207 |
if (!atomsStr.equals("")) { // if not atomsArray stays empty (for cases of no atoms for a given residue) |
208 |
atomsArray = atomsStr.split("\\s"); |
209 |
} |
210 |
Set<String> atoms = new TreeSet<String>(); |
211 |
for (String atom: atomsArray) { |
212 |
atoms.add(atom); // if atomsArray was empty then atoms will be an empty (not null) Set |
213 |
} |
214 |
contactType.put(aa, atoms); |
215 |
} |
216 |
} |
217 |
cts.put(ct, contactType); |
218 |
} catch (IOException e) { |
219 |
System.err.println("IO error while reading contact types definition file: "+CT_DEFS_FILE+". Exiting."); |
220 |
System.err.println("Error was: "+e.getMessage()); |
221 |
System.exit(1); |
222 |
} |
223 |
return cts; |
224 |
} |
225 |
|
226 |
/*----------------------- public methods ---------------------------*/ |
227 |
|
228 |
/** |
229 |
* Given a three letter code returns true if is a standard aminoacid |
230 |
*/ |
231 |
public static boolean isValidAA(String three) { |
232 |
return aas.contains(three); |
233 |
} |
234 |
|
235 |
/** |
236 |
* Gets all three letter code standard aminoacids in a Set |
237 |
* @return |
238 |
*/ |
239 |
public static Set<String> getAAs() { |
240 |
return aas; |
241 |
} |
242 |
|
243 |
/** |
244 |
* Gets all contact type names in a Set |
245 |
* @return |
246 |
*/ |
247 |
public static Set<String> getAllContactTypes() { |
248 |
return cts.keySet(); |
249 |
} |
250 |
|
251 |
/** |
252 |
* Gets all single atom contact types in a Set |
253 |
* @return |
254 |
*/ |
255 |
public static Set<String> getSingleAtomContactTypes() { |
256 |
Set<String> singleAtomCts = new TreeSet<String>(); |
257 |
for (ContactType contactType:cts.values()) { |
258 |
if (!contactType.isMultiAtom()) singleAtomCts.add(contactType.getName()); |
259 |
} |
260 |
return singleAtomCts; |
261 |
} |
262 |
|
263 |
/** |
264 |
* Gets all multiple atom contact types in a Set |
265 |
* @return |
266 |
*/ |
267 |
public static Set<String> getMultiAtomContactTypes() { |
268 |
Set<String> multiAtomCts = new TreeSet<String>(); |
269 |
for (ContactType contactType:cts.values()) { |
270 |
if (contactType.isMultiAtom()) multiAtomCts.add(contactType.getName()); |
271 |
} |
272 |
return multiAtomCts; |
273 |
|
274 |
} |
275 |
|
276 |
/** |
277 |
* Returns true if ct is a valid contact type name |
278 |
* Crossed contacts (e.g. BB/SC or Ca/Cg) will also be valid |
279 |
* @param ct |
280 |
* @return |
281 |
*/ |
282 |
public static boolean isValidContactType(String ct){ |
283 |
Set<String> allCts = getAllContactTypes(); // depends on cts being initialised |
284 |
if (ct.contains("/")){ |
285 |
String[] cts = ct.split("/"); |
286 |
if (allCts.contains(cts[0]) && allCts.contains(cts[1])) { |
287 |
return true; |
288 |
} else { |
289 |
return false; |
290 |
} |
291 |
} |
292 |
return allCts.contains(ct); |
293 |
} |
294 |
|
295 |
/** |
296 |
* Returns true if ct is a valid single atom contact type name |
297 |
* Crossed contacts (e.g. Ca/Cg) will also be valid |
298 |
* @param ct |
299 |
* @return |
300 |
*/ |
301 |
public static boolean isValidSingleAtomContactType(String ct){ |
302 |
Set<String> singleAtomCts = getSingleAtomContactTypes(); // depends on cts being initialised |
303 |
if (ct.contains("/")){ |
304 |
String[] cts = ct.split("/"); |
305 |
if (singleAtomCts.contains(cts[0]) && singleAtomCts.contains(cts[1])) { |
306 |
return true; |
307 |
} else { |
308 |
return false; |
309 |
} |
310 |
} |
311 |
return singleAtomCts.contains(ct); |
312 |
} |
313 |
|
314 |
/** |
315 |
* Returns true if ct is a valid multiple atom contact type name |
316 |
* Crossed contacts (e.g. BB/SC) will also be valid |
317 |
* @param ct |
318 |
* @return |
319 |
*/ |
320 |
public static boolean isValidMultiAtomContactType(String ct){ |
321 |
Set<String> multiAtomCts = getMultiAtomContactTypes(); // depends on cts being initialised |
322 |
if (ct.contains("/")){ |
323 |
String[] cts = ct.split("/"); |
324 |
if (multiAtomCts.contains(cts[0]) && multiAtomCts.contains(cts[1])) { |
325 |
return true; |
326 |
} else { |
327 |
return false; |
328 |
} |
329 |
} |
330 |
return multiAtomCts.contains(ct); |
331 |
} |
332 |
|
333 |
/** |
334 |
* Gets the lower bound distance for assigning distance restraints |
335 |
* to contacts given a contact type |
336 |
* @param ct |
337 |
* @return |
338 |
*/ |
339 |
public static double getLowerBoundDistance(String ct) { |
340 |
return lowerBoundDistances.get(ct); |
341 |
} |
342 |
|
343 |
/** |
344 |
* Converts from one letter aminoacid codes to three letter codes |
345 |
* If invalid input returns null |
346 |
* @param one |
347 |
* @return |
348 |
*/ |
349 |
public static String oneletter2threeletter(String one) { |
350 |
return one2threeletter.get(one); |
351 |
} |
352 |
|
353 |
/** |
354 |
* Converts from three letter aminoacid codes to one letter codes |
355 |
* If invalid input returns null |
356 |
* @param three |
357 |
* @return |
358 |
*/ |
359 |
public static String threeletter2oneletter(String three) { |
360 |
return three2oneletter.get(three); |
361 |
} |
362 |
|
363 |
/** |
364 |
* Converts from aminoacid full names (capitalised first letter, rest lower case) |
365 |
* to three letter codes |
366 |
* @param full |
367 |
* @return |
368 |
*/ |
369 |
public static String fullname2threeletter(String full){ |
370 |
return fullname2threeletter.get(full); |
371 |
} |
372 |
|
373 |
/** |
374 |
* Returns true if given String is a valid aminoacid name |
375 |
* (first letter capitalised, rest lower case) |
376 |
* @param full |
377 |
* @return |
378 |
*/ |
379 |
public static boolean isValidFullName(String full) { |
380 |
return fullname2threeletter.keySet().contains(full); |
381 |
} |
382 |
|
383 |
/** |
384 |
* Returns all aminoacid full names in a Set |
385 |
* @return |
386 |
*/ |
387 |
public static Set<String> getAAFullNames(){ |
388 |
return fullname2threeletter.keySet(); |
389 |
} |
390 |
|
391 |
/** |
392 |
* Given a three letter code aminoacid and an atom name say whether |
393 |
* the atom is a valid atom for that aminoacid |
394 |
* Doesn't consider OXT to be a valid atom for any aminoacid |
395 |
* @param aa |
396 |
* @param atom |
397 |
* @return |
398 |
*/ |
399 |
public static boolean isValidAtom(String aa, String atom) { |
400 |
return aas2atoms.get(aa).contains(atom); |
401 |
} |
402 |
|
403 |
/** |
404 |
* Given a three letter code aminoacid and an atom name say whether |
405 |
* the atom is a valid atom for that aminoacid |
406 |
* Considers OXT to be a valid atom for all aminoacids |
407 |
* @param aa |
408 |
* @param atom |
409 |
* @return |
410 |
*/ |
411 |
public static boolean isValidAtomWithOXT(String aa, String atom) { |
412 |
if (atom.equals("OXT")) return true; |
413 |
return aas2atoms.get(aa).contains(atom); |
414 |
} |
415 |
|
416 |
/** |
417 |
* Gets all (non-Hydrogen) atoms for an aminoacid (three letter code) |
418 |
* @param aa |
419 |
* @return |
420 |
*/ |
421 |
public static Set<String> getAtoms(String aa) { |
422 |
return aas2atoms.get(aa); |
423 |
} |
424 |
|
425 |
/** |
426 |
* Gets the number of non-hydrogen atoms for an aminoacid (three letter code) |
427 |
* @param aa |
428 |
* @return |
429 |
*/ |
430 |
public static int getNumberAtoms(String aa) { |
431 |
return aas2atoms.get(aa).size(); |
432 |
} |
433 |
|
434 |
/** |
435 |
* Returns a Set of all atom names given an aminoacid and a contact type |
436 |
* e.g. for aa="SER" and ct="SC" returns ["CB", "CG"] |
437 |
* @param ct |
438 |
* @param aa |
439 |
* @return |
440 |
*/ |
441 |
public static Set<String> getAtomsForCTAndRes(String ct, String aa) { |
442 |
return cts.get(ct).get(aa); |
443 |
} |
444 |
|
445 |
} |