ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/branches/aglappe-jung/proteinstructure/AAinfo.java
Revision: 328
Committed: Mon Oct 1 10:57:08 2007 UTC (17 years ago) by duarte
Original Path: trunk/proteinstructure/AAinfo.java
File size: 13632 byte(s)
Log Message:
First commit of ConstraintsMaker (and PRMInfo needed for it)
Does correct atom serial mapping when using amber prm files, except for some atoms of ASP, ARG, GLU, PHE, TYR (where there are some nomenclature differences between amber and pdb)
Does not work yet with multi atom contact types, should work fine for all single atom contact types and for crossed single atom contact types
The pdb/xyz atom mapping using coordinates (as in python) is commented out because it doesn't work due to rounding differences between java and fortran
Thus the PRMInfo class was needed, at the moment can read only amber prm files
Some new methods and better javadoc in Pdb.
Some new methods in AAinfo
Line File contents
1 package proteinstructure;
2
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.io.InputStreamReader;
7 import java.util.HashMap;
8 import java.util.Map;
9 import java.util.Set;
10 import java.util.TreeMap;
11 import java.util.TreeSet;
12 import java.util.regex.Matcher;
13 import java.util.regex.Pattern;
14
15 /**
16 * Class with static methods to get aminoacids and contact type information
17 * example usage:
18 * AAinfo.isValidContactType("Cg"); // returns true
19 * AAinfo.three2oneletter("ALA"); // returns "A"
20 *
21 * The contact types and aas2atoms data are defined in separate text file contactTypes.dat
22 * New contact types can be added simply by editing the file
23 *
24 * Beware that everything is static in this file. The JVM will initialise the static
25 * variables when they are first called and keep them as if if the static class itself
26 * was a global instantiated object
27 */
28 public class AAinfo {
29
30 /*--------------------------- constants ------------------------------*/
31 // file with contact type definitions
32 // refers to root of the aglappe package
33 private static final String CT_DEFS_FILE = "/proteinstructure/contactTypes.dat";
34
35 // lower bound distances used for our ConstraintsMaker class
36 // from our "empirical" calculations
37 public static final double BB_DIAMETER_GYRATION=4.6;
38 public static final double DIST_MIN_CA=2.8;
39 // "guessed" general min distance from hydrogen the hydrogen bond length (we used it in Cb and Cg)
40 public static final double DIST_MIN=2.6;
41
42
43 /*----------------------- member variables ---------------------------*/
44 private final static Map<String,Double> lowerBoundDistances = initialiseLowerBoundDistances();
45
46 private final static Map<String,String> one2threeletter = initialiseOne2threeletter();
47 private final static Map<String,String> three2oneletter = initialiseThree2oneletter();
48 private final static Set<String> aas = initialiseAAs();
49
50 private final static Map<String,ContactType> cts = initialiseCTsFromFile();
51
52 private final static Map<String,Set<String>> aas2atoms = initialiseAas2atoms(); // depends on cts
53
54 private final static Map<String,String> fullname2threeletter = initialiseFullNames2Threeletter();
55
56
57 /*----------------------- private methods ----------------------------*/
58 private static Map<String,Double> initialiseLowerBoundDistances() {
59 Map<String,Double> lowerBoundDistances = new HashMap<String, Double>();
60 lowerBoundDistances.put("Ca", DIST_MIN_CA);
61 lowerBoundDistances.put("Cb", DIST_MIN);
62 lowerBoundDistances.put("Cg", DIST_MIN);
63 lowerBoundDistances.put("C", DIST_MIN_CA);
64 return lowerBoundDistances;
65 }
66
67 private static Map<String,String> initialiseOne2threeletter() {
68 Map<String,String> one2threeletter = new HashMap<String,String>();
69 one2threeletter.put("C", "CYS");
70 one2threeletter.put("D", "ASP");
71 one2threeletter.put("S", "SER");
72 one2threeletter.put("Q", "GLN");
73 one2threeletter.put("K", "LYS");
74 one2threeletter.put("I", "ILE");
75 one2threeletter.put("P", "PRO");
76 one2threeletter.put("T", "THR");
77 one2threeletter.put("F", "PHE");
78 one2threeletter.put("A", "ALA");
79 one2threeletter.put("G", "GLY");
80 one2threeletter.put("H", "HIS");
81 one2threeletter.put("E", "GLU");
82 one2threeletter.put("L", "LEU");
83 one2threeletter.put("R", "ARG");
84 one2threeletter.put("W", "TRP");
85 one2threeletter.put("V", "VAL");
86 one2threeletter.put("N", "ASN");
87 one2threeletter.put("Y", "TYR");
88 one2threeletter.put("M", "MET");
89 return one2threeletter;
90 }
91
92 private static Map<String,String> initialiseThree2oneletter() {
93 Map<String,String> three2oneletter = new HashMap<String,String>();
94 three2oneletter.put("CYS", "C");
95 three2oneletter.put("ASP", "D");
96 three2oneletter.put("SER", "S");
97 three2oneletter.put("GLN", "Q");
98 three2oneletter.put("LYS", "K");
99 three2oneletter.put("ILE", "I");
100 three2oneletter.put("PRO", "P");
101 three2oneletter.put("THR", "T");
102 three2oneletter.put("PHE", "F");
103 three2oneletter.put("ALA", "A");
104 three2oneletter.put("GLY", "G");
105 three2oneletter.put("HIS", "H");
106 three2oneletter.put("GLU", "E");
107 three2oneletter.put("LEU", "L");
108 three2oneletter.put("ARG", "R");
109 three2oneletter.put("TRP", "W");
110 three2oneletter.put("VAL", "V");
111 three2oneletter.put("ASN", "N");
112 three2oneletter.put("TYR", "Y");
113 three2oneletter.put("MET", "M");
114 return three2oneletter;
115 }
116
117 private static Map<String,String> initialiseFullNames2Threeletter() {
118 Map<String,String> fullnames2threeletter = new HashMap<String,String>();
119 fullnames2threeletter.put("Alanine","ALA");
120 fullnames2threeletter.put("Arginine","ARG");
121 fullnames2threeletter.put("Asparagine","ASN");
122 fullnames2threeletter.put("Aspartic Acid","ASP");
123 fullnames2threeletter.put("Cysteine","CYS");
124 fullnames2threeletter.put("Glutamic Acid","GLU");
125 fullnames2threeletter.put("Glutamine","GLN");
126 fullnames2threeletter.put("Glycine","GLY");
127 fullnames2threeletter.put("Histidine","HIS");
128 fullnames2threeletter.put("Isoleucine","ILE");
129 fullnames2threeletter.put("Leucine","LEU");
130 fullnames2threeletter.put("Lysine","LYS");
131 fullnames2threeletter.put("Methionine","MET");
132 fullnames2threeletter.put("Phenylalanine","PHE");
133 fullnames2threeletter.put("Proline","PRO");
134 fullnames2threeletter.put("Serine","SER");
135 fullnames2threeletter.put("Threonine","THR");
136 fullnames2threeletter.put("Tryptophan","TRP");
137 fullnames2threeletter.put("Tyrosine","TYR");
138 fullnames2threeletter.put("Valine","VAL");
139 return fullnames2threeletter;
140 }
141
142 private static Set<String> initialiseAAs() {
143 Set<String> aas = new TreeSet<String>();
144 aas.add("TRP");
145 aas.add("CYS");
146 aas.add("GLN");
147 aas.add("ALA");
148 aas.add("VAL");
149 aas.add("LEU");
150 aas.add("ASP");
151 aas.add("SER");
152 aas.add("PRO");
153 aas.add("THR");
154 aas.add("PHE");
155 aas.add("ARG");
156 aas.add("LYS");
157 aas.add("MET");
158 aas.add("HIS");
159 aas.add("GLY");
160 aas.add("ILE");
161 aas.add("ASN");
162 aas.add("GLU");
163 aas.add("TYR");
164 return aas;
165 }
166
167 private static Map<String,Set<String>> initialiseAas2atoms() {
168 Map<String,Set<String>> aas2atoms = new HashMap<String, Set<String>>();
169 aas2atoms = cts.get("ALL");
170 return aas2atoms;
171 }
172
173 private static Map<String,ContactType> initialiseCTsFromFile() {
174 Map<String,ContactType> cts = new TreeMap<String,ContactType>();
175
176 InputStream inp = Runtime.getRuntime().getClass().getResourceAsStream(CT_DEFS_FILE);
177 BufferedReader br = new BufferedReader(new InputStreamReader(inp));
178 String line;
179 try {
180 ContactType contactType = null;
181 String ct = "";
182 boolean multiAtom = false;
183 while ((line = br.readLine())!= null) {
184 // skip comments and empty lines
185 if (line.startsWith("#")) continue;
186 if (line.trim().equals("")) continue;
187 if (line.startsWith(">")){
188 if (!ct.equals("")) { // except for first ct put last res2atoms HashMap for the last ct
189 cts.put(ct, contactType);
190 }
191 Pattern p = Pattern.compile("^>\\s(\\w+)\\s(\\w+)$");
192 Matcher m = p.matcher(line);
193 if (m.matches()){
194 ct = m.group(1);
195 String type = m.group(2);
196 if (type.equals("multi")) {
197 multiAtom = true ;
198 } else {
199 multiAtom = false;
200 }
201 }
202 contactType = new ContactType(ct,multiAtom);
203 } else { // for all other lines
204 String aa = line.substring(0,3);
205 String atomsStr = line.substring(4).trim();
206 String[] atomsArray = new String[0]; // initialisation to empty array
207 if (!atomsStr.equals("")) { // if not atomsArray stays empty (for cases of no atoms for a given residue)
208 atomsArray = atomsStr.split("\\s");
209 }
210 Set<String> atoms = new TreeSet<String>();
211 for (String atom: atomsArray) {
212 atoms.add(atom); // if atomsArray was empty then atoms will be an empty (not null) Set
213 }
214 contactType.put(aa, atoms);
215 }
216 }
217 cts.put(ct, contactType);
218 } catch (IOException e) {
219 System.err.println("IO error while reading contact types definition file: "+CT_DEFS_FILE+". Exiting.");
220 System.err.println("Error was: "+e.getMessage());
221 System.exit(1);
222 }
223 return cts;
224 }
225
226 /*----------------------- public methods ---------------------------*/
227
228 /**
229 * Given a three letter code returns true if is a standard aminoacid
230 */
231 public static boolean isValidAA(String three) {
232 return aas.contains(three);
233 }
234
235 /**
236 * Gets all three letter code standard aminoacids in a Set
237 * @return
238 */
239 public static Set<String> getAAs() {
240 return aas;
241 }
242
243 /**
244 * Gets all contact type names in a Set
245 * @return
246 */
247 public static Set<String> getAllContactTypes() {
248 return cts.keySet();
249 }
250
251 /**
252 * Gets all single atom contact types in a Set
253 * @return
254 */
255 public static Set<String> getSingleAtomContactTypes() {
256 Set<String> singleAtomCts = new TreeSet<String>();
257 for (ContactType contactType:cts.values()) {
258 if (!contactType.isMultiAtom()) singleAtomCts.add(contactType.getName());
259 }
260 return singleAtomCts;
261 }
262
263 /**
264 * Gets all multiple atom contact types in a Set
265 * @return
266 */
267 public static Set<String> getMultiAtomContactTypes() {
268 Set<String> multiAtomCts = new TreeSet<String>();
269 for (ContactType contactType:cts.values()) {
270 if (contactType.isMultiAtom()) multiAtomCts.add(contactType.getName());
271 }
272 return multiAtomCts;
273
274 }
275
276 /**
277 * Returns true if ct is a valid contact type name
278 * Crossed contacts (e.g. BB/SC or Ca/Cg) will also be valid
279 * @param ct
280 * @return
281 */
282 public static boolean isValidContactType(String ct){
283 Set<String> allCts = getAllContactTypes(); // depends on cts being initialised
284 if (ct.contains("/")){
285 String[] cts = ct.split("/");
286 if (allCts.contains(cts[0]) && allCts.contains(cts[1])) {
287 return true;
288 } else {
289 return false;
290 }
291 }
292 return allCts.contains(ct);
293 }
294
295 /**
296 * Returns true if ct is a valid single atom contact type name
297 * Crossed contacts (e.g. Ca/Cg) will also be valid
298 * @param ct
299 * @return
300 */
301 public static boolean isValidSingleAtomContactType(String ct){
302 Set<String> singleAtomCts = getSingleAtomContactTypes(); // depends on cts being initialised
303 if (ct.contains("/")){
304 String[] cts = ct.split("/");
305 if (singleAtomCts.contains(cts[0]) && singleAtomCts.contains(cts[1])) {
306 return true;
307 } else {
308 return false;
309 }
310 }
311 return singleAtomCts.contains(ct);
312 }
313
314 /**
315 * Returns true if ct is a valid multiple atom contact type name
316 * Crossed contacts (e.g. BB/SC) will also be valid
317 * @param ct
318 * @return
319 */
320 public static boolean isValidMultiAtomContactType(String ct){
321 Set<String> multiAtomCts = getMultiAtomContactTypes(); // depends on cts being initialised
322 if (ct.contains("/")){
323 String[] cts = ct.split("/");
324 if (multiAtomCts.contains(cts[0]) && multiAtomCts.contains(cts[1])) {
325 return true;
326 } else {
327 return false;
328 }
329 }
330 return multiAtomCts.contains(ct);
331 }
332
333 /**
334 * Gets the lower bound distance for assigning distance restraints
335 * to contacts given a contact type
336 * @param ct
337 * @return
338 */
339 public static double getLowerBoundDistance(String ct) {
340 return lowerBoundDistances.get(ct);
341 }
342
343 /**
344 * Converts from one letter aminoacid codes to three letter codes
345 * If invalid input returns null
346 * @param one
347 * @return
348 */
349 public static String oneletter2threeletter(String one) {
350 return one2threeletter.get(one);
351 }
352
353 /**
354 * Converts from three letter aminoacid codes to one letter codes
355 * If invalid input returns null
356 * @param three
357 * @return
358 */
359 public static String threeletter2oneletter(String three) {
360 return three2oneletter.get(three);
361 }
362
363 /**
364 * Converts from aminoacid full names (capitalised first letter, rest lower case)
365 * to three letter codes
366 * @param full
367 * @return
368 */
369 public static String fullname2threeletter(String full){
370 return fullname2threeletter.get(full);
371 }
372
373 /**
374 * Returns true if given String is a valid aminoacid name
375 * (first letter capitalised, rest lower case)
376 * @param full
377 * @return
378 */
379 public static boolean isValidFullName(String full) {
380 return fullname2threeletter.keySet().contains(full);
381 }
382
383 /**
384 * Returns all aminoacid full names in a Set
385 * @return
386 */
387 public static Set<String> getAAFullNames(){
388 return fullname2threeletter.keySet();
389 }
390
391 /**
392 * Given a three letter code aminoacid and an atom name say whether
393 * the atom is a valid atom for that aminoacid
394 * Doesn't consider OXT to be a valid atom for any aminoacid
395 * @param aa
396 * @param atom
397 * @return
398 */
399 public static boolean isValidAtom(String aa, String atom) {
400 return aas2atoms.get(aa).contains(atom);
401 }
402
403 /**
404 * Given a three letter code aminoacid and an atom name say whether
405 * the atom is a valid atom for that aminoacid
406 * Considers OXT to be a valid atom for all aminoacids
407 * @param aa
408 * @param atom
409 * @return
410 */
411 public static boolean isValidAtomWithOXT(String aa, String atom) {
412 if (atom.equals("OXT")) return true;
413 return aas2atoms.get(aa).contains(atom);
414 }
415
416 /**
417 * Gets all (non-Hydrogen) atoms for an aminoacid (three letter code)
418 * @param aa
419 * @return
420 */
421 public static Set<String> getAtoms(String aa) {
422 return aas2atoms.get(aa);
423 }
424
425 /**
426 * Gets the number of non-hydrogen atoms for an aminoacid (three letter code)
427 * @param aa
428 * @return
429 */
430 public static int getNumberAtoms(String aa) {
431 return aas2atoms.get(aa).size();
432 }
433
434 /**
435 * Returns a Set of all atom names given an aminoacid and a contact type
436 * e.g. for aa="SER" and ct="SC" returns ["CB", "CG"]
437 * @param ct
438 * @param aa
439 * @return
440 */
441 public static Set<String> getAtomsForCTAndRes(String ct, String aa) {
442 return cts.get(ct).get(aa);
443 }
444
445 }