ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 508
Committed: Thu Jan 10 15:39:00 2008 UTC (16 years, 9 months ago) by stehr
File size: 67598 byte(s)
Log Message:
Fixed bug in DSSP secondary structure assignment, first element was starting at index 0
Line File contents
1 package proteinstructure;
2
3 import java.io.*;
4 import java.net.*;
5 import java.util.Collections;
6 import java.util.HashMap;
7 import java.util.Iterator;
8 import java.util.Locale;
9 import java.util.Map;
10 import java.util.Set;
11 import java.util.TreeMap;
12 import java.util.ArrayList;
13 import java.util.TreeSet;
14 import java.util.Vector;
15 import java.util.regex.*;
16 import java.util.zip.GZIPInputStream;
17 import java.util.Arrays;
18
19 import javax.vecmath.Point3d;
20 import javax.vecmath.Point3i;
21 import javax.vecmath.Vector3d;
22
23 import edu.uci.ics.jung.graph.util.EdgeType;
24 import edu.uci.ics.jung.graph.util.Pair;
25
26 import Jama.Matrix;
27 import Jama.SingularValueDecomposition;
28
29 import tools.MySQLConnection;
30 import java.sql.SQLException;
31 import java.sql.Statement;
32
33 /**
34 * A single chain pdb protein structure
35 *
36 */
37 public abstract class Pdb {
38
39 protected static final int DEFAULT_MODEL=1; // default model serial (NMR structures)
40 public static final String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence
41 public static final String NULL_CHAIN_CODE = "NULL"; // to specify the NULL (blank in pdb file) chain code
42 public static final String NO_PDB_CODE = ""; // to specify no pdb code
43 public static final String NO_PDB_CHAIN_CODE = ""; // to specify no pdb chain code
44 public static final String NO_CHAIN_CODE = ""; // to specify no internal chain code
45
46 protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials
47 protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type
48 protected HashMap<Integer,Point3d> atomser2coord; // atom serials to 3D coordinates
49 protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials
50 protected HashMap<Integer,String> atomser2atom; // atom serials to atom names
51 protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials
52 protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings)
53
54 protected SecondaryStructure secondaryStructure; // the secondary structure annotation for this pdb object (should never be null)
55 protected Scop scop; // the scop annotation for this pdb object
56 protected HashMap<Integer,Double> resser2allrsa; // internal residue serials to all-atoms rsa
57 protected HashMap<Integer,Double> resser2scrsa; // internal residue serials to SC rsa
58 protected HashMap<Integer,Double> resser2consurfhsspscore; // internal residue serials to SC rsa
59 protected HashMap<Integer,Integer> resser2consurfhsspcolor; // internal residue serials to SC rsa
60 protected EC ec; // the ec annotation for this pdb object
61 protected CatalSiteSet catalSiteSet; // the catalytic site annotation for this pdb object
62
63 protected String sequence; // full sequence as it appears in SEQRES field
64 protected String pdbCode;
65 // given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file)
66 protected String pdbChainCode;
67 // Our internal chain identifier:
68 // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
69 // - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A"
70 protected String chainCode;
71 protected int model; // the model serial for NMR structures
72 protected String sid; // the scop id if Pdb has been restricted (restrictToScopDomain)
73
74 // in case we read from pdb file, 2 possibilities:
75 // 1) SEQRES field is present: fullLength coincides with sequence length
76 // 2) SEQRES not present: fullLength is maximum observed residue (so if residue numbering is correct that only misses possible non-observed residues at end of chain)
77 protected int fullLength; // length of full sequence as it appears in SEQRES field
78 protected int obsLength; // length without unobserved, non standard aas
79
80 protected boolean dataLoaded; // true if this object has been loaded with pdb data, false when is empty
81
82 protected String db; // the db from which we have taken the data (if applies)
83 protected MySQLConnection conn;
84
85 /*--------------------------------- abstract methods --------------------------------------------*/
86 /**
87 * Returns all pdb chain codes for this Pdb entry (be it a file or an entry in a database)
88 * @return pdb chain codes
89 */
90 public abstract String[] getChains() throws PdbLoadError;
91
92 /**
93 * Returns all models for this Pdb entry (be it a file or an entry in a database)
94 * @return
95 */
96 public abstract Integer[] getModels() throws PdbLoadError;
97
98 /**
99 * Loads pdb data (coordinates, sequence, etc.) from the source (file or database)
100 * for given pdbChainCode and modelSerial
101 * @param pdbChainCode
102 * @param modelSerial
103 * @throws PdbLoadError
104 */
105 public abstract void load(String pdbChainCode, int modelSerial) throws PdbLoadError;
106
107
108 /*--------------------------------- public methods ----------------------------------------------*/
109
110 /**
111 * Loads pdb data (coordinates, sequence, etc.) from the source (file or database)
112 * for given pdbChainCode and default model serial (1)
113 * @param pdbChainCode
114 * @throws PdbLoadError */
115
116 public void load(String pdbChainCode) throws PdbLoadError {
117 load(pdbChainCode, DEFAULT_MODEL);
118 }
119
120 /**
121 * Returns true if this Pdb has been loaded with pdb data (i.e. when
122 * load(pdbChainCode) has been called), false if it is empty
123 */
124 public boolean isDataLoaded() {
125 return dataLoaded;
126 }
127
128 public int checkCSA(String version, boolean online) throws IOException {
129 BufferedReader in;
130 String inputLine;
131
132 if (online) {
133 URL csa = new URL("http://www.ebi.ac.uk/thornton-srv/databases/CSA/archive/CSA_"+version.replaceAll("\\.","_")+".dat.gz");
134 URLConnection conn = csa.openConnection();
135 InputStream inURL = conn.getInputStream();
136 OutputStream out = new BufferedOutputStream(new FileOutputStream("CSA_"+version.replaceAll("\\.","_")+".dat.gz"));
137 byte[] buffer = new byte[1024];
138 int numRead;
139 long numWritten = 0;
140 while ((numRead = inURL.read(buffer)) != -1) {
141 out.write(buffer, 0, numRead);
142 numWritten += numRead;
143 }
144 inURL.close();
145 out.close();
146 in = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream("CSA_"+version.replaceAll(".","_")+".dat.gz"))));
147 } else {
148 File csaFile = new File("/project/StruPPi/Databases/CSA/CSA_"+version.replaceAll("\\.","_")+".dat");
149 in = new BufferedReader(new FileReader(csaFile));
150 }
151
152 int csaMistakes = 0;
153 this.catalSiteSet = new CatalSiteSet();
154 String curPdbCode, curPdbChainCode, curPdbResSerial, curResType;
155 String prevPdbCode = "";
156 int curSiteId;
157 int prevSiteId = -1;
158 CatalyticSite cs = null;
159 while ((inputLine = in.readLine()) != null) {
160 String[] fields = inputLine.split(",");
161 curPdbCode = fields[0];
162 curPdbChainCode = (fields[3].equals(""))?"NULL":fields[3];
163 if (curPdbCode.equals(pdbCode)) {
164 if (curPdbChainCode.equals(pdbChainCode)) {
165 curPdbResSerial = fields[4];
166 curResType = fields[2];
167 curSiteId = Integer.valueOf(fields[1]);
168 // only if the pdb residue type is a standard AA
169 if (AAinfo.isValidAA(curResType)) {
170 // only if the pdb residue type agrees with our residue type
171 if (getResTypeFromResSerial(get_resser_from_pdbresser(curPdbResSerial)).equals(curResType)) {
172 // each time the site changes except for the first site of a chain,
173 // add the site to the set
174 if ((curSiteId != prevSiteId) & (prevSiteId != -1)) {
175 catalSiteSet.add(cs);
176 }
177 // each time the site changes or it is the first site of a chain,
178 // create a new site
179 if ((curSiteId != prevSiteId) | (prevSiteId == -1)) {
180 String littEntryPdbCode = fields[7].substring(0,4);
181 String littEntryPdbChainCode = fields[7].substring(4).equals("")?"NULL":fields[7].substring(4);
182 cs = new CatalyticSite(curSiteId, fields[6], littEntryPdbCode, littEntryPdbChainCode);
183 }
184 // add the res to the site
185 cs.addRes(get_resser_from_pdbresser(curPdbResSerial), fields[5]);
186 prevSiteId = curSiteId;
187 } else {
188 csaMistakes++;
189 }
190 }
191 }
192 } else if (prevPdbCode.equals(pdbCode)) {
193 if (prevSiteId != -1) {
194 catalSiteSet.add(cs);
195 }
196 break;
197 }
198 prevPdbCode = curPdbCode;
199 }
200
201 in.close();
202 if (online) {
203 File csaFile = new File("CSA_"+version.replaceAll(".","_")+".dat.gz");
204 csaFile.delete();
205 }
206
207 if ((csaMistakes > 0) | (prevSiteId == -1)) {
208 this.catalSiteSet = null;
209 }
210
211 return csaMistakes;
212 }
213
214 public void checkEC(boolean online) throws IOException {
215
216 this.ec = new EC();
217 ECRegion er = null;
218 String startPdbResSer = "", endPdbResSer = "";
219 BufferedReader in;
220 String inputLine;
221 Pattern p = Pattern.compile("^ \\d");
222
223 if (online) {
224 URL pdb2ecMapping = new URL("http://www.bioinf.org.uk/pdbsprotec/mapping.txt");
225 URLConnection p2e= pdb2ecMapping.openConnection();
226 in = new BufferedReader(new InputStreamReader(p2e.getInputStream()));
227 } else {
228 File pdb2ecMapping = new File("/project/StruPPi/Databases/PDBSProtEC/mapping.txt");
229 in = new BufferedReader(new FileReader(pdb2ecMapping));
230 }
231
232 while ((inputLine = in.readLine()) != null) {
233 Matcher m = p.matcher(inputLine);
234 if (m.find()) {
235 String curPdbCode = inputLine.substring(0,9).trim();
236 String curPdbChainCode = (inputLine.charAt(11) == ' ')?"NULL":String.valueOf(inputLine.charAt(11));
237 if (curPdbCode.equals(pdbCode) && curPdbChainCode.equals(pdbChainCode)) {
238 startPdbResSer = inputLine.substring(20,26).trim();
239 endPdbResSer = inputLine.substring(27,33).trim();
240 String id = inputLine.substring(43).trim();
241 //System.out.println(curPdbCode+":"+curPdbChainCode+":"+startPdbResSer+"-"+endPdbResSer+":"+ec);
242 er = new ECRegion(id, startPdbResSer, endPdbResSer, get_resser_from_pdbresser(startPdbResSer), get_resser_from_pdbresser(endPdbResSer));
243 ec.add(er);
244 }
245 }
246 }
247
248 in.close();
249
250 }
251
252 public int checkConsurfHssp(boolean online) throws IOException {
253
254 BufferedReader in;
255 if (online) {
256 URL consurfhssp = new URL("http://consurf.tau.ac.il/results/HSSP_ML_"+pdbCode+(pdbChainCode.equals("NULL")?"_":pdbChainCode)+"/pdb"+pdbCode+".gradesPE");
257 URLConnection ch = consurfhssp.openConnection();
258 in = new BufferedReader(new InputStreamReader(ch.getInputStream()));
259 } else {
260 File consurfhssp = new File("/project/StruPPi/Databases/ConSurf-HSSP/ConservationGrades/"+pdbCode+(pdbChainCode.equals("NULL")?"_":pdbChainCode)+".grades");
261 in = new BufferedReader(new FileReader(consurfhssp));
262 }
263 String inputLine;
264 Pattern p = Pattern.compile("^\\s+\\d+");
265 int lineCount = 0;
266 int consurfHsspMistakes = 0;
267
268 Integer[] ressers = new Integer[resser2restype.size()];
269 resser2restype.keySet().toArray(ressers);
270 Arrays.sort(ressers);
271
272 resser2consurfhsspscore = new HashMap<Integer,Double>();
273 resser2consurfhsspcolor = new HashMap<Integer,Integer>();
274
275 while ((inputLine = in.readLine()) != null) {
276 Matcher m = p.matcher(inputLine);
277 if (m.find()) {
278 lineCount++;
279 int resser = ressers[lineCount-1];
280 String[] fields = inputLine.split("\\s+");
281 String pdbresser = fields[3].equals("-")?"-":fields[3].substring(3, fields[3].indexOf(':'));
282 if (fields[2].equals(AAinfo.threeletter2oneletter(getResTypeFromResSerial(resser))) &
283 (pdbresser.equals("-") | pdbresser.equals(get_pdbresser_from_resser(resser)))) {
284 resser2consurfhsspscore.put(resser, Double.valueOf(fields[4]));
285 resser2consurfhsspcolor.put(resser, Integer.valueOf(fields[5]));
286 } else {
287 consurfHsspMistakes++;
288 }
289 }
290 }
291 in.close();
292
293 consurfHsspMistakes += Math.abs(ressers.length - resser2consurfhsspscore.size());
294 if (consurfHsspMistakes > 0) {
295 resser2consurfhsspscore.clear();
296 resser2consurfhsspcolor.clear();
297 }
298
299 return consurfHsspMistakes;
300
301 }
302
303 public void runNaccess(String naccessExecutable, String naccessParameters) throws Exception {
304 String pdbFileName = pdbCode+chainCode+".pdb";
305 dump2pdbfile(pdbFileName, true);
306 String line;
307 int errorLineCount = 0;
308
309 File test = new File(naccessExecutable);
310 if(!test.canRead()) throw new IOException("Naccess Executable is not readable");
311
312 Process myNaccess = Runtime.getRuntime().exec(naccessExecutable + " " + pdbFileName + " " + naccessParameters);
313 BufferedReader naccessOutput = new BufferedReader(new InputStreamReader(myNaccess.getInputStream()));
314 BufferedReader naccessError = new BufferedReader(new InputStreamReader(myNaccess.getErrorStream()));
315 while((line = naccessOutput.readLine()) != null) {
316 }
317 while((line = naccessError.readLine()) != null) {
318 errorLineCount++;
319 }
320 naccessOutput.close();
321 naccessError.close();
322 int exitVal = myNaccess.waitFor();
323 if ((exitVal == 1) || (errorLineCount > 0)) {
324 throw new Exception("Naccess:Wrong arguments or pdb file format!");
325 }
326
327 File rsa = new File(pdbCode+chainCode+".rsa");
328 if (rsa.exists()) {
329 resser2allrsa = new HashMap<Integer,Double>();
330 resser2scrsa = new HashMap<Integer,Double>();
331 BufferedReader rsaInput = new BufferedReader(new FileReader(rsa));
332 while ((line = rsaInput.readLine()) != null) {
333 if (line.startsWith("RES")) {
334 int resser = Integer.valueOf(line.substring(9,13).trim());
335 double allrsa = Double.valueOf(line.substring(22,28).trim());
336 double scrsa = Double.valueOf(line.substring(35,41).trim());
337 resser2allrsa.put(resser, allrsa);
338 resser2scrsa.put(resser, scrsa);
339 }
340 }
341 rsaInput.close();
342 }
343 String[] filesToDelete = { ".pdb", ".asa", ".rsa", ".log" };
344 for (int i=0; i < filesToDelete.length; i++) {
345 File fileToDelete = new File(pdbCode+chainCode+filesToDelete[i]);
346 if (fileToDelete.exists()) {
347 fileToDelete.delete();
348 }
349 }
350
351 }
352
353 public void checkScop(String version, boolean online) throws IOException {
354 this.scop = new Scop();
355 ScopRegion sr = null;
356 String startPdbResSer = "", endPdbResSer = "";
357 BufferedReader in;
358 String inputLine;
359
360 if (online) {
361 URL scop_cla = new URL("http://scop.mrc-lmb.cam.ac.uk/scop/parse/dir.cla.scop.txt_"+version);
362 URLConnection sc = scop_cla.openConnection();
363 in = new BufferedReader(new InputStreamReader(sc.getInputStream()));
364 } else {
365 File scop_cla = new File("/project/StruPPi/Databases/SCOP/dir.cla.scop.txt_"+version);
366 in = new BufferedReader(new FileReader(scop_cla));
367 }
368
369 while ((inputLine = in.readLine()) != null)
370 if (inputLine.startsWith(pdbCode,1)) {
371 String[] fields = inputLine.split("\\s+");
372 String[] regions = fields[2].split(",");
373 for (int j=0; j < regions.length; j++) {
374 Pattern p = Pattern.compile("^(-)|([a-zA-Z\\d]):(-?\\d+[a-zA-Z]*)-(-?\\d+[a-zA-Z]*)|(-?\\d+[a-zA-Z]*)-(-?\\d+[a-zA-Z]*)|([a-zA-Z\\d]):");
375 Matcher m = p.matcher(regions[j]);
376 if (m.find()) {
377 if (((pdbChainCode.equals("NULL") && ((m.group(1) != null && m.group(1).equals("-")) || m.group(5) != null))) ||
378 (m.group(2) != null && m.group(2).equals(pdbChainCode)) ||
379 (m.group(7) != null && m.group(7).equals(pdbChainCode))) {
380 if (m.group(3) != null) {
381 startPdbResSer = m.group(3);
382 endPdbResSer = m.group(4);
383 } else if (m.group(5) != null) {
384 startPdbResSer = m.group(5);
385 endPdbResSer = m.group(6);
386 } else {
387 startPdbResSer = get_pdbresser_from_resser(Collections.min(resser2pdbresser.keySet()));
388 endPdbResSer = get_pdbresser_from_resser(Collections.max(resser2pdbresser.keySet()));
389 }
390 sr = new ScopRegion(fields[0], fields[3], Integer.parseInt(fields[4]), j, regions.length, startPdbResSer, endPdbResSer, get_resser_from_pdbresser(startPdbResSer), get_resser_from_pdbresser(endPdbResSer));
391 scop.add(sr);
392 }
393 }
394 }
395 //break;
396 }
397 in.close();
398
399 scop.setVersion(version);
400 }
401
402 public void runDssp(String dsspExecutable, String dsspParameters) throws IOException {
403 runDssp(dsspExecutable, dsspParameters, SecStrucElement.ReducedState.FOURSTATE, SecStrucElement.ReducedState.FOURSTATE);
404 }
405
406 /**
407 * Runs an external DSSP executable and (re)assigns the secondary structure annotation from the parsed output.
408 * Existing secondary structure information will be overwritten.
409 * As of September 2007, a DSSP executable can be downloaded from http://swift.cmbi.ru.nl/gv/dssp/
410 * after filling out a license agreement. For this version, the dsspParamters variable should be
411 * set to "--" (two hyphens).
412 */
413 public void runDssp(String dsspExecutable, String dsspParameters, SecStrucElement.ReducedState state4Type, SecStrucElement.ReducedState state4Id) throws IOException {
414 String startLine = " # RESIDUE AA STRUCTURE BP1 BP2 ACC";
415 String line;
416 int lineCount = 0;
417 char ssType, sheetLabel;
418 TreeMap<Integer, Character> ssTypes;
419 TreeMap<Integer, Character> sheetLabels;
420 int resNum;
421 String resNumStr;
422 File test = new File(dsspExecutable);
423 if(!test.canRead()) throw new IOException("DSSP Executable is not readable");
424 Process myDssp = Runtime.getRuntime().exec(dsspExecutable + " " + dsspParameters);
425 PrintWriter dsspInput = new PrintWriter(myDssp.getOutputStream());
426 BufferedReader dsspOutput = new BufferedReader(new InputStreamReader(myDssp.getInputStream()));
427 BufferedReader dsspError = new BufferedReader(new InputStreamReader(myDssp.getErrorStream()));
428 writeAtomLines(dsspInput, true); // pipe atom lines to dssp
429 dsspInput.close();
430 ssTypes = new TreeMap<Integer,Character>();
431 sheetLabels = new TreeMap<Integer,Character>();
432 while((line = dsspOutput.readLine()) != null) {
433 lineCount++;
434 if(line.startsWith(startLine)) {
435 //System.out.println("Dssp Output: ");
436 break;
437 }
438 }
439 while((line = dsspOutput.readLine()) != null) {
440 lineCount++;
441 resNumStr = line.substring(5,10).trim();
442 ssType = line.charAt(16);
443 sheetLabel = line.charAt(33);
444 if (state4Id == SecStrucElement.ReducedState.FOURSTATE && SecStrucElement.getReducedStateTypeFromDsspType(ssType, state4Id) == SecStrucElement.OTHER) {
445 sheetLabel = ' ';
446 }
447 if(!resNumStr.equals("")) { // this should only happen if dssp inserts a line indicating a chain break
448 try {
449 resNum = Integer.valueOf(resNumStr);
450 ssTypes.put(resNum, ssType);
451 sheetLabels.put(resNum, sheetLabel);
452 } catch (NumberFormatException e) {
453 System.err.println("Error while parsing DSSP output. Expected residue number, found '" + resNumStr + "' in line " + lineCount);
454 }
455 }
456 }
457 //for(char c:ssTypes) {System.out.print(c);}; System.out.println(".");
458 dsspOutput.close();
459 dsspError.close();
460
461 if(ssTypes.size() == 0) {
462 throw new IOException("No DSSP output found.");
463 }
464
465 if(ssTypes.size() != get_length()) { // compare with number of observed residues
466 System.err.println("Error: DSSP output size (" + ssTypes.size() + ") does not match number of observed residues in structure (" + get_length() + ").");
467 }
468
469 // assign secondary structure
470 this.secondaryStructure = new SecondaryStructure(); // forget the old annotation
471 char lastType = SecStrucElement.getReducedStateTypeFromDsspType(ssTypes.get(ssTypes.firstKey()), state4Id);
472 int lastResSer = ssTypes.firstKey();
473 char lastSheet = sheetLabels.get(lastResSer);
474 char thisType, thisSheet, reducedType;
475 int start = 1;
476 int elementCount = 0;
477 SecStrucElement ssElem;
478 String ssId;
479 for(int resSer:ssTypes.keySet()) {
480 thisType = SecStrucElement.getReducedStateTypeFromDsspType(ssTypes.get(resSer), state4Id);
481 thisSheet = sheetLabels.get(resSer);
482 if(thisType != lastType || thisSheet != lastSheet || resSer > lastResSer+1) {
483 // finish previous element, start new one
484 elementCount++;
485 reducedType = SecStrucElement.getReducedStateTypeFromDsspType(ssTypes.get(lastResSer), state4Type);
486 ssId = new Character(lastType).toString() + (lastSheet==' '?"":String.valueOf(lastSheet)) + new Integer(elementCount).toString();
487 ssElem = new SecStrucElement(reducedType,start,lastResSer,ssId);
488 secondaryStructure.add(ssElem);
489 start = resSer;
490 lastType = thisType;
491 lastSheet = thisSheet;
492 }
493 lastResSer = resSer;
494 }
495 // finish last element
496 elementCount++;
497 reducedType = SecStrucElement.getReducedStateTypeFromDsspType(ssTypes.get(ssTypes.lastKey()), state4Type);
498 ssId = new Character(lastType).toString() + (lastSheet==' '?"":String.valueOf(lastSheet)) + new Integer(elementCount).toString();
499 ssElem = new SecStrucElement(reducedType, start,ssTypes.lastKey(),ssId);
500 secondaryStructure.add(ssElem);
501
502 secondaryStructure.setComment("DSSP");
503 }
504
505 /** Writes atom lines for this structure to the given output stream */
506 private void writeAtomLines(PrintWriter Out, boolean pdbCompatible) {
507 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
508 Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'");
509 String chainCodeStr = chainCode;
510 if (pdbCompatible) {
511 chainCodeStr = chainCode.substring(0,1);
512 }
513 for (String resser_atom:resser_atom2atomserial.keySet()){
514 int atomserial = resser_atom2atomserial.get(resser_atom);
515 int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
516 String atom = resser_atom.split("_")[1];
517 String res_type = resser2restype.get(res_serial);
518 Point3d coords = atomser2coord.get(atomserial);
519 String atomType = atom.substring(0,1);
520 Object[] fields = {atomserial, atom, res_type, chainCodeStr, res_serial, coords.x, coords.y, coords.z, atomType};
521 lines.put(atomserial, fields);
522 }
523 for (int atomserial:lines.keySet()){
524 // Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales)
525 Out.printf(Locale.US,"ATOM %5d %-3s %3s %1s%4d %8.3f%8.3f%8.3f 1.00 0.00 %s\n",lines.get(atomserial));
526 }
527 Out.println("END");
528 Out.close();
529 }
530
531 /**
532 * Dumps coordinate data into a file in pdb format (ATOM lines only)
533 * The residue serials dumped are the internal ones.
534 * The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects
535 * @param outfile
536 * @throws IOException
537 */
538 public void dump2pdbfile(String outfile) throws IOException {
539 dump2pdbfile(outfile, false);
540 }
541
542 public void dump2pdbfile(String outfile, boolean pdbCompatible) throws IOException {
543 PrintWriter Out = new PrintWriter(new FileOutputStream(outfile));
544 writeAtomLines(Out, pdbCompatible);
545 }
546
547 /**
548 * Dump the full sequence of this Pdb object in fasta file format
549 * @param seqfile
550 * @throws IOException
551 */
552 public void dumpseq(String seqfile) throws IOException {
553 PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
554 Out.println(">"+pdbCode+"_"+pdbChainCode);
555 Out.println(sequence);
556 Out.close();
557 }
558
559 /**
560 * Returns the number of observed standard residues.
561 * TODO: Refactor method name
562 */
563 public int get_length(){
564 return obsLength;
565 }
566
567 /** Returns the number of residues in the sequence of this protein. */
568 public int getFullLength() {
569 return fullLength;
570 }
571
572 /**
573 * Returns number of (non-Hydrogen) atoms in the protein
574 * @return
575 */
576 public int getNumAtoms() {
577 return atomser2atom.size();
578 }
579
580 /**
581 * Gets a TreeMap with atom serials as keys and their coordinates as values for the given contact type
582 * The contact type can't be a cross contact type, it doesn't make sense here
583 * @param ct
584 * @return
585 */
586 private TreeMap<Integer,Point3d> get_coords_for_ct(String ct) {
587 TreeMap<Integer,Point3d> coords = new TreeMap<Integer,Point3d>();
588 for (int resser:resser2restype.keySet()){
589 Set<String> atoms = AAinfo.getAtomsForCTAndRes(ct, resser2restype.get(resser));
590 for (String atom:atoms){
591 if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
592 int atomser = resser_atom2atomserial.get(resser+"_"+atom);
593 Point3d coord = atomser2coord.get(atomser);
594 coords.put(atomser, coord);
595 }
596 else {
597 //System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
598 }
599 }
600 // in cts ("ALL","BB") we still miss the OXT, we need to add it now if it is there (it will be there when this resser is the last residue)
601 if ((ct.equals("ALL") || ct.equals("BB")) && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
602 int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
603 Point3d coord = atomser2coord.get(atomser);
604 coords.put(atomser, coord);
605 }
606 }
607 return coords;
608 }
609
610 /**
611 * Gets a TreeMap with residue serials+"_"+atomname as keys and their coordinates as values for the given contact type
612 * The contact type can't be a cross contact type, it doesn't make sense here
613 * Used in rmsd method
614 * @param ct
615 * @return
616 */
617 private TreeMap<String,Point3d> get_coords_for_ct_4rmsd(String ct) {
618 TreeMap<String,Point3d> coords = new TreeMap<String,Point3d>();
619 for (int resser:resser2restype.keySet()){
620 Set<String> atoms = AAinfo.getAtomsForCTAndRes(ct,resser2restype.get(resser));
621 for (String atom:atoms){
622 if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
623 int atomser = resser_atom2atomserial.get(resser+"_"+atom);
624 Point3d coord = atomser2coord.get(atomser);
625 coords.put(resser+"_"+atom, coord);
626 }
627 }
628 // in cts ("ALL","BB") we still miss the OXT, we need to add it now if it is there (it will be there when this resser is the last residue)
629 if ((ct.equals("ALL") || ct.equals("BB")) && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
630 int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
631 Point3d coord = atomser2coord.get(atomser);
632 coords.put(resser+"_"+"OXT", coord);
633 }
634 }
635 return coords;
636 }
637
638 /**
639 * Returns the distance matrix as a HashMap with residue serial pairs as keys
640 * For multi atom contact types the distance matrix has the minimum distance for each pair of
641 * residues
642 * AAinfo.isValidSingleAtomCT(ct) can be used to check before calling.
643 * @param ct contact type for which distances are being calculated
644 * @return a map which assigns to each edge the corresponding distance
645 */
646 public HashMap<Pair<Integer>, Double> calculate_dist_matrix(String ct){
647 HashMap<Pair<Integer>,Double> distMatrixAtoms = calculate_atom_dist_matrix(ct);
648
649 // mapping atom serials to residue serials
650 // TODO: we could integrate this with the code in calculate_atom_dist_matrix to avoid storing two distance maps in memory
651 HashMap<Pair<Integer>,Double> distMatrixRes = new HashMap<Pair<Integer>, Double>();
652 for (Pair<Integer> cont: distMatrixAtoms.keySet()){
653 int i_resser = get_resser_from_atomser(cont.getFirst());
654 int j_resser = get_resser_from_atomser(cont.getSecond());
655 Pair<Integer> edge = new Pair<Integer>(i_resser,j_resser);
656 if (distMatrixRes.containsKey(edge)) {
657 distMatrixRes.put(edge, Math.min(distMatrixRes.get(edge), distMatrixAtoms.get(cont)));
658 } else {
659 distMatrixRes.put(edge, distMatrixAtoms.get(cont));
660 }
661 }
662
663 return distMatrixRes;
664 }
665
666 /**
667 * Returns the distance matrix as a HashMap with atom serial pairs as keys
668 * This method can be used for any contact type
669 * AAinfo.isValidSingleAtomCT(ct) can be used to check before calling.
670 * @param ct contact type for which distances are being calculated
671 * @return a map which assings to each atom pair the corresponding distance
672 */
673 public HashMap<Pair<Integer>, Double> calculate_atom_dist_matrix(String ct){
674 HashMap<Pair<Integer>,Double> distMatrixAtoms = new HashMap<Pair<Integer>,Double>();
675 if (!ct.contains("/")){
676 TreeMap<Integer,Point3d> coords = get_coords_for_ct(ct);
677 for (int i_atomser:coords.keySet()){
678 for (int j_atomser:coords.keySet()){
679 if (j_atomser>i_atomser) {
680 Pair<Integer> pair = new Pair<Integer>(i_atomser,j_atomser);
681 distMatrixAtoms.put(pair, coords.get(i_atomser).distance(coords.get(j_atomser)));
682 }
683 }
684 }
685 } else {
686 String i_ct = ct.split("/")[0];
687 String j_ct = ct.split("/")[1];
688 TreeMap<Integer,Point3d> i_coords = get_coords_for_ct(i_ct);
689 TreeMap<Integer,Point3d> j_coords = get_coords_for_ct(j_ct);
690 for (int i_atomser:i_coords.keySet()){
691 for (int j_atomser:j_coords.keySet()){
692 if (j_atomser!=i_atomser){
693 Pair<Integer> pair = new Pair<Integer>(i_atomser,j_atomser);
694 distMatrixAtoms.put(pair, i_coords.get(i_atomser).distance(j_coords.get(j_atomser)));
695 }
696 }
697 }
698 }
699
700 return distMatrixAtoms;
701 }
702
703 /**
704 * Calculates the radius of gyration of this Pdb
705 * (defined as half of the maximum distance between any 2 CA atoms)
706 * @return
707 */
708 public double calculateRadGyration() {
709 //TODO this is a very raw implementation o(n^2): should optimise it if that's really needed
710 return Collections.max(this.calculate_atom_dist_matrix("Ca").values())/2;
711 }
712
713 /**
714 * Get the graph for given contact type and cutoff for this Pdb object.
715 * Returns a Graph object with the contacts
716 * We do geometric hashing for fast contact computation (without needing to calculate full distance matrix)
717 * @param ct
718 * @param cutoff
719 * @return
720 */
721 private AIGraph getAIGraph(String ct, double cutoff){
722 TreeMap<Integer,Point3d> i_coords = null;
723 TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymetric edge types
724 boolean crossed = false;
725 if (!ct.contains("/")){
726 i_coords = get_coords_for_ct(ct);
727 crossed = false;
728 } else {
729 String i_ct = ct.split("/")[0];
730 String j_ct = ct.split("/")[1];
731 i_coords = get_coords_for_ct(i_ct);
732 j_coords = get_coords_for_ct(j_ct);
733 crossed = true;
734 }
735 int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials
736 int[] j_atomserials = null;
737
738 int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A)
739
740 int boxSize = (int) Math.floor(cutoff*SCALE);
741
742 HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>();
743 int i=0;
744 for (int i_atomser:i_coords.keySet()){
745 //coordinates for atom serial atomser, we will use i as its identifier below
746 Point3d coord = i_coords.get(i_atomser);
747 int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
748 int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
749 int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
750 Point3i floor = new Point3i(floorX,floorY,floorZ);
751 if (boxes.containsKey(floor)){
752 // we put the coords for atom i in its corresponding box (identified by floor)
753 boxes.get(floor).put_i_Point(i, coord);
754 if (!crossed){
755 boxes.get(floor).put_j_Point(i, coord);
756 }
757 } else {
758 Box box = new Box(floor);
759 box.put_i_Point(i, coord);
760 if (!crossed){
761 box.put_j_Point(i, coord);
762 }
763 boxes.put(floor,box);
764 }
765 i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
766 i++;
767 }
768 int j=0;
769 if (crossed) {
770 j_atomserials = new int[j_coords.size()];
771 for (int j_atomser:j_coords.keySet()){
772 //coordinates for atom serial atomser, we will use j as its identifier below
773 Point3d coord = j_coords.get(j_atomser);
774 int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
775 int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
776 int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
777 Point3i floor = new Point3i(floorX,floorY,floorZ);
778 if (boxes.containsKey(floor)){
779 // we put the coords for atom j in its corresponding box (identified by floor)
780 boxes.get(floor).put_j_Point(j, coord);
781 } else {
782 Box box = new Box(floor);
783 box.put_j_Point(j, coord);
784 boxes.put(floor,box);
785 }
786 j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
787 j++;
788 }
789 } else {
790 j_atomserials = i_atomserials;
791 }
792
793
794 float[][]distMatrix = new float[i_atomserials.length][j_atomserials.length];
795
796 for (Point3i floor:boxes.keySet()){ // for each box
797 // distances of points within this box
798 boxes.get(floor).getDistancesWithinBox(distMatrix,crossed);
799
800 //TODO should iterate only through half of the neighbours here
801 // distances of points from this box to all neighbouring boxes: 26 iterations (26 neighbouring boxes)
802 for (int x=floor.x-boxSize;x<=floor.x+boxSize;x+=boxSize){
803 for (int y=floor.y-boxSize;y<=floor.y+boxSize;y+=boxSize){
804 for (int z=floor.z-boxSize;z<=floor.z+boxSize;z+=boxSize){
805 if (!((x==floor.x)&&(y==floor.y)&&(z==floor.z))) { // skip this box
806 Point3i neighbor = new Point3i(x,y,z);
807 if (boxes.containsKey(neighbor)){
808 boxes.get(floor).getDistancesToNeighborBox(boxes.get(neighbor),distMatrix,crossed);
809 }
810 }
811 }
812 }
813 }
814 }
815
816 // creating the AIGraph
817 AIGraph graph = new AIGraph();
818 TreeMap<Integer,AIGNode> aignodemap = new TreeMap<Integer,AIGNode>();
819 TreeMap<Integer,RIGNode> rignodemap = new TreeMap<Integer,RIGNode>();
820 TreeSet<Integer> atomSerials = new TreeSet<Integer>();
821 atomSerials.addAll(i_coords.keySet());
822 if (j_coords!=null){
823 atomSerials.addAll(j_coords.keySet());
824 }
825 // adding the AIGNodes (including parent RIGNode references)
826 SecondaryStructure secondaryStructureCopy = secondaryStructure.copy();
827 for (int atomSer:atomSerials) {
828 int resser = get_resser_from_atomser(atomSer);
829 SecStrucElement sselem = secondaryStructureCopy.getSecStrucElement(resser);
830 if (!rignodemap.containsKey(resser)) {
831 // NOTE!: we are passing references to the SecStrucElement objects! they point to the same objects as secondaryStructureCopy
832 RIGNode resNode = new RIGNode(resser, resser2restype.get(resser), sselem);
833 rignodemap.put(resser,resNode);
834 }
835 AIGNode atomNode = new AIGNode(atomSer,atomser2atom.get(atomSer),rignodemap.get(resser));
836 aignodemap.put(atomSer,atomNode);
837 graph.addVertex(atomNode);
838 }
839
840 graph.setSecondaryStructure(secondaryStructureCopy);
841 graph.setSerials2NodesMap(aignodemap);
842 graph.setCutoff(cutoff);
843 graph.setSequence(sequence);
844 graph.setPdbCode(pdbCode);
845 graph.setChainCode(chainCode);
846 graph.setPdbChainCode(pdbChainCode);
847 graph.setModel(model);
848 graph.setSid(sid);
849 graph.setCrossed(crossed);
850
851 // populating the AIGraph with AIGEdges
852 for (i=0;i<distMatrix.length;i++){
853 for (j=0;j<distMatrix[i].length;j++){
854 // the condition distMatrix[i][j]!=0.0 takes care of skipping several things:
855 // - diagonal of the matrix in case of non-crossed
856 // - lower half of matrix in case of non-crossed
857 // - cells for which we didn't calculate a distance because the 2 points were not in same or neighbouring boxes (i.e. too far apart)
858 if (distMatrix[i][j]!=0.0f && distMatrix[i][j]<=cutoff){
859 if (!crossed) {
860 graph.addEdge(new AIGEdge(distMatrix[i][j]), aignodemap.get(i_atomserials[i]), aignodemap.get(j_atomserials[j]), EdgeType.UNDIRECTED);
861 }
862 // This condition is to take care of crossed contact types that have overlapping sets of atoms:
863 // the matrix would contain both i,j and j,i but that's only 1 edge in the AIGraph
864 //TODO if our AIGraph didn't allow parallel edges, this extra check woudn't be necessary
865 else if (graph.findEdge(aignodemap.get(i_atomserials[i]), aignodemap.get(j_atomserials[j]))==null) {
866 graph.addEdge(new AIGEdge(distMatrix[i][j]), aignodemap.get(i_atomserials[i]), aignodemap.get(j_atomserials[j]), EdgeType.UNDIRECTED);
867 }
868 }
869
870 }
871 }
872
873 return graph;
874 }
875
876 /**
877 * Returns a RIGraph for given contact type and cutoff
878 * @param ct
879 * @param cutoff
880 * @return
881 */
882 public RIGraph get_graph(String ct, double cutoff, boolean directed) {
883 //NOTE:To generate forbidden graphs, AIGraph should become directed and
884 //-the check for adding parallel atomic edges if crossed should be removed
885 //-atomic edges in both directions should be added if !crossed
886 //-make sure in getRIGraph for undirected graphs not to double-count atomic edges
887 if (directed && AAinfo.isOverlapping(ct)) {
888 throw new IllegalArgumentException();
889 }
890
891 String[] cts = ct.split("\\+");
892 AIGraph atomGraph = getAIGraph(cts[0], cutoff);
893 for(int i=1; i< cts.length; i++) {
894 atomGraph.addGraph(getAIGraph(cts[i], cutoff));
895 }
896 RIGraph graph = atomGraph.getRIGraph(directed);
897
898 graph.setContactType(ct);
899 graph.setCutoff(cutoff);
900
901 return graph;
902 }
903
904 public RIGraph get_graph(String ct, double cutoff) {
905 // TODO eventually we should use the ContactType class as parameter
906 boolean crossed = false;
907 if (ct.contains("/")) {
908 crossed = true;
909 }
910 return get_graph(ct, cutoff, crossed);
911 }
912
913 /**
914 * Returns an all atom graph in a AIGraph object
915 * @param cutoff
916 * @return
917 */
918 public AIGraph getAllAtomGraph(double cutoff) {
919 return this.getAIGraph("ALL", cutoff);
920 }
921
922 public void calcGridDensity(String ct, double cutoff, Map<Integer, Integer> densityCount) {
923 TreeMap<Integer,Point3d> i_coords = null;
924 TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymmetric edge types
925 boolean directed = false;
926 if (!ct.contains("/")){
927 i_coords = get_coords_for_ct(ct); // mapping from atom serials to coordinates
928 directed = false;
929 } else {
930 String i_ct = ct.split("/")[0];
931 String j_ct = ct.split("/")[1];
932 i_coords = get_coords_for_ct(i_ct);
933 j_coords = get_coords_for_ct(j_ct);
934 directed = true;
935 }
936 int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials
937 int[] j_atomserials = null;
938
939 int SCALE=100; // i.e. we use units of hundredths of Angstroms (thus cutoffs can be specified with a maximum precission of 0.01A)
940
941 int boxSize = (int) Math.floor(cutoff*SCALE);
942
943 HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>();
944 int i=0;
945 for (int i_atomser:i_coords.keySet()){
946 //coordinates for atom serial atomser, we will use i as its identifier below
947 Point3d coord = i_coords.get(i_atomser);
948 int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
949 int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
950 int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
951 Point3i floor = new Point3i(floorX,floorY,floorZ);
952 if (boxes.containsKey(floor)){
953 // we put the coords for atom i in its corresponding box (identified by floor)
954 boxes.get(floor).put_i_Point(i, coord);
955 if (!directed){
956 boxes.get(floor).put_j_Point(i, coord);
957 }
958 } else {
959 Box box = new Box(floor);
960 box.put_i_Point(i, coord);
961 if (!directed){
962 box.put_j_Point(i, coord);
963 }
964 boxes.put(floor,box);
965 }
966 i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
967 i++;
968 }
969 int j=0;
970 if (directed) {
971 j_atomserials = new int[j_coords.size()];
972 for (int j_atomser:j_coords.keySet()){
973 //coordinates for atom serial atomser, we will use j as its identifier below
974 Point3d coord = j_coords.get(j_atomser);
975 int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
976 int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
977 int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
978 Point3i floor = new Point3i(floorX,floorY,floorZ);
979 if (boxes.containsKey(floor)){
980 // we put the coords for atom j in its corresponding box (identified by floor)
981 boxes.get(floor).put_j_Point(j, coord);
982 } else {
983 Box box = new Box(floor);
984 box.put_j_Point(j, coord);
985 boxes.put(floor,box);
986 }
987 j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
988 j++;
989 }
990 } else {
991 j_atomserials = i_atomserials;
992 }
993
994 // count density
995 for(Point3i floor:boxes.keySet()) {
996 //int size = boxes.get(floor).size();
997 int size = getNumGridNbs(boxes, floor, boxSize); // count number of neighbouring grid cells with points in them
998 if(densityCount.containsKey(size)) {
999 int old = densityCount.get(size);
1000 densityCount.put(size, ++old);
1001 } else {
1002 densityCount.put(size, 1);
1003 }
1004 }
1005
1006
1007 }
1008
1009 /**
1010 * Calculates and returns the difference of the distance maps of this
1011 * structure and another pdb object. On error returns null. Note that
1012 * the distance maps for the single structures have to have the same
1013 * size.
1014 * @param contactType1 contact type of this structure
1015 * @param pdb2 the second structure
1016 * @param contactType2 contact type of the second structure
1017 * @return the difference distance map
1018 * @see #getDiffDistMap(String, Pdb, String, Alignment, String, String)
1019 */
1020 public HashMap<Pair<Integer>,Double> getDiffDistMap(String contactType1, Pdb pdb2, String contactType2) {
1021 double dist1, dist2, diff;
1022 HashMap<Pair<Integer>,Double> otherDistMatrix = pdb2.calculate_dist_matrix(contactType2);
1023 HashMap<Pair<Integer>,Double> thisDistMatrix = this.calculate_dist_matrix(contactType1);
1024 if(thisDistMatrix.size() != otherDistMatrix.size()) {
1025 System.err.println("Cannot calculate difference distance map. Matrix sizes do not match.");
1026 return null;
1027 }
1028 for(Pair<Integer> e:otherDistMatrix.keySet()){
1029 dist1 = otherDistMatrix.get(e);
1030 if(!thisDistMatrix.containsKey(e)) {
1031 System.err.println("Error while calculating difference distance map. Entry " + e + " in matrix1 not not found in matrix2.");
1032 return null;
1033 }
1034 dist2 = thisDistMatrix.get(e);
1035 diff = Math.abs(dist1-dist2);
1036 otherDistMatrix.put(e, diff);
1037 }
1038 return otherDistMatrix;
1039 }
1040
1041 /**
1042 * Calculates the difference distance map of this structure and
1043 * another pdb object given a sequence alignment of the structures. The
1044 * resulting difference distance map may contains non-defined distances.
1045 * This behavior is due to the alignment. If any residue in either
1046 * structures is aligned with a gap one cannot assign a "difference
1047 * distance" to another residue pair.
1048 * @param contactType1 contact type of this structure
1049 * @param pdb2 the second structure
1050 * @param contactType2 contact type of the second structure
1051 * @param ali sequence alignment of both structures
1052 * @param name1 sequence tag of the this structure in the alignment
1053 * @param name2 sequence tag og the second structure in the alignment
1054 * @return the difference distance map
1055 * @see #getDiffDistMap(String, Pdb, String)
1056 */
1057 public HashMap<Pair<Integer>,Double> getDiffDistMap(String contactType1, Pdb pdb2, String contactType2, Alignment ali, String name1, String name2) {
1058
1059 HashMap<Pair<Integer>,Double> otherDistMatrix = pdb2.calculate_dist_matrix(contactType2);
1060 HashMap<Pair<Integer>,Double> thisDistMatrix = this.calculate_dist_matrix(contactType1);
1061 HashMap<Pair<Integer>,Double> alignedDistMatrix = new HashMap<Pair<Integer>, Double>(Math.min(this.getFullLength(), pdb2.getFullLength()));
1062 int i1,i2,j1,j2;
1063 TreeSet<Integer> unobserved1 = new TreeSet<Integer>();
1064 TreeSet<Integer> unobserved2 = new TreeSet<Integer>();
1065
1066 // detect all unobserved residues
1067 for(int i = 0; i < ali.getAlignmentLength(); ++i) {
1068 i1 = ali.al2seq(name1, i);
1069 i2 = ali.al2seq(name2, i);
1070 if( i1 != -1 && !hasCoordinates(i1) ) {
1071 unobserved1.add(i1);
1072 }
1073 if( i2 != -1 && !pdb2.hasCoordinates(i2) ) {
1074 unobserved2.add(i2);
1075 }
1076 }
1077
1078 // strategy: we always have to look through the alignment to say
1079 // whether a difference distance can be assigned to a pair of
1080 // corresponding residues. To put it differently, for any two
1081 // alignment columns one always has to ensure that both columns
1082 // only contain observed residues (no gaps!), otherwise the one
1083 // cannot obtain a distance in at least one structure as a gap
1084 // indicates "no coordinates available".
1085
1086 for(int i = 0; i < ali.getAlignmentLength()-1; ++i) {
1087
1088 i1 = ali.al2seq(name1, i);
1089 i2 = ali.al2seq(name2, i);
1090
1091 // alignment columns must not contain gap characters and both
1092 // residues in the current column have to be observed!
1093 if( i1 == -1 || i2 == -1 || unobserved1.contains(i1) || unobserved2.contains(i2) ) {
1094 continue;
1095 }
1096
1097 for(int j = i + 1; j < ali.getAlignmentLength(); ++j) {
1098
1099 j1 = ali.al2seq(name1, j);
1100 j2 = ali.al2seq(name2, j);
1101
1102 if( j1 == -1 || j2 == -1 || unobserved1.contains(j1) || unobserved2.contains(j2) ) {
1103 continue;
1104 }
1105
1106 // make the edges
1107 Pair<Integer> e1 = new Pair<Integer>(i1,j1);
1108 Pair<Integer> e2 = new Pair<Integer>(i2,j2);
1109
1110 alignedDistMatrix.put(new Pair<Integer>(i+1,j+1),Math.abs(thisDistMatrix.get(e1)-otherDistMatrix.get(e2)));
1111 }
1112 }
1113 return alignedDistMatrix;
1114 }
1115 // TODO: Version of this where already buffered distance matrices are passed as paremeters
1116
1117 /** Returns the number of neighbours of this grid cell */
1118 private int getNumGridNbs(HashMap<Point3i,Box> boxes, Point3i floor, int boxSize) {
1119 Point3i neighbor;
1120 int nbs = 0;
1121 for (int x=floor.x-boxSize;x<=floor.x+boxSize;x+=boxSize){
1122 for (int y=floor.y-boxSize;y<=floor.y+boxSize;y+=boxSize){
1123 for (int z=floor.z-boxSize;z<=floor.z+boxSize;z+=boxSize){
1124 neighbor = new Point3i(x,y,z);
1125 if (boxes.containsKey(neighbor)) nbs++;
1126 }
1127 }
1128 }
1129 // compensate for counting myself as a neighbour
1130 if(boxes.containsKey(floor)) nbs--;
1131 return nbs;
1132 }
1133
1134 /**
1135 * Gets the Consurf-HSSP score given an internal residue serial
1136 * @param resser
1137 * @return
1138 */
1139 public Double getConsurfhsspScoreFromResSerial(int resser){
1140 if (resser2consurfhsspscore != null) {
1141 if (resser2consurfhsspscore.get(resser) != null) {
1142 return resser2consurfhsspscore.get(resser);
1143 } else {
1144 return null;
1145 }
1146 } else {
1147 return null;
1148 }
1149 }
1150
1151 /**
1152 * Gets the Consurf-HSSP color rsa given an internal residue serial
1153 * @param resser
1154 * @return
1155 */
1156 public Integer getConsurfhsspColorFromResSerial(int resser){
1157 if (resser2consurfhsspcolor != null) {
1158 if (resser2consurfhsspcolor.get(resser) != null) {
1159 return resser2consurfhsspcolor.get(resser);
1160 } else {
1161 return null;
1162 }
1163 } else {
1164 return null;
1165 }
1166 }
1167
1168 /**
1169 * Gets the all atoms rsa given an internal residue serial
1170 * @param resser
1171 * @return
1172 */
1173 public Double getAllRsaFromResSerial(int resser){
1174 if (resser2allrsa != null) {
1175 if (resser2allrsa.get(resser) != null) {
1176 return resser2allrsa.get(resser);
1177 } else {
1178 return null;
1179 }
1180 } else {
1181 return null;
1182 }
1183 }
1184
1185 /**
1186 * Gets the sc rsa given an internal residue serial
1187 * @param resser
1188 * @return
1189 */
1190 public Double getScRsaFromResSerial(int resser){
1191 if (resser2scrsa != null) {
1192 if (resser2scrsa.get(resser) != null) {
1193 return resser2scrsa.get(resser);
1194 } else {
1195 return null;
1196 }
1197 } else {
1198 return null;
1199 }
1200 }
1201
1202 /**
1203 * Gets the internal residue serial (cif) given a pdb residue serial (author assignment)
1204 * TODO refactor
1205 * @param pdbresser
1206 * @return
1207 */
1208 public int get_resser_from_pdbresser (String pdbresser){
1209 return pdbresser2resser.get(pdbresser);
1210 }
1211
1212 /**
1213 * Gets the pdb residue serial (author assignment) given an internal residue serial (cif)
1214 * TODO refactor
1215 * @param resser
1216 * @return
1217 */
1218 public String get_pdbresser_from_resser (int resser){
1219 return resser2pdbresser.get(resser);
1220 }
1221
1222 /**
1223 * Gets the residue serial given an atom serial
1224 * TODO refactor
1225 * @param atomser
1226 * @return
1227 */
1228 public int get_resser_from_atomser(int atomser){
1229 return atomser2resser.get(atomser);
1230 }
1231
1232 public String getResTypeFromResSerial(int resser) {
1233 return resser2restype.get(resser);
1234 }
1235
1236 /**
1237 * Gets the atom serial given the residue serial and atom name
1238 * @param resser
1239 * @param atom
1240 * @return
1241 */
1242 public int getAtomSerFromResSerAndAtom(int resser, String atom) {
1243 return resser_atom2atomserial.get(resser+"_"+atom);
1244 }
1245
1246 /**
1247 * Checks whether the given residue serial has any associated coordinates.
1248 * @param ser the residue serial
1249 * @return true if there is at least one atom with valid coordinates, else false
1250 */
1251 public boolean hasCoordinates(int resser) {
1252 return atomser2resser.values().contains(resser);
1253 }
1254
1255 /**
1256 * Returns true if this Pdb has been restricted to a specific SCOP domain
1257 * @return
1258 */
1259 public boolean isRestrictedToScopDomain() {
1260 return sid!=null;
1261 }
1262
1263 /**
1264 * Returns the sid of this Pdb
1265 * It is set when restrictToScopDomain is run
1266 */
1267 public String getSid() {
1268 return sid;
1269 }
1270
1271 /**
1272 * Gets the atom coordinates (Point3d object) given the atom serial
1273 * @param atomser
1274 * @return
1275 */
1276 public Point3d getAtomCoord(int atomser) {
1277 return this.atomser2coord.get(atomser);
1278 }
1279
1280 /**
1281 * Gets all atom serials in a Set
1282 * @return
1283 */
1284 public Set<Integer> getAllAtomSerials() {
1285 return this.atomser2resser.keySet();
1286 }
1287
1288 /**
1289 * Gets the 4 letter pdb code identifying this structure
1290 * @return
1291 */
1292 public String getPdbCode() {
1293 return this.pdbCode;
1294 }
1295
1296 /**
1297 * Gets the internal chain code (cif)
1298 * @return
1299 */
1300 public String getChainCode(){
1301 return this.chainCode;
1302 }
1303
1304 /**
1305 * Gets the pdb chain code (author assignment)
1306 * @return
1307 */
1308 public String getPdbChainCode(){
1309 return this.pdbChainCode;
1310 }
1311
1312 /**
1313 * Gets the sequence
1314 * @return
1315 */
1316 public String getSequence() {
1317 return sequence;
1318 }
1319
1320 /**
1321 * True if this Pdb has the sequence field set to not blank
1322 * @return
1323 */
1324 public boolean hasSequence() {
1325 if (sequence==null) return false;
1326 return !sequence.equals("");
1327 }
1328
1329 // csa related methods
1330
1331 /**
1332 * Returns true if csa information is available, false otherwise.
1333 */
1334 public boolean hasCSA() {
1335 if (catalSiteSet == null) {
1336 return false;
1337 } else if (catalSiteSet.isEmpty()) {
1338 return false;
1339 } else {
1340 return true;
1341 }
1342 }
1343
1344 /**
1345 * Returns the csa annotation object of this graph.
1346 */
1347 public CatalSiteSet getCSA() {
1348 return catalSiteSet;
1349 }
1350
1351 // ec related methods
1352
1353 /**
1354 * Returns true if ec information is available, false otherwise.
1355 */
1356 public boolean hasEC() {
1357 if (ec == null) {
1358 return false;
1359 } else if (ec.isEmpty()) {
1360 return false;
1361 } else {
1362 return true;
1363 }
1364 }
1365
1366 /**
1367 * Returns the ec annotation object of this graph.
1368 */
1369 public EC getEC() {
1370 return ec;
1371 }
1372
1373 // end of secop related methods
1374
1375 // scop related methods
1376
1377 /**
1378 * Returns true if scop information is available, false otherwise.
1379 */
1380 public boolean hasScop() {
1381 if (scop == null) {
1382 return false;
1383 } else if (scop.isEmpty()) {
1384 return false;
1385 } else {
1386 return true;
1387 }
1388 }
1389
1390 /**
1391 * Returns the scop annotation object of this graph.
1392 */
1393 public Scop getScop() {
1394 return scop;
1395 }
1396
1397 // end of secop related methods
1398
1399 // secondary structure related methods
1400
1401 /**
1402 * Returns true if secondary structure information is available, false otherwise.
1403 */
1404 public boolean hasSecondaryStructure() {
1405 return !this.secondaryStructure.isEmpty();
1406 }
1407
1408 /**
1409 * Returns the secondary structure annotation object of this graph.
1410 */
1411 public SecondaryStructure getSecondaryStructure() {
1412 return this.secondaryStructure;
1413 }
1414
1415 // end of secondary structure related methods
1416
1417 /**
1418 * Calculates rmsd (on atoms given by ct) of this Pdb object to otherPdb object
1419 * Both objects must represent structures with same sequence (save unobserved residues or missing atoms)
1420 *
1421 * @param otherPdb
1422 * @param ct the contact type (crossed contact types don't make sense here)
1423 * @return
1424 * @throws ConformationsNotSameSizeError
1425 */
1426 public double rmsd(Pdb otherPdb, String ct) throws ConformationsNotSameSizeError {
1427 TreeMap<String,Point3d> thiscoords = this.get_coords_for_ct_4rmsd(ct);
1428 TreeMap<String,Point3d> othercoords = otherPdb.get_coords_for_ct_4rmsd(ct);
1429
1430 // there might be unobserved residues or some missing atoms for a residue
1431 // here we get the ones that are in common
1432 ArrayList<String> common = new ArrayList<String>();
1433 for (String resser_atom:thiscoords.keySet()){
1434 if (othercoords.containsKey(resser_atom)){
1435 common.add(resser_atom);
1436 }
1437 }
1438
1439 // converting the TreeMaps to Vector3d arrays (input format for calculate_rmsd)
1440 Vector3d[] conformation1 = new Vector3d[common.size()];
1441 Vector3d[] conformation2 = new Vector3d[common.size()];
1442 int i = 0;
1443 for (String resser_atom:common){
1444 conformation1[i] = new Vector3d(thiscoords.get(resser_atom).x,thiscoords.get(resser_atom).y,thiscoords.get(resser_atom).z);
1445 conformation2[i] = new Vector3d(othercoords.get(resser_atom).x,othercoords.get(resser_atom).y,othercoords.get(resser_atom).z);
1446 i++;
1447 }
1448
1449 // this as well as calculating the rmsd, changes conformation1 and conformation2 to be optimally superposed
1450 double rmsd = calculate_rmsd(conformation1, conformation2);
1451
1452 // // printing out individual distances (conformation1 and conformation2 are now optimally superposed)
1453 // for (i=0;i<conformation1.length;i++){
1454 // Point3d point1 = new Point3d(conformation1[i].x,conformation1[i].y,conformation1[i].z);
1455 // Point3d point2 = new Point3d(conformation2[i].x,conformation2[i].y,conformation2[i].z);
1456 // System.out.println(point1.distance(point2));
1457 // }
1458
1459 return rmsd;
1460
1461 }
1462
1463 /**
1464 * Calculates the RMSD between two conformations.
1465 * conformation1: Vector3d array (matrix of dimensions [N,3])
1466 * conformation2: Vector3d array (matrix of dimensions [N,3])
1467 *
1468 * Both conformation1 and conformation2 are modified to be optimally superposed
1469 *
1470 * Implementation taken (python) from http://bosco.infogami.com/Root_Mean_Square_Deviation,
1471 * then ported to java using Jama matrix package
1472 * (page has moved to: http://boscoh.com/protein/rmsd-root-mean-square-deviation)
1473 * @param conformation1
1474 * @param conformation2
1475 * @return
1476 * @throws ConformationsNotSameSizeError
1477 */
1478 private double calculate_rmsd(Vector3d[] conformation1, Vector3d[] conformation2) throws ConformationsNotSameSizeError{
1479 if (conformation1.length!=conformation2.length) {
1480 //System.err.println("Conformations not the same size");
1481 throw new ConformationsNotSameSizeError(
1482 "Given conformations have different size: conformation1: "+conformation1.length+", conformation2: "+conformation2.length);
1483 }
1484 int n_vec = conformation1.length;
1485
1486 // 1st we bring both conformations to the same centre by subtracting their respectives centres
1487 Vector3d center1 = new Vector3d();
1488 Vector3d center2 = new Vector3d();
1489 for (int i=0;i<n_vec;i++){ // summing all vectors in each conformation
1490 center1.add(conformation1[i]);
1491 center2.add(conformation2[i]);
1492 }
1493 // dividing by n_vec (average)
1494 center1.scale((double)1/n_vec);
1495 center2.scale((double)1/n_vec);
1496 // translating our conformations to the same coordinate system by subtracting centers
1497 for (Vector3d vec:conformation1){
1498 vec.sub(center1);
1499 }
1500 for (Vector3d vec:conformation2){
1501 vec.sub(center2);
1502 }
1503
1504 //E0: initial sum of squared lengths of both conformations
1505 double sum1 = 0.0;
1506 double sum2 = 0.0;
1507 for (int i=0;i<n_vec;i++){
1508 sum1 += conformation1[i].lengthSquared();
1509 sum2 += conformation2[i].lengthSquared();
1510 }
1511 double E0 = sum1 + sum2;
1512
1513 // singular value decomposition
1514 Matrix vecs1 = vector3dAr2matrix(conformation1);
1515 Matrix vecs2 = vector3dAr2matrix(conformation2);
1516
1517 Matrix correlation_matrix = vecs2.transpose().times(vecs1); //gives a 3x3 matrix
1518
1519 SingularValueDecomposition svd = correlation_matrix.svd();
1520 Matrix U = svd.getU();
1521 Matrix V_trans = svd.getV().transpose();
1522 double[] singularValues = svd.getSingularValues();
1523
1524 boolean is_reflection = false;
1525 if (U.det()*V_trans.det()<0.0){
1526 is_reflection = true;
1527 }
1528 if (is_reflection){
1529 // reflect along smallest principal axis:
1530 // we change sign of last coordinate (smallest singular value)
1531 singularValues[singularValues.length-1]=(-1)*singularValues[singularValues.length-1];
1532 }
1533
1534 // getting sum of singular values
1535 double sumSV = 0.0;
1536 for (int i=0;i<singularValues.length;i++){
1537 sumSV += singularValues[i];
1538 }
1539
1540 // rmsd square: Kabsch formula
1541 double rmsd_sq = (E0 - 2.0*sumSV)/((double) n_vec);
1542 rmsd_sq = Math.max(rmsd_sq, 0.0);
1543
1544 // finally we modify conformation2 to be aligned to conformation1
1545 if (is_reflection) { // first we check if we are in is_reflection case: we need to change sign to last row of U
1546 for (int j=0;j<U.getColumnDimension();j++){
1547 // we change sign to last row of U
1548 int lastRow = U.getRowDimension()-1;
1549 U.set(lastRow, j, (-1)*U.get(lastRow,j));
1550 }
1551 }
1552 Matrix optimal_rotation = U.times(V_trans);
1553 Matrix conf2 = vecs2.times(optimal_rotation);
1554 for (int i=0;i<n_vec;i++){
1555 conformation2[i].x = conf2.get(i,0);
1556 conformation2[i].y = conf2.get(i,1);
1557 conformation2[i].z = conf2.get(i,2);
1558 }
1559
1560 return Math.sqrt(rmsd_sq);
1561 }
1562
1563 /** Gets a Jama.Matrix object from a Vector3d[] (deep copies) */
1564 private Matrix vector3dAr2matrix(Vector3d[] vecArray) {
1565 double[][] array = new double[vecArray.length][3];
1566 for (int i=0;i<vecArray.length;i++){
1567 vecArray[i].get(array[i]);
1568 }
1569 return new Matrix(array);
1570 }
1571
1572 /**
1573 * Write residue info to given db, using our db graph aglappe format,
1574 * i.e. tables: residue_info
1575 * @param conn
1576 * @param db
1577 * @throws SQLException
1578 */
1579 public void writeToDb(MySQLConnection conn, String db) throws SQLException{
1580
1581 Statement stmt;
1582 String sql = "";
1583
1584 conn.setSqlMode("NO_UNSIGNED_SUBTRACTION,TRADITIONAL");
1585
1586 for (int resser:resser2restype.keySet()) {
1587 String resType = AAinfo.threeletter2oneletter(getResTypeFromResSerial(resser));
1588 String pdbresser = get_pdbresser_from_resser(resser);
1589
1590 String secStructType = null;
1591 String secStructId = null;
1592 if (secondaryStructure != null) {
1593 if (secondaryStructure.getSecStrucElement(resser) != null) {
1594 secStructType = quote(String.valueOf(secondaryStructure.getSecStrucElement(resser).getType()));
1595 secStructId = quote(secondaryStructure.getSecStrucElement(resser).getId());
1596 }
1597 }
1598
1599 String scopId = null;
1600 String sccs = null;
1601 String sunid = null;
1602 String orderIn = null;
1603 String domainType = null;
1604 String domainNumReg = null;
1605 if (scop != null) {
1606 if (scop.getScopRegion(resser)!=null) {
1607 ScopRegion sr = scop.getScopRegion(resser);
1608 scopId = quote(sr.getSId());
1609 sccs = quote(sr.getSccs());
1610 sunid = String.valueOf(sr.getSunid());
1611 orderIn = String.valueOf(sr.getOrder());
1612 domainType = quote(String.valueOf(sr.getDomainType()));
1613 domainNumReg = String.valueOf(sr.getNumRegions());
1614 }
1615 }
1616
1617 Double allRsa = getAllRsaFromResSerial(resser);
1618 Double scRsa = getScRsaFromResSerial(resser);
1619
1620 Double consurfhsspScore = getConsurfhsspScoreFromResSerial(resser);
1621 Integer consurfhsspColor = getConsurfhsspColorFromResSerial(resser);
1622
1623 String ecId = null;
1624 if (ec != null) {
1625 if (ec.getECRegion(resser) != null) {
1626 ecId = quote(ec.getECNum(resser));
1627 }
1628 }
1629
1630 String csaNums = null;
1631 String csaChemFuncs = null;
1632 String csaEvids = null;
1633 if (catalSiteSet != null) {
1634 if (catalSiteSet.getCatalSite(resser) != null) {
1635 csaNums = quote(catalSiteSet.getCatalSiteNum(resser));
1636 csaChemFuncs = quote(catalSiteSet.getCatalSiteChemFunc(resser));
1637 csaEvids = quote(catalSiteSet.getCatalSiteEvid(resser));
1638 }
1639 }
1640
1641 sql = "INSERT IGNORE INTO "+db+".pdb_residue_info (pdb_code, chain_code, pdb_chain_code, res_ser, pdb_res_ser, res_type, sstype, ssid, scop_id, sccs, sunid, order_in, domain_type, domain_num_reg, all_rsa, sc_rsa, consurfhssp_score, consurfhssp_color, ec, csa_site_nums, csa_chem_funcs, csa_evid) " +
1642 " VALUES ("+quote(pdbCode)+", "+quote(chainCode)+", "+(pdbChainCode.equals("NULL")?quote("-"):quote(pdbChainCode))+","+resser+", "+quote(pdbresser)+", "+quote(resType)+", "+secStructType+", "+secStructId+", "+scopId+", "+sccs+", "+sunid+", "+orderIn+", "+domainType+", "+domainNumReg+", "+allRsa+", "+scRsa+", "+consurfhsspScore+","+consurfhsspColor+","+ecId+","+csaNums+","+csaChemFuncs+","+csaEvids+")";
1643 //System.out.println(sql);
1644 stmt = conn.createStatement();
1645 stmt.executeUpdate(sql);
1646 stmt.close();
1647 }
1648 }
1649
1650 /**
1651 * Write residue info to given db, using our db graph aglappe format,
1652 * i.e. tables: residue_info
1653 * @param conn
1654 * @param db
1655 * @throws SQLException
1656 */
1657 public void writeToDbFast(MySQLConnection conn, String db) throws SQLException, IOException {
1658
1659 Statement stmt;
1660 String sql = "";
1661
1662 conn.setSqlMode("NO_UNSIGNED_SUBTRACTION,TRADITIONAL");
1663
1664 PrintStream resOut = new PrintStream(new FileOutputStream(pdbCode+chainCode+"_residues.txt"));
1665
1666 for (int resser:resser2restype.keySet()) {
1667 String resType = AAinfo.threeletter2oneletter(getResTypeFromResSerial(resser));
1668 String pdbresser = get_pdbresser_from_resser(resser);
1669
1670 String secStructType = "\\N";
1671 String secStructId = "\\N";
1672 if (secondaryStructure != null) {
1673 if (secondaryStructure.getSecStrucElement(resser) != null) {
1674 secStructType = String.valueOf(secondaryStructure.getSecStrucElement(resser).getType());
1675 secStructId = secondaryStructure.getSecStrucElement(resser).getId();
1676 }
1677 }
1678
1679 String scopId = "\\N";
1680 String sccs = "\\N";
1681 String sunid = "\\N";
1682 String orderIn = "\\N";
1683 String domainType = "\\N";
1684 String domainNumReg = "\\N";
1685 if (scop != null) {
1686 if (scop.getScopRegion(resser)!=null) {
1687 ScopRegion sr = scop.getScopRegion(resser);
1688 scopId = sr.getSId();
1689 sccs = sr.getSccs();
1690 sunid = String.valueOf(sr.getSunid());
1691 orderIn = String.valueOf(sr.getOrder());
1692 domainType = String.valueOf(sr.getDomainType());
1693 domainNumReg = String.valueOf(sr.getNumRegions());
1694 }
1695 }
1696
1697 Double allRsa = getAllRsaFromResSerial(resser);
1698 Double scRsa = getScRsaFromResSerial(resser);
1699
1700 Double consurfhsspScore = getConsurfhsspScoreFromResSerial(resser);
1701 Integer consurfhsspColor = getConsurfhsspColorFromResSerial(resser);
1702
1703 String ecId = "\\N";
1704 if (ec != null) {
1705 if (ec.getECRegion(resser) != null) {
1706 ecId = ec.getECNum(resser);
1707 }
1708 }
1709
1710 String csaNums = "\\N";
1711 String csaChemFuncs = "\\N";
1712 String csaEvids = "\\N";
1713 if (catalSiteSet != null) {
1714 if (catalSiteSet.getCatalSite(resser) != null) {
1715 csaNums = catalSiteSet.getCatalSiteNum(resser);
1716 csaChemFuncs = catalSiteSet.getCatalSiteChemFunc(resser);
1717 csaEvids = catalSiteSet.getCatalSiteEvid(resser);
1718 }
1719 }
1720
1721 resOut.println(pdbCode+"\t"+chainCode+"\t"+(pdbChainCode.equals("NULL")?"-":pdbChainCode)+"\t"+resser+"\t"+pdbresser+"\t"+resType+"\t"+secStructType+"\t"+secStructId+"\t"+scopId+"\t"+sccs+"\t"+sunid+"\t"+orderIn+"\t"+domainType+"\t"+domainNumReg+"\t"+allRsa+"\t"+scRsa+"\t"+consurfhsspScore+"\t"+consurfhsspColor+"\t"+ecId+"\t"+csaNums+"\t"+csaChemFuncs+"\t"+csaEvids);
1722
1723 }
1724 resOut.close();
1725 sql = "LOAD DATA LOCAL INFILE '"+pdbCode+chainCode+"_residues.txt' INTO TABLE "+db+".pdb_residue_info (pdb_code, chain_code, pdb_chain_code, res_ser, pdb_res_ser, res_type, sstype, ssid, scop_id, sccs, sunid, order_in, domain_type, domain_num_reg, all_rsa, sc_rsa, consurfhssp_score, consurfhssp_color, ec, csa_site_nums, csa_chem_funcs, csa_evid);";
1726 //System.out.println(sql);
1727 stmt = conn.createStatement();
1728 stmt.executeUpdate(sql);
1729 stmt.close();
1730 File fileToDelete = new File(pdbCode+chainCode+"_residues.txt");
1731 if (fileToDelete.exists()) {
1732 fileToDelete.delete();
1733 }
1734 }
1735
1736 private static String quote(String s) {
1737 return ("'"+s+"'");
1738 }
1739
1740 /**
1741 * Restricts thisPdb object to residues that belong to the given sunid
1742 * Can only be used after calling checkScop()
1743 * @param sunid
1744 */
1745 public void restrictToScopDomain (int sunid) {
1746 Vector<ScopRegion> scopRegions = this.scop.getScopRegions(sunid);
1747 if (scopRegions.size()!=0) {
1748 this.sid = scopRegions.get(0).getSId();
1749 if (scopRegions.get(0).getDomainType() != ScopRegion.DomainType.WHOLECHAIN) {
1750 restrictToScopRegions(scopRegions);
1751
1752 Iterator<ScopRegion> allScopRegions = this.scop.getIterator();
1753 while (allScopRegions.hasNext()) {
1754 ScopRegion scopRegion = allScopRegions.next();
1755 if (!scopRegion.getSId().equals(sid)) {
1756 allScopRegions.remove();
1757 }
1758 }
1759 }
1760 }
1761 }
1762
1763 /**
1764 * Restricts thisPdb object to residues that belong to the given sid
1765 * Can only be used after calling checkScop()
1766 * @param sid
1767 */
1768 public void restrictToScopDomain (String sid) {
1769
1770 Vector<ScopRegion> scopRegions = this.scop.getScopRegions(sid);
1771 if (scopRegions.size()!=0) {
1772 this.sid = sid;
1773 if (scopRegions.get(0).getDomainType() != ScopRegion.DomainType.WHOLECHAIN) {
1774 restrictToScopRegions(scopRegions);
1775
1776 Iterator<ScopRegion> allScopRegions = this.scop.getIterator();
1777 while (allScopRegions.hasNext()) {
1778 ScopRegion scopRegion = allScopRegions.next();
1779 if (!scopRegion.getSId().equals(sid)) {
1780 allScopRegions.remove();
1781 }
1782 }
1783 }
1784 }
1785 }
1786
1787 /**
1788 * Restricts this Pdb object to residues within the given ScopRegions
1789 * @param scopRegions
1790 */
1791 private void restrictToScopRegions (Vector<ScopRegion> scopRegions) {
1792 IntervalSet intervSet = new IntervalSet();
1793 Iterator<ScopRegion> it = scopRegions.iterator();
1794 while(it.hasNext()) {
1795 ScopRegion scopRegion = it.next();
1796 intervSet.add(scopRegion.getInterval());
1797 }
1798 restrictToIntervalSet(intervSet);
1799 }
1800
1801 /**
1802 * Restricts this Pdb object to residues within the given IntervalSet
1803 * @param intervSet a set of internal residue serials
1804 */
1805 private void restrictToIntervalSet(IntervalSet intervSet) {
1806
1807 // getting list of the residue serials to keep
1808 TreeSet<Integer> resSersToKeep = intervSet.getIntegerSet();
1809
1810 // removing residues from resser2restype and resser2pdbresser
1811 Iterator<Integer> itressers = resser2restype.keySet().iterator();
1812 while (itressers.hasNext()) {
1813 int resSer = itressers.next();
1814 if (!resSersToKeep.contains(resSer)) {
1815 itressers.remove();
1816 resser2pdbresser.remove(resSer);
1817 if (resser2allrsa != null) {
1818 resser2allrsa.remove(resSer);
1819 resser2scrsa.remove(resSer);
1820 }
1821 if (resser2consurfhsspscore != null) {
1822 resser2consurfhsspscore.remove(resSer);
1823 resser2consurfhsspcolor.remove(resSer);
1824 }
1825 if (catalSiteSet != null) {
1826 catalSiteSet.removeCatalSiteRes(resSer);
1827 }
1828 }
1829 }
1830
1831 // removing residues from pdbresser2resser
1832 Iterator<String> pdbressers = pdbresser2resser.keySet().iterator();
1833 while (pdbressers.hasNext()) {
1834 String pdbresser = pdbressers.next();
1835 int resSer = pdbresser2resser.get(pdbresser);
1836 if (!resSersToKeep.contains(resSer)) {
1837 pdbressers.remove();
1838 }
1839 }
1840
1841 // removing residues(atoms) from resser_atom2atomserial
1842 for (int atomSer:atomser2resser.keySet()) {
1843 String atom = atomser2atom.get(atomSer);
1844 int resSer = atomser2resser.get(atomSer);
1845 String resser_atom = resSer+"_"+atom;
1846 if (!resSersToKeep.contains(resSer)) {
1847 resser_atom2atomserial.remove(resser_atom);
1848 }
1849 }
1850
1851 // removing atoms belonging to unwanted residues from atomser2resser, atomser2atom and atomser2coord
1852 Iterator<Integer> itatomsers = atomser2resser.keySet().iterator();
1853 while (itatomsers.hasNext()) {
1854 int atomSer = itatomsers.next();
1855 if (!resSersToKeep.contains(atomser2resser.get(atomSer))) {
1856 itatomsers.remove();
1857 atomser2atom.remove(atomSer);
1858 atomser2coord.remove(atomSer);
1859 }
1860 }
1861
1862 // setting sequence to scop sequence and obsLength and fullLength respectively
1863 Iterator<Interval> regionsToKeep = intervSet.iterator();
1864 String newSequence = "";
1865 while (regionsToKeep.hasNext()) {
1866 Interval region = regionsToKeep.next();
1867 newSequence += sequence.substring((region.beg-1),region.end);
1868 }
1869 sequence = newSequence;
1870 fullLength = sequence.length();
1871 obsLength = resser2restype.size();
1872
1873 }
1874 }