1 |
package proteinstructure; |
2 |
|
3 |
import java.io.*; |
4 |
import java.net.*; |
5 |
import java.util.Collections; |
6 |
import java.util.HashMap; |
7 |
import java.util.Iterator; |
8 |
import java.util.Locale; |
9 |
import java.util.Map; |
10 |
import java.util.Set; |
11 |
import java.util.TreeMap; |
12 |
import java.util.ArrayList; |
13 |
import java.util.TreeSet; |
14 |
import java.util.Vector; |
15 |
import java.util.regex.*; |
16 |
import java.util.zip.GZIPInputStream; |
17 |
import java.util.Arrays; |
18 |
|
19 |
import javax.vecmath.Point3d; |
20 |
import javax.vecmath.Point3i; |
21 |
import javax.vecmath.Vector3d; |
22 |
|
23 |
import edu.uci.ics.jung.graph.util.EdgeType; |
24 |
import edu.uci.ics.jung.graph.util.Pair; |
25 |
|
26 |
import Jama.Matrix; |
27 |
import Jama.SingularValueDecomposition; |
28 |
|
29 |
import tools.MySQLConnection; |
30 |
import java.sql.SQLException; |
31 |
import java.sql.Statement; |
32 |
|
33 |
/** |
34 |
* A single chain pdb protein structure |
35 |
* |
36 |
*/ |
37 |
public abstract class Pdb { |
38 |
|
39 |
protected static final int DEFAULT_MODEL=1; // default model serial (NMR structures) |
40 |
public static final String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence |
41 |
public static final String NULL_CHAIN_CODE = "NULL"; // to specify the NULL (blank in pdb file) chain code |
42 |
public static final String NO_PDB_CODE = ""; // to specify no pdb code |
43 |
public static final String NO_PDB_CHAIN_CODE = ""; // to specify no pdb chain code |
44 |
public static final String NO_CHAIN_CODE = ""; // to specify no internal chain code |
45 |
|
46 |
protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials |
47 |
protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type |
48 |
protected HashMap<Integer,Point3d> atomser2coord; // atom serials to 3D coordinates |
49 |
protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials |
50 |
protected HashMap<Integer,String> atomser2atom; // atom serials to atom names |
51 |
protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials |
52 |
protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings) |
53 |
|
54 |
protected SecondaryStructure secondaryStructure; // the secondary structure annotation for this pdb object (should never be null) |
55 |
protected Scop scop; // the scop annotation for this pdb object |
56 |
protected HashMap<Integer,Double> resser2allrsa; // internal residue serials to all-atoms rsa |
57 |
protected HashMap<Integer,Double> resser2scrsa; // internal residue serials to SC rsa |
58 |
protected HashMap<Integer,Double> resser2consurfhsspscore; // internal residue serials to SC rsa |
59 |
protected HashMap<Integer,Integer> resser2consurfhsspcolor; // internal residue serials to SC rsa |
60 |
protected EC ec; // the ec annotation for this pdb object |
61 |
protected CatalSiteSet catalSiteSet; // the catalytic site annotation for this pdb object |
62 |
|
63 |
protected String sequence; // full sequence as it appears in SEQRES field |
64 |
protected String pdbCode; |
65 |
// given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file) |
66 |
protected String pdbChainCode; |
67 |
// Our internal chain identifier: |
68 |
// - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd) |
69 |
// - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A" |
70 |
protected String chainCode; |
71 |
protected int model; // the model serial for NMR structures |
72 |
protected String sid; // the scop id if Pdb has been restricted (restrictToScopDomain) |
73 |
|
74 |
// in case we read from pdb file, 2 possibilities: |
75 |
// 1) SEQRES field is present: fullLength coincides with sequence length |
76 |
// 2) SEQRES not present: fullLength is maximum observed residue (so if residue numbering is correct that only misses possible non-observed residues at end of chain) |
77 |
protected int fullLength; // length of full sequence as it appears in SEQRES field |
78 |
protected int obsLength; // length without unobserved, non standard aas |
79 |
|
80 |
protected boolean dataLoaded; // true if this object has been loaded with pdb data, false when is empty |
81 |
|
82 |
protected String db; // the db from which we have taken the data (if applies) |
83 |
protected MySQLConnection conn; |
84 |
|
85 |
/*--------------------------------- abstract methods --------------------------------------------*/ |
86 |
/** |
87 |
* Returns all pdb chain codes for this Pdb entry (be it a file or an entry in a database) |
88 |
* @return pdb chain codes |
89 |
*/ |
90 |
public abstract String[] getChains() throws PdbLoadError; |
91 |
|
92 |
/** |
93 |
* Returns all models for this Pdb entry (be it a file or an entry in a database) |
94 |
* @return |
95 |
*/ |
96 |
public abstract Integer[] getModels() throws PdbLoadError; |
97 |
|
98 |
/** |
99 |
* Loads pdb data (coordinates, sequence, etc.) from the source (file or database) |
100 |
* for given pdbChainCode and modelSerial |
101 |
* @param pdbChainCode |
102 |
* @param modelSerial |
103 |
* @throws PdbLoadError |
104 |
*/ |
105 |
public abstract void load(String pdbChainCode, int modelSerial) throws PdbLoadError; |
106 |
|
107 |
|
108 |
/*--------------------------------- public methods ----------------------------------------------*/ |
109 |
|
110 |
/** |
111 |
* Loads pdb data (coordinates, sequence, etc.) from the source (file or database) |
112 |
* for given pdbChainCode and default model serial (1) |
113 |
* @param pdbChainCode |
114 |
* @throws PdbLoadError */ |
115 |
|
116 |
public void load(String pdbChainCode) throws PdbLoadError { |
117 |
load(pdbChainCode, DEFAULT_MODEL); |
118 |
} |
119 |
|
120 |
/** |
121 |
* Returns true if this Pdb has been loaded with pdb data (i.e. when |
122 |
* load(pdbChainCode) has been called), false if it is empty |
123 |
*/ |
124 |
public boolean isDataLoaded() { |
125 |
return dataLoaded; |
126 |
} |
127 |
|
128 |
public int checkCSA(String version, boolean online) throws IOException { |
129 |
BufferedReader in; |
130 |
String inputLine; |
131 |
|
132 |
if (online) { |
133 |
URL csa = new URL("http://www.ebi.ac.uk/thornton-srv/databases/CSA/archive/CSA_"+version.replaceAll("\\.","_")+".dat.gz"); |
134 |
URLConnection conn = csa.openConnection(); |
135 |
InputStream inURL = conn.getInputStream(); |
136 |
OutputStream out = new BufferedOutputStream(new FileOutputStream("CSA_"+version.replaceAll("\\.","_")+".dat.gz")); |
137 |
byte[] buffer = new byte[1024]; |
138 |
int numRead; |
139 |
long numWritten = 0; |
140 |
while ((numRead = inURL.read(buffer)) != -1) { |
141 |
out.write(buffer, 0, numRead); |
142 |
numWritten += numRead; |
143 |
} |
144 |
inURL.close(); |
145 |
out.close(); |
146 |
in = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream("CSA_"+version.replaceAll(".","_")+".dat.gz")))); |
147 |
} else { |
148 |
File csaFile = new File("/project/StruPPi/Databases/CSA/CSA_"+version.replaceAll("\\.","_")+".dat"); |
149 |
in = new BufferedReader(new FileReader(csaFile)); |
150 |
} |
151 |
|
152 |
int csaMistakes = 0; |
153 |
this.catalSiteSet = new CatalSiteSet(); |
154 |
String curPdbCode, curPdbChainCode, curPdbResSerial, curResType; |
155 |
String prevPdbCode = ""; |
156 |
int curSiteId; |
157 |
int prevSiteId = -1; |
158 |
CatalyticSite cs = null; |
159 |
while ((inputLine = in.readLine()) != null) { |
160 |
String[] fields = inputLine.split(","); |
161 |
curPdbCode = fields[0]; |
162 |
curPdbChainCode = (fields[3].equals(""))?"NULL":fields[3]; |
163 |
if (curPdbCode.equals(pdbCode)) { |
164 |
if (curPdbChainCode.equals(pdbChainCode)) { |
165 |
curPdbResSerial = fields[4]; |
166 |
curResType = fields[2]; |
167 |
curSiteId = Integer.valueOf(fields[1]); |
168 |
// only if the pdb residue type is a standard AA |
169 |
if (AAinfo.isValidAA(curResType)) { |
170 |
// only if the pdb residue type agrees with our residue type |
171 |
if (getResTypeFromResSerial(get_resser_from_pdbresser(curPdbResSerial)).equals(curResType)) { |
172 |
// each time the site changes except for the first site of a chain, |
173 |
// add the site to the set |
174 |
if ((curSiteId != prevSiteId) & (prevSiteId != -1)) { |
175 |
catalSiteSet.add(cs); |
176 |
} |
177 |
// each time the site changes or it is the first site of a chain, |
178 |
// create a new site |
179 |
if ((curSiteId != prevSiteId) | (prevSiteId == -1)) { |
180 |
String littEntryPdbCode = fields[7].substring(0,4); |
181 |
String littEntryPdbChainCode = fields[7].substring(4).equals("")?"NULL":fields[7].substring(4); |
182 |
cs = new CatalyticSite(curSiteId, fields[6], littEntryPdbCode, littEntryPdbChainCode); |
183 |
} |
184 |
// add the res to the site |
185 |
cs.addRes(get_resser_from_pdbresser(curPdbResSerial), fields[5]); |
186 |
prevSiteId = curSiteId; |
187 |
} else { |
188 |
csaMistakes++; |
189 |
} |
190 |
} |
191 |
} |
192 |
} else if (prevPdbCode.equals(pdbCode)) { |
193 |
if (prevSiteId != -1) { |
194 |
catalSiteSet.add(cs); |
195 |
} |
196 |
break; |
197 |
} |
198 |
prevPdbCode = curPdbCode; |
199 |
} |
200 |
|
201 |
in.close(); |
202 |
if (online) { |
203 |
File csaFile = new File("CSA_"+version.replaceAll(".","_")+".dat.gz"); |
204 |
csaFile.delete(); |
205 |
} |
206 |
|
207 |
if ((csaMistakes > 0) | (prevSiteId == -1)) { |
208 |
this.catalSiteSet = null; |
209 |
} |
210 |
|
211 |
return csaMistakes; |
212 |
} |
213 |
|
214 |
public void checkEC(boolean online) throws IOException { |
215 |
|
216 |
this.ec = new EC(); |
217 |
ECRegion er = null; |
218 |
String startPdbResSer = "", endPdbResSer = ""; |
219 |
BufferedReader in; |
220 |
String inputLine; |
221 |
Pattern p = Pattern.compile("^ \\d"); |
222 |
|
223 |
if (online) { |
224 |
URL pdb2ecMapping = new URL("http://www.bioinf.org.uk/pdbsprotec/mapping.txt"); |
225 |
URLConnection p2e= pdb2ecMapping.openConnection(); |
226 |
in = new BufferedReader(new InputStreamReader(p2e.getInputStream())); |
227 |
} else { |
228 |
File pdb2ecMapping = new File("/project/StruPPi/Databases/PDBSProtEC/mapping.txt"); |
229 |
in = new BufferedReader(new FileReader(pdb2ecMapping)); |
230 |
} |
231 |
|
232 |
while ((inputLine = in.readLine()) != null) { |
233 |
Matcher m = p.matcher(inputLine); |
234 |
if (m.find()) { |
235 |
String curPdbCode = inputLine.substring(0,9).trim(); |
236 |
String curPdbChainCode = (inputLine.charAt(11) == ' ')?"NULL":String.valueOf(inputLine.charAt(11)); |
237 |
if (curPdbCode.equals(pdbCode) && curPdbChainCode.equals(pdbChainCode)) { |
238 |
startPdbResSer = inputLine.substring(20,26).trim(); |
239 |
endPdbResSer = inputLine.substring(27,33).trim(); |
240 |
String id = inputLine.substring(43).trim(); |
241 |
//System.out.println(curPdbCode+":"+curPdbChainCode+":"+startPdbResSer+"-"+endPdbResSer+":"+ec); |
242 |
er = new ECRegion(id, startPdbResSer, endPdbResSer, get_resser_from_pdbresser(startPdbResSer), get_resser_from_pdbresser(endPdbResSer)); |
243 |
ec.add(er); |
244 |
} |
245 |
} |
246 |
} |
247 |
|
248 |
in.close(); |
249 |
|
250 |
} |
251 |
|
252 |
public int checkConsurfHssp(boolean online) throws IOException { |
253 |
|
254 |
BufferedReader in; |
255 |
if (online) { |
256 |
URL consurfhssp = new URL("http://consurf.tau.ac.il/results/HSSP_ML_"+pdbCode+(pdbChainCode.equals("NULL")?"_":pdbChainCode)+"/pdb"+pdbCode+".gradesPE"); |
257 |
URLConnection ch = consurfhssp.openConnection(); |
258 |
in = new BufferedReader(new InputStreamReader(ch.getInputStream())); |
259 |
} else { |
260 |
File consurfhssp = new File("/project/StruPPi/Databases/ConSurf-HSSP/ConservationGrades/"+pdbCode+(pdbChainCode.equals("NULL")?"_":pdbChainCode)+".grades"); |
261 |
in = new BufferedReader(new FileReader(consurfhssp)); |
262 |
} |
263 |
String inputLine; |
264 |
Pattern p = Pattern.compile("^\\s+\\d+"); |
265 |
int lineCount = 0; |
266 |
int consurfHsspMistakes = 0; |
267 |
|
268 |
Integer[] ressers = new Integer[resser2restype.size()]; |
269 |
resser2restype.keySet().toArray(ressers); |
270 |
Arrays.sort(ressers); |
271 |
|
272 |
resser2consurfhsspscore = new HashMap<Integer,Double>(); |
273 |
resser2consurfhsspcolor = new HashMap<Integer,Integer>(); |
274 |
|
275 |
while ((inputLine = in.readLine()) != null) { |
276 |
Matcher m = p.matcher(inputLine); |
277 |
if (m.find()) { |
278 |
lineCount++; |
279 |
int resser = ressers[lineCount-1]; |
280 |
String[] fields = inputLine.split("\\s+"); |
281 |
String pdbresser = fields[3].equals("-")?"-":fields[3].substring(3, fields[3].indexOf(':')); |
282 |
if (fields[2].equals(AAinfo.threeletter2oneletter(getResTypeFromResSerial(resser))) & |
283 |
(pdbresser.equals("-") | pdbresser.equals(get_pdbresser_from_resser(resser)))) { |
284 |
resser2consurfhsspscore.put(resser, Double.valueOf(fields[4])); |
285 |
resser2consurfhsspcolor.put(resser, Integer.valueOf(fields[5])); |
286 |
} else { |
287 |
consurfHsspMistakes++; |
288 |
} |
289 |
} |
290 |
} |
291 |
in.close(); |
292 |
|
293 |
consurfHsspMistakes += Math.abs(ressers.length - resser2consurfhsspscore.size()); |
294 |
if (consurfHsspMistakes > 0) { |
295 |
resser2consurfhsspscore.clear(); |
296 |
resser2consurfhsspcolor.clear(); |
297 |
} |
298 |
|
299 |
return consurfHsspMistakes; |
300 |
|
301 |
} |
302 |
|
303 |
public void runNaccess(String naccessExecutable, String naccessParameters) throws Exception { |
304 |
String pdbFileName = pdbCode+chainCode+".pdb"; |
305 |
dump2pdbfile(pdbFileName, true); |
306 |
String line; |
307 |
int errorLineCount = 0; |
308 |
|
309 |
File test = new File(naccessExecutable); |
310 |
if(!test.canRead()) throw new IOException("Naccess Executable is not readable"); |
311 |
|
312 |
Process myNaccess = Runtime.getRuntime().exec(naccessExecutable + " " + pdbFileName + " " + naccessParameters); |
313 |
BufferedReader naccessOutput = new BufferedReader(new InputStreamReader(myNaccess.getInputStream())); |
314 |
BufferedReader naccessError = new BufferedReader(new InputStreamReader(myNaccess.getErrorStream())); |
315 |
while((line = naccessOutput.readLine()) != null) { |
316 |
} |
317 |
while((line = naccessError.readLine()) != null) { |
318 |
errorLineCount++; |
319 |
} |
320 |
naccessOutput.close(); |
321 |
naccessError.close(); |
322 |
int exitVal = myNaccess.waitFor(); |
323 |
if ((exitVal == 1) || (errorLineCount > 0)) { |
324 |
throw new Exception("Naccess:Wrong arguments or pdb file format!"); |
325 |
} |
326 |
|
327 |
File rsa = new File(pdbCode+chainCode+".rsa"); |
328 |
if (rsa.exists()) { |
329 |
resser2allrsa = new HashMap<Integer,Double>(); |
330 |
resser2scrsa = new HashMap<Integer,Double>(); |
331 |
BufferedReader rsaInput = new BufferedReader(new FileReader(rsa)); |
332 |
while ((line = rsaInput.readLine()) != null) { |
333 |
if (line.startsWith("RES")) { |
334 |
int resser = Integer.valueOf(line.substring(9,13).trim()); |
335 |
double allrsa = Double.valueOf(line.substring(22,28).trim()); |
336 |
double scrsa = Double.valueOf(line.substring(35,41).trim()); |
337 |
resser2allrsa.put(resser, allrsa); |
338 |
resser2scrsa.put(resser, scrsa); |
339 |
} |
340 |
} |
341 |
rsaInput.close(); |
342 |
} |
343 |
String[] filesToDelete = { ".pdb", ".asa", ".rsa", ".log" }; |
344 |
for (int i=0; i < filesToDelete.length; i++) { |
345 |
File fileToDelete = new File(pdbCode+chainCode+filesToDelete[i]); |
346 |
if (fileToDelete.exists()) { |
347 |
fileToDelete.delete(); |
348 |
} |
349 |
} |
350 |
|
351 |
} |
352 |
|
353 |
public void checkScop(String version, boolean online) throws IOException { |
354 |
this.scop = new Scop(); |
355 |
ScopRegion sr = null; |
356 |
String startPdbResSer = "", endPdbResSer = ""; |
357 |
BufferedReader in; |
358 |
String inputLine; |
359 |
|
360 |
if (online) { |
361 |
URL scop_cla = new URL("http://scop.mrc-lmb.cam.ac.uk/scop/parse/dir.cla.scop.txt_"+version); |
362 |
URLConnection sc = scop_cla.openConnection(); |
363 |
in = new BufferedReader(new InputStreamReader(sc.getInputStream())); |
364 |
} else { |
365 |
File scop_cla = new File("/project/StruPPi/Databases/SCOP/dir.cla.scop.txt_"+version); |
366 |
in = new BufferedReader(new FileReader(scop_cla)); |
367 |
} |
368 |
|
369 |
while ((inputLine = in.readLine()) != null) |
370 |
if (inputLine.startsWith(pdbCode,1)) { |
371 |
String[] fields = inputLine.split("\\s+"); |
372 |
String[] regions = fields[2].split(","); |
373 |
for (int j=0; j < regions.length; j++) { |
374 |
Pattern p = Pattern.compile("^(-)|([a-zA-Z\\d]):(-?\\d+[a-zA-Z]*)-(-?\\d+[a-zA-Z]*)|(-?\\d+[a-zA-Z]*)-(-?\\d+[a-zA-Z]*)|([a-zA-Z\\d]):"); |
375 |
Matcher m = p.matcher(regions[j]); |
376 |
if (m.find()) { |
377 |
if (((pdbChainCode.equals("NULL") && ((m.group(1) != null && m.group(1).equals("-")) || m.group(5) != null))) || |
378 |
(m.group(2) != null && m.group(2).equals(pdbChainCode)) || |
379 |
(m.group(7) != null && m.group(7).equals(pdbChainCode))) { |
380 |
if (m.group(3) != null) { |
381 |
startPdbResSer = m.group(3); |
382 |
endPdbResSer = m.group(4); |
383 |
} else if (m.group(5) != null) { |
384 |
startPdbResSer = m.group(5); |
385 |
endPdbResSer = m.group(6); |
386 |
} else { |
387 |
startPdbResSer = get_pdbresser_from_resser(Collections.min(resser2pdbresser.keySet())); |
388 |
endPdbResSer = get_pdbresser_from_resser(Collections.max(resser2pdbresser.keySet())); |
389 |
} |
390 |
sr = new ScopRegion(fields[0], fields[3], Integer.parseInt(fields[4]), j, regions.length, startPdbResSer, endPdbResSer, get_resser_from_pdbresser(startPdbResSer), get_resser_from_pdbresser(endPdbResSer)); |
391 |
scop.add(sr); |
392 |
} |
393 |
} |
394 |
} |
395 |
//break; |
396 |
} |
397 |
in.close(); |
398 |
|
399 |
scop.setVersion(version); |
400 |
} |
401 |
|
402 |
public void runDssp(String dsspExecutable, String dsspParameters) throws IOException { |
403 |
runDssp(dsspExecutable, dsspParameters, SecStrucElement.ReducedState.FOURSTATE, SecStrucElement.ReducedState.FOURSTATE); |
404 |
} |
405 |
|
406 |
/** |
407 |
* Runs an external DSSP executable and (re)assigns the secondary structure annotation from the parsed output. |
408 |
* Existing secondary structure information will be overwritten. |
409 |
* As of September 2007, a DSSP executable can be downloaded from http://swift.cmbi.ru.nl/gv/dssp/ |
410 |
* after filling out a license agreement. For this version, the dsspParamters variable should be |
411 |
* set to "--" (two hyphens). |
412 |
*/ |
413 |
public void runDssp(String dsspExecutable, String dsspParameters, SecStrucElement.ReducedState state4Type, SecStrucElement.ReducedState state4Id) throws IOException { |
414 |
String startLine = " # RESIDUE AA STRUCTURE BP1 BP2 ACC"; |
415 |
String line; |
416 |
int lineCount = 0; |
417 |
char ssType, sheetLabel; |
418 |
TreeMap<Integer, Character> ssTypes; |
419 |
TreeMap<Integer, Character> sheetLabels; |
420 |
int resNum; |
421 |
String resNumStr; |
422 |
File test = new File(dsspExecutable); |
423 |
if(!test.canRead()) throw new IOException("DSSP Executable is not readable"); |
424 |
Process myDssp = Runtime.getRuntime().exec(dsspExecutable + " " + dsspParameters); |
425 |
PrintWriter dsspInput = new PrintWriter(myDssp.getOutputStream()); |
426 |
BufferedReader dsspOutput = new BufferedReader(new InputStreamReader(myDssp.getInputStream())); |
427 |
BufferedReader dsspError = new BufferedReader(new InputStreamReader(myDssp.getErrorStream())); |
428 |
writeAtomLines(dsspInput, true); // pipe atom lines to dssp |
429 |
dsspInput.close(); |
430 |
ssTypes = new TreeMap<Integer,Character>(); |
431 |
sheetLabels = new TreeMap<Integer,Character>(); |
432 |
while((line = dsspOutput.readLine()) != null) { |
433 |
lineCount++; |
434 |
if(line.startsWith(startLine)) { |
435 |
//System.out.println("Dssp Output: "); |
436 |
break; |
437 |
} |
438 |
} |
439 |
while((line = dsspOutput.readLine()) != null) { |
440 |
lineCount++; |
441 |
resNumStr = line.substring(5,10).trim(); |
442 |
ssType = line.charAt(16); |
443 |
sheetLabel = line.charAt(33); |
444 |
if (state4Id == SecStrucElement.ReducedState.FOURSTATE && SecStrucElement.getReducedStateTypeFromDsspType(ssType, state4Id) == SecStrucElement.OTHER) { |
445 |
sheetLabel = ' '; |
446 |
} |
447 |
if(!resNumStr.equals("")) { // this should only happen if dssp inserts a line indicating a chain break |
448 |
try { |
449 |
resNum = Integer.valueOf(resNumStr); |
450 |
ssTypes.put(resNum, ssType); |
451 |
sheetLabels.put(resNum, sheetLabel); |
452 |
} catch (NumberFormatException e) { |
453 |
System.err.println("Error while parsing DSSP output. Expected residue number, found '" + resNumStr + "' in line " + lineCount); |
454 |
} |
455 |
} |
456 |
} |
457 |
//for(char c:ssTypes) {System.out.print(c);}; System.out.println("."); |
458 |
dsspOutput.close(); |
459 |
dsspError.close(); |
460 |
|
461 |
if(ssTypes.size() == 0) { |
462 |
throw new IOException("No DSSP output found."); |
463 |
} |
464 |
|
465 |
if(ssTypes.size() != get_length()) { // compare with number of observed residues |
466 |
System.err.println("Error: DSSP output size (" + ssTypes.size() + ") does not match number of observed residues in structure (" + get_length() + ")."); |
467 |
} |
468 |
|
469 |
// assign secondary structure |
470 |
this.secondaryStructure = new SecondaryStructure(); // forget the old annotation |
471 |
char lastType = SecStrucElement.getReducedStateTypeFromDsspType(ssTypes.get(ssTypes.firstKey()), state4Id); |
472 |
int lastResSer = ssTypes.firstKey(); |
473 |
char lastSheet = sheetLabels.get(lastResSer); |
474 |
char thisType, thisSheet, reducedType; |
475 |
int start = 1; |
476 |
int elementCount = 0; |
477 |
SecStrucElement ssElem; |
478 |
String ssId; |
479 |
for(int resSer:ssTypes.keySet()) { |
480 |
thisType = SecStrucElement.getReducedStateTypeFromDsspType(ssTypes.get(resSer), state4Id); |
481 |
thisSheet = sheetLabels.get(resSer); |
482 |
if(thisType != lastType || thisSheet != lastSheet || resSer > lastResSer+1) { |
483 |
// finish previous element, start new one |
484 |
elementCount++; |
485 |
reducedType = SecStrucElement.getReducedStateTypeFromDsspType(ssTypes.get(lastResSer), state4Type); |
486 |
ssId = new Character(lastType).toString() + (lastSheet==' '?"":String.valueOf(lastSheet)) + new Integer(elementCount).toString(); |
487 |
ssElem = new SecStrucElement(reducedType,start,lastResSer,ssId); |
488 |
secondaryStructure.add(ssElem); |
489 |
start = resSer; |
490 |
lastType = thisType; |
491 |
lastSheet = thisSheet; |
492 |
} |
493 |
lastResSer = resSer; |
494 |
} |
495 |
// finish last element |
496 |
elementCount++; |
497 |
reducedType = SecStrucElement.getReducedStateTypeFromDsspType(ssTypes.get(ssTypes.lastKey()), state4Type); |
498 |
ssId = new Character(lastType).toString() + (lastSheet==' '?"":String.valueOf(lastSheet)) + new Integer(elementCount).toString(); |
499 |
ssElem = new SecStrucElement(reducedType, start,ssTypes.lastKey(),ssId); |
500 |
secondaryStructure.add(ssElem); |
501 |
|
502 |
secondaryStructure.setComment("DSSP"); |
503 |
} |
504 |
|
505 |
/** Writes atom lines for this structure to the given output stream */ |
506 |
private void writeAtomLines(PrintWriter Out, boolean pdbCompatible) { |
507 |
TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>(); |
508 |
Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'"); |
509 |
String chainCodeStr = chainCode; |
510 |
if (pdbCompatible) { |
511 |
chainCodeStr = chainCode.substring(0,1); |
512 |
} |
513 |
for (String resser_atom:resser_atom2atomserial.keySet()){ |
514 |
int atomserial = resser_atom2atomserial.get(resser_atom); |
515 |
int res_serial = Integer.parseInt(resser_atom.split("_")[0]); |
516 |
String atom = resser_atom.split("_")[1]; |
517 |
String res_type = resser2restype.get(res_serial); |
518 |
Point3d coords = atomser2coord.get(atomserial); |
519 |
String atomType = atom.substring(0,1); |
520 |
Object[] fields = {atomserial, atom, res_type, chainCodeStr, res_serial, coords.x, coords.y, coords.z, atomType}; |
521 |
lines.put(atomserial, fields); |
522 |
} |
523 |
for (int atomserial:lines.keySet()){ |
524 |
// Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales) |
525 |
Out.printf(Locale.US,"ATOM %5d %-3s %3s %1s%4d %8.3f%8.3f%8.3f 1.00 0.00 %s\n",lines.get(atomserial)); |
526 |
} |
527 |
Out.println("END"); |
528 |
Out.close(); |
529 |
} |
530 |
|
531 |
/** |
532 |
* Dumps coordinate data into a file in pdb format (ATOM lines only) |
533 |
* The residue serials dumped are the internal ones. |
534 |
* The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects |
535 |
* @param outfile |
536 |
* @throws IOException |
537 |
*/ |
538 |
public void dump2pdbfile(String outfile) throws IOException { |
539 |
dump2pdbfile(outfile, false); |
540 |
} |
541 |
|
542 |
public void dump2pdbfile(String outfile, boolean pdbCompatible) throws IOException { |
543 |
PrintWriter Out = new PrintWriter(new FileOutputStream(outfile)); |
544 |
writeAtomLines(Out, pdbCompatible); |
545 |
} |
546 |
|
547 |
/** |
548 |
* Dump the full sequence of this Pdb object in fasta file format |
549 |
* @param seqfile |
550 |
* @throws IOException |
551 |
*/ |
552 |
public void dumpseq(String seqfile) throws IOException { |
553 |
PrintStream Out = new PrintStream(new FileOutputStream(seqfile)); |
554 |
Out.println(">"+pdbCode+"_"+pdbChainCode); |
555 |
Out.println(sequence); |
556 |
Out.close(); |
557 |
} |
558 |
|
559 |
/** |
560 |
* Returns the number of observed standard residues. |
561 |
* TODO: Refactor method name |
562 |
*/ |
563 |
public int get_length(){ |
564 |
return obsLength; |
565 |
} |
566 |
|
567 |
/** Returns the number of residues in the sequence of this protein. */ |
568 |
public int getFullLength() { |
569 |
return fullLength; |
570 |
} |
571 |
|
572 |
/** |
573 |
* Returns number of (non-Hydrogen) atoms in the protein |
574 |
* @return |
575 |
*/ |
576 |
public int getNumAtoms() { |
577 |
return atomser2atom.size(); |
578 |
} |
579 |
|
580 |
/** |
581 |
* Gets a TreeMap with atom serials as keys and their coordinates as values for the given contact type |
582 |
* The contact type can't be a cross contact type, it doesn't make sense here |
583 |
* @param ct |
584 |
* @return |
585 |
*/ |
586 |
private TreeMap<Integer,Point3d> get_coords_for_ct(String ct) { |
587 |
TreeMap<Integer,Point3d> coords = new TreeMap<Integer,Point3d>(); |
588 |
for (int resser:resser2restype.keySet()){ |
589 |
Set<String> atoms = AAinfo.getAtomsForCTAndRes(ct, resser2restype.get(resser)); |
590 |
for (String atom:atoms){ |
591 |
if (resser_atom2atomserial.containsKey(resser+"_"+atom)){ |
592 |
int atomser = resser_atom2atomserial.get(resser+"_"+atom); |
593 |
Point3d coord = atomser2coord.get(atomser); |
594 |
coords.put(atomser, coord); |
595 |
} |
596 |
else { |
597 |
//System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser."); |
598 |
} |
599 |
} |
600 |
// in cts ("ALL","BB") we still miss the OXT, we need to add it now if it is there (it will be there when this resser is the last residue) |
601 |
if ((ct.equals("ALL") || ct.equals("BB")) && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){ |
602 |
int atomser = resser_atom2atomserial.get(resser+"_"+"OXT"); |
603 |
Point3d coord = atomser2coord.get(atomser); |
604 |
coords.put(atomser, coord); |
605 |
} |
606 |
} |
607 |
return coords; |
608 |
} |
609 |
|
610 |
/** |
611 |
* Gets a TreeMap with residue serials+"_"+atomname as keys and their coordinates as values for the given contact type |
612 |
* The contact type can't be a cross contact type, it doesn't make sense here |
613 |
* Used in rmsd method |
614 |
* @param ct |
615 |
* @return |
616 |
*/ |
617 |
private TreeMap<String,Point3d> get_coords_for_ct_4rmsd(String ct) { |
618 |
TreeMap<String,Point3d> coords = new TreeMap<String,Point3d>(); |
619 |
for (int resser:resser2restype.keySet()){ |
620 |
Set<String> atoms = AAinfo.getAtomsForCTAndRes(ct,resser2restype.get(resser)); |
621 |
for (String atom:atoms){ |
622 |
if (resser_atom2atomserial.containsKey(resser+"_"+atom)){ |
623 |
int atomser = resser_atom2atomserial.get(resser+"_"+atom); |
624 |
Point3d coord = atomser2coord.get(atomser); |
625 |
coords.put(resser+"_"+atom, coord); |
626 |
} |
627 |
} |
628 |
// in cts ("ALL","BB") we still miss the OXT, we need to add it now if it is there (it will be there when this resser is the last residue) |
629 |
if ((ct.equals("ALL") || ct.equals("BB")) && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){ |
630 |
int atomser = resser_atom2atomserial.get(resser+"_"+"OXT"); |
631 |
Point3d coord = atomser2coord.get(atomser); |
632 |
coords.put(resser+"_"+"OXT", coord); |
633 |
} |
634 |
} |
635 |
return coords; |
636 |
} |
637 |
|
638 |
/** |
639 |
* Returns the distance matrix as a HashMap with residue serial pairs as keys |
640 |
* For multi atom contact types the distance matrix has the minimum distance for each pair of |
641 |
* residues |
642 |
* AAinfo.isValidSingleAtomCT(ct) can be used to check before calling. |
643 |
* @param ct contact type for which distances are being calculated |
644 |
* @return a map which assigns to each edge the corresponding distance |
645 |
*/ |
646 |
public HashMap<Pair<Integer>, Double> calculate_dist_matrix(String ct){ |
647 |
HashMap<Pair<Integer>,Double> distMatrixAtoms = calculate_atom_dist_matrix(ct); |
648 |
|
649 |
// mapping atom serials to residue serials |
650 |
// TODO: we could integrate this with the code in calculate_atom_dist_matrix to avoid storing two distance maps in memory |
651 |
HashMap<Pair<Integer>,Double> distMatrixRes = new HashMap<Pair<Integer>, Double>(); |
652 |
for (Pair<Integer> cont: distMatrixAtoms.keySet()){ |
653 |
int i_resser = get_resser_from_atomser(cont.getFirst()); |
654 |
int j_resser = get_resser_from_atomser(cont.getSecond()); |
655 |
Pair<Integer> edge = new Pair<Integer>(i_resser,j_resser); |
656 |
if (distMatrixRes.containsKey(edge)) { |
657 |
distMatrixRes.put(edge, Math.min(distMatrixRes.get(edge), distMatrixAtoms.get(cont))); |
658 |
} else { |
659 |
distMatrixRes.put(edge, distMatrixAtoms.get(cont)); |
660 |
} |
661 |
} |
662 |
|
663 |
return distMatrixRes; |
664 |
} |
665 |
|
666 |
/** |
667 |
* Returns the distance matrix as a HashMap with atom serial pairs as keys |
668 |
* This method can be used for any contact type |
669 |
* AAinfo.isValidSingleAtomCT(ct) can be used to check before calling. |
670 |
* @param ct contact type for which distances are being calculated |
671 |
* @return a map which assings to each atom pair the corresponding distance |
672 |
*/ |
673 |
public HashMap<Pair<Integer>, Double> calculate_atom_dist_matrix(String ct){ |
674 |
HashMap<Pair<Integer>,Double> distMatrixAtoms = new HashMap<Pair<Integer>,Double>(); |
675 |
if (!ct.contains("/")){ |
676 |
TreeMap<Integer,Point3d> coords = get_coords_for_ct(ct); |
677 |
for (int i_atomser:coords.keySet()){ |
678 |
for (int j_atomser:coords.keySet()){ |
679 |
if (j_atomser>i_atomser) { |
680 |
Pair<Integer> pair = new Pair<Integer>(i_atomser,j_atomser); |
681 |
distMatrixAtoms.put(pair, coords.get(i_atomser).distance(coords.get(j_atomser))); |
682 |
} |
683 |
} |
684 |
} |
685 |
} else { |
686 |
String i_ct = ct.split("/")[0]; |
687 |
String j_ct = ct.split("/")[1]; |
688 |
TreeMap<Integer,Point3d> i_coords = get_coords_for_ct(i_ct); |
689 |
TreeMap<Integer,Point3d> j_coords = get_coords_for_ct(j_ct); |
690 |
for (int i_atomser:i_coords.keySet()){ |
691 |
for (int j_atomser:j_coords.keySet()){ |
692 |
if (j_atomser!=i_atomser){ |
693 |
Pair<Integer> pair = new Pair<Integer>(i_atomser,j_atomser); |
694 |
distMatrixAtoms.put(pair, i_coords.get(i_atomser).distance(j_coords.get(j_atomser))); |
695 |
} |
696 |
} |
697 |
} |
698 |
} |
699 |
|
700 |
return distMatrixAtoms; |
701 |
} |
702 |
|
703 |
/** |
704 |
* Calculates the radius of gyration of this Pdb |
705 |
* (defined as half of the maximum distance between any 2 CA atoms) |
706 |
* @return |
707 |
*/ |
708 |
public double calculateRadGyration() { |
709 |
//TODO this is a very raw implementation o(n^2): should optimise it if that's really needed |
710 |
return Collections.max(this.calculate_atom_dist_matrix("Ca").values())/2; |
711 |
} |
712 |
|
713 |
/** |
714 |
* Get the graph for given contact type and cutoff for this Pdb object. |
715 |
* Returns a Graph object with the contacts |
716 |
* We do geometric hashing for fast contact computation (without needing to calculate full distance matrix) |
717 |
* @param ct |
718 |
* @param cutoff |
719 |
* @return |
720 |
*/ |
721 |
private AIGraph getAIGraph(String ct, double cutoff){ |
722 |
TreeMap<Integer,Point3d> i_coords = null; |
723 |
TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymetric edge types |
724 |
boolean crossed = false; |
725 |
if (!ct.contains("/")){ |
726 |
i_coords = get_coords_for_ct(ct); |
727 |
crossed = false; |
728 |
} else { |
729 |
String i_ct = ct.split("/")[0]; |
730 |
String j_ct = ct.split("/")[1]; |
731 |
i_coords = get_coords_for_ct(i_ct); |
732 |
j_coords = get_coords_for_ct(j_ct); |
733 |
crossed = true; |
734 |
} |
735 |
int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials |
736 |
int[] j_atomserials = null; |
737 |
|
738 |
int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A) |
739 |
|
740 |
int boxSize = (int) Math.floor(cutoff*SCALE); |
741 |
|
742 |
HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>(); |
743 |
int i=0; |
744 |
for (int i_atomser:i_coords.keySet()){ |
745 |
//coordinates for atom serial atomser, we will use i as its identifier below |
746 |
Point3d coord = i_coords.get(i_atomser); |
747 |
int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize)); |
748 |
int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize)); |
749 |
int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize)); |
750 |
Point3i floor = new Point3i(floorX,floorY,floorZ); |
751 |
if (boxes.containsKey(floor)){ |
752 |
// we put the coords for atom i in its corresponding box (identified by floor) |
753 |
boxes.get(floor).put_i_Point(i, coord); |
754 |
if (!crossed){ |
755 |
boxes.get(floor).put_j_Point(i, coord); |
756 |
} |
757 |
} else { |
758 |
Box box = new Box(floor); |
759 |
box.put_i_Point(i, coord); |
760 |
if (!crossed){ |
761 |
box.put_j_Point(i, coord); |
762 |
} |
763 |
boxes.put(floor,box); |
764 |
} |
765 |
i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered |
766 |
i++; |
767 |
} |
768 |
int j=0; |
769 |
if (crossed) { |
770 |
j_atomserials = new int[j_coords.size()]; |
771 |
for (int j_atomser:j_coords.keySet()){ |
772 |
//coordinates for atom serial atomser, we will use j as its identifier below |
773 |
Point3d coord = j_coords.get(j_atomser); |
774 |
int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize)); |
775 |
int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize)); |
776 |
int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize)); |
777 |
Point3i floor = new Point3i(floorX,floorY,floorZ); |
778 |
if (boxes.containsKey(floor)){ |
779 |
// we put the coords for atom j in its corresponding box (identified by floor) |
780 |
boxes.get(floor).put_j_Point(j, coord); |
781 |
} else { |
782 |
Box box = new Box(floor); |
783 |
box.put_j_Point(j, coord); |
784 |
boxes.put(floor,box); |
785 |
} |
786 |
j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered |
787 |
j++; |
788 |
} |
789 |
} else { |
790 |
j_atomserials = i_atomserials; |
791 |
} |
792 |
|
793 |
|
794 |
float[][]distMatrix = new float[i_atomserials.length][j_atomserials.length]; |
795 |
|
796 |
for (Point3i floor:boxes.keySet()){ // for each box |
797 |
// distances of points within this box |
798 |
boxes.get(floor).getDistancesWithinBox(distMatrix,crossed); |
799 |
|
800 |
//TODO should iterate only through half of the neighbours here |
801 |
// distances of points from this box to all neighbouring boxes: 26 iterations (26 neighbouring boxes) |
802 |
for (int x=floor.x-boxSize;x<=floor.x+boxSize;x+=boxSize){ |
803 |
for (int y=floor.y-boxSize;y<=floor.y+boxSize;y+=boxSize){ |
804 |
for (int z=floor.z-boxSize;z<=floor.z+boxSize;z+=boxSize){ |
805 |
if (!((x==floor.x)&&(y==floor.y)&&(z==floor.z))) { // skip this box |
806 |
Point3i neighbor = new Point3i(x,y,z); |
807 |
if (boxes.containsKey(neighbor)){ |
808 |
boxes.get(floor).getDistancesToNeighborBox(boxes.get(neighbor),distMatrix,crossed); |
809 |
} |
810 |
} |
811 |
} |
812 |
} |
813 |
} |
814 |
} |
815 |
|
816 |
// creating the AIGraph |
817 |
AIGraph graph = new AIGraph(); |
818 |
TreeMap<Integer,AIGNode> aignodemap = new TreeMap<Integer,AIGNode>(); |
819 |
TreeMap<Integer,RIGNode> rignodemap = new TreeMap<Integer,RIGNode>(); |
820 |
TreeSet<Integer> atomSerials = new TreeSet<Integer>(); |
821 |
atomSerials.addAll(i_coords.keySet()); |
822 |
if (j_coords!=null){ |
823 |
atomSerials.addAll(j_coords.keySet()); |
824 |
} |
825 |
// adding the AIGNodes (including parent RIGNode references) |
826 |
SecondaryStructure secondaryStructureCopy = secondaryStructure.copy(); |
827 |
for (int atomSer:atomSerials) { |
828 |
int resser = get_resser_from_atomser(atomSer); |
829 |
SecStrucElement sselem = secondaryStructureCopy.getSecStrucElement(resser); |
830 |
if (!rignodemap.containsKey(resser)) { |
831 |
// NOTE!: we are passing references to the SecStrucElement objects! they point to the same objects as secondaryStructureCopy |
832 |
RIGNode resNode = new RIGNode(resser, resser2restype.get(resser), sselem); |
833 |
rignodemap.put(resser,resNode); |
834 |
} |
835 |
AIGNode atomNode = new AIGNode(atomSer,atomser2atom.get(atomSer),rignodemap.get(resser)); |
836 |
aignodemap.put(atomSer,atomNode); |
837 |
graph.addVertex(atomNode); |
838 |
} |
839 |
|
840 |
graph.setSecondaryStructure(secondaryStructureCopy); |
841 |
graph.setSerials2NodesMap(aignodemap); |
842 |
graph.setCutoff(cutoff); |
843 |
graph.setSequence(sequence); |
844 |
graph.setPdbCode(pdbCode); |
845 |
graph.setChainCode(chainCode); |
846 |
graph.setPdbChainCode(pdbChainCode); |
847 |
graph.setModel(model); |
848 |
graph.setSid(sid); |
849 |
graph.setCrossed(crossed); |
850 |
|
851 |
// populating the AIGraph with AIGEdges |
852 |
for (i=0;i<distMatrix.length;i++){ |
853 |
for (j=0;j<distMatrix[i].length;j++){ |
854 |
// the condition distMatrix[i][j]!=0.0 takes care of skipping several things: |
855 |
// - diagonal of the matrix in case of non-crossed |
856 |
// - lower half of matrix in case of non-crossed |
857 |
// - cells for which we didn't calculate a distance because the 2 points were not in same or neighbouring boxes (i.e. too far apart) |
858 |
if (distMatrix[i][j]!=0.0f && distMatrix[i][j]<=cutoff){ |
859 |
if (!crossed) { |
860 |
graph.addEdge(new AIGEdge(distMatrix[i][j]), aignodemap.get(i_atomserials[i]), aignodemap.get(j_atomserials[j]), EdgeType.UNDIRECTED); |
861 |
} |
862 |
// This condition is to take care of crossed contact types that have overlapping sets of atoms: |
863 |
// the matrix would contain both i,j and j,i but that's only 1 edge in the AIGraph |
864 |
//TODO if our AIGraph didn't allow parallel edges, this extra check woudn't be necessary |
865 |
else if (graph.findEdge(aignodemap.get(i_atomserials[i]), aignodemap.get(j_atomserials[j]))==null) { |
866 |
graph.addEdge(new AIGEdge(distMatrix[i][j]), aignodemap.get(i_atomserials[i]), aignodemap.get(j_atomserials[j]), EdgeType.UNDIRECTED); |
867 |
} |
868 |
} |
869 |
|
870 |
} |
871 |
} |
872 |
|
873 |
return graph; |
874 |
} |
875 |
|
876 |
/** |
877 |
* Returns a RIGraph for given contact type and cutoff |
878 |
* @param ct |
879 |
* @param cutoff |
880 |
* @return |
881 |
*/ |
882 |
public RIGraph get_graph(String ct, double cutoff, boolean directed) { |
883 |
//NOTE:To generate forbidden graphs, AIGraph should become directed and |
884 |
//-the check for adding parallel atomic edges if crossed should be removed |
885 |
//-atomic edges in both directions should be added if !crossed |
886 |
//-make sure in getRIGraph for undirected graphs not to double-count atomic edges |
887 |
if (directed && AAinfo.isOverlapping(ct)) { |
888 |
throw new IllegalArgumentException(); |
889 |
} |
890 |
|
891 |
String[] cts = ct.split("\\+"); |
892 |
AIGraph atomGraph = getAIGraph(cts[0], cutoff); |
893 |
for(int i=1; i< cts.length; i++) { |
894 |
atomGraph.addGraph(getAIGraph(cts[i], cutoff)); |
895 |
} |
896 |
RIGraph graph = atomGraph.getRIGraph(directed); |
897 |
|
898 |
graph.setContactType(ct); |
899 |
graph.setCutoff(cutoff); |
900 |
|
901 |
return graph; |
902 |
} |
903 |
|
904 |
public RIGraph get_graph(String ct, double cutoff) { |
905 |
// TODO eventually we should use the ContactType class as parameter |
906 |
boolean crossed = false; |
907 |
if (ct.contains("/")) { |
908 |
crossed = true; |
909 |
} |
910 |
return get_graph(ct, cutoff, crossed); |
911 |
} |
912 |
|
913 |
/** |
914 |
* Returns an all atom graph in a AIGraph object |
915 |
* @param cutoff |
916 |
* @return |
917 |
*/ |
918 |
public AIGraph getAllAtomGraph(double cutoff) { |
919 |
return this.getAIGraph("ALL", cutoff); |
920 |
} |
921 |
|
922 |
public void calcGridDensity(String ct, double cutoff, Map<Integer, Integer> densityCount) { |
923 |
TreeMap<Integer,Point3d> i_coords = null; |
924 |
TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymmetric edge types |
925 |
boolean directed = false; |
926 |
if (!ct.contains("/")){ |
927 |
i_coords = get_coords_for_ct(ct); // mapping from atom serials to coordinates |
928 |
directed = false; |
929 |
} else { |
930 |
String i_ct = ct.split("/")[0]; |
931 |
String j_ct = ct.split("/")[1]; |
932 |
i_coords = get_coords_for_ct(i_ct); |
933 |
j_coords = get_coords_for_ct(j_ct); |
934 |
directed = true; |
935 |
} |
936 |
int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials |
937 |
int[] j_atomserials = null; |
938 |
|
939 |
int SCALE=100; // i.e. we use units of hundredths of Angstroms (thus cutoffs can be specified with a maximum precission of 0.01A) |
940 |
|
941 |
int boxSize = (int) Math.floor(cutoff*SCALE); |
942 |
|
943 |
HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>(); |
944 |
int i=0; |
945 |
for (int i_atomser:i_coords.keySet()){ |
946 |
//coordinates for atom serial atomser, we will use i as its identifier below |
947 |
Point3d coord = i_coords.get(i_atomser); |
948 |
int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize)); |
949 |
int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize)); |
950 |
int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize)); |
951 |
Point3i floor = new Point3i(floorX,floorY,floorZ); |
952 |
if (boxes.containsKey(floor)){ |
953 |
// we put the coords for atom i in its corresponding box (identified by floor) |
954 |
boxes.get(floor).put_i_Point(i, coord); |
955 |
if (!directed){ |
956 |
boxes.get(floor).put_j_Point(i, coord); |
957 |
} |
958 |
} else { |
959 |
Box box = new Box(floor); |
960 |
box.put_i_Point(i, coord); |
961 |
if (!directed){ |
962 |
box.put_j_Point(i, coord); |
963 |
} |
964 |
boxes.put(floor,box); |
965 |
} |
966 |
i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered |
967 |
i++; |
968 |
} |
969 |
int j=0; |
970 |
if (directed) { |
971 |
j_atomserials = new int[j_coords.size()]; |
972 |
for (int j_atomser:j_coords.keySet()){ |
973 |
//coordinates for atom serial atomser, we will use j as its identifier below |
974 |
Point3d coord = j_coords.get(j_atomser); |
975 |
int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize)); |
976 |
int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize)); |
977 |
int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize)); |
978 |
Point3i floor = new Point3i(floorX,floorY,floorZ); |
979 |
if (boxes.containsKey(floor)){ |
980 |
// we put the coords for atom j in its corresponding box (identified by floor) |
981 |
boxes.get(floor).put_j_Point(j, coord); |
982 |
} else { |
983 |
Box box = new Box(floor); |
984 |
box.put_j_Point(j, coord); |
985 |
boxes.put(floor,box); |
986 |
} |
987 |
j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered |
988 |
j++; |
989 |
} |
990 |
} else { |
991 |
j_atomserials = i_atomserials; |
992 |
} |
993 |
|
994 |
// count density |
995 |
for(Point3i floor:boxes.keySet()) { |
996 |
//int size = boxes.get(floor).size(); |
997 |
int size = getNumGridNbs(boxes, floor, boxSize); // count number of neighbouring grid cells with points in them |
998 |
if(densityCount.containsKey(size)) { |
999 |
int old = densityCount.get(size); |
1000 |
densityCount.put(size, ++old); |
1001 |
} else { |
1002 |
densityCount.put(size, 1); |
1003 |
} |
1004 |
} |
1005 |
|
1006 |
|
1007 |
} |
1008 |
|
1009 |
/** |
1010 |
* Calculates and returns the difference of the distance maps of this |
1011 |
* structure and another pdb object. On error returns null. Note that |
1012 |
* the distance maps for the single structures have to have the same |
1013 |
* size. |
1014 |
* @param contactType1 contact type of this structure |
1015 |
* @param pdb2 the second structure |
1016 |
* @param contactType2 contact type of the second structure |
1017 |
* @return the difference distance map |
1018 |
* @see #getDiffDistMap(String, Pdb, String, Alignment, String, String) |
1019 |
*/ |
1020 |
public HashMap<Pair<Integer>,Double> getDiffDistMap(String contactType1, Pdb pdb2, String contactType2) { |
1021 |
double dist1, dist2, diff; |
1022 |
HashMap<Pair<Integer>,Double> otherDistMatrix = pdb2.calculate_dist_matrix(contactType2); |
1023 |
HashMap<Pair<Integer>,Double> thisDistMatrix = this.calculate_dist_matrix(contactType1); |
1024 |
if(thisDistMatrix.size() != otherDistMatrix.size()) { |
1025 |
System.err.println("Cannot calculate difference distance map. Matrix sizes do not match."); |
1026 |
return null; |
1027 |
} |
1028 |
for(Pair<Integer> e:otherDistMatrix.keySet()){ |
1029 |
dist1 = otherDistMatrix.get(e); |
1030 |
if(!thisDistMatrix.containsKey(e)) { |
1031 |
System.err.println("Error while calculating difference distance map. Entry " + e + " in matrix1 not not found in matrix2."); |
1032 |
return null; |
1033 |
} |
1034 |
dist2 = thisDistMatrix.get(e); |
1035 |
diff = Math.abs(dist1-dist2); |
1036 |
otherDistMatrix.put(e, diff); |
1037 |
} |
1038 |
return otherDistMatrix; |
1039 |
} |
1040 |
|
1041 |
/** |
1042 |
* Calculates the difference distance map of this structure and |
1043 |
* another pdb object given a sequence alignment of the structures. The |
1044 |
* resulting difference distance map may contains non-defined distances. |
1045 |
* This behavior is due to the alignment. If any residue in either |
1046 |
* structures is aligned with a gap one cannot assign a "difference |
1047 |
* distance" to another residue pair. |
1048 |
* @param contactType1 contact type of this structure |
1049 |
* @param pdb2 the second structure |
1050 |
* @param contactType2 contact type of the second structure |
1051 |
* @param ali sequence alignment of both structures |
1052 |
* @param name1 sequence tag of the this structure in the alignment |
1053 |
* @param name2 sequence tag og the second structure in the alignment |
1054 |
* @return the difference distance map |
1055 |
* @see #getDiffDistMap(String, Pdb, String) |
1056 |
*/ |
1057 |
public HashMap<Pair<Integer>,Double> getDiffDistMap(String contactType1, Pdb pdb2, String contactType2, Alignment ali, String name1, String name2) { |
1058 |
|
1059 |
HashMap<Pair<Integer>,Double> otherDistMatrix = pdb2.calculate_dist_matrix(contactType2); |
1060 |
HashMap<Pair<Integer>,Double> thisDistMatrix = this.calculate_dist_matrix(contactType1); |
1061 |
HashMap<Pair<Integer>,Double> alignedDistMatrix = new HashMap<Pair<Integer>, Double>(Math.min(this.getFullLength(), pdb2.getFullLength())); |
1062 |
int i1,i2,j1,j2; |
1063 |
TreeSet<Integer> unobserved1 = new TreeSet<Integer>(); |
1064 |
TreeSet<Integer> unobserved2 = new TreeSet<Integer>(); |
1065 |
|
1066 |
// detect all unobserved residues |
1067 |
for(int i = 0; i < ali.getAlignmentLength(); ++i) { |
1068 |
i1 = ali.al2seq(name1, i); |
1069 |
i2 = ali.al2seq(name2, i); |
1070 |
if( i1 != -1 && !hasCoordinates(i1) ) { |
1071 |
unobserved1.add(i1); |
1072 |
} |
1073 |
if( i2 != -1 && !pdb2.hasCoordinates(i2) ) { |
1074 |
unobserved2.add(i2); |
1075 |
} |
1076 |
} |
1077 |
|
1078 |
// strategy: we always have to look through the alignment to say |
1079 |
// whether a difference distance can be assigned to a pair of |
1080 |
// corresponding residues. To put it differently, for any two |
1081 |
// alignment columns one always has to ensure that both columns |
1082 |
// only contain observed residues (no gaps!), otherwise the one |
1083 |
// cannot obtain a distance in at least one structure as a gap |
1084 |
// indicates "no coordinates available". |
1085 |
|
1086 |
for(int i = 0; i < ali.getAlignmentLength()-1; ++i) { |
1087 |
|
1088 |
i1 = ali.al2seq(name1, i); |
1089 |
i2 = ali.al2seq(name2, i); |
1090 |
|
1091 |
// alignment columns must not contain gap characters and both |
1092 |
// residues in the current column have to be observed! |
1093 |
if( i1 == -1 || i2 == -1 || unobserved1.contains(i1) || unobserved2.contains(i2) ) { |
1094 |
continue; |
1095 |
} |
1096 |
|
1097 |
for(int j = i + 1; j < ali.getAlignmentLength(); ++j) { |
1098 |
|
1099 |
j1 = ali.al2seq(name1, j); |
1100 |
j2 = ali.al2seq(name2, j); |
1101 |
|
1102 |
if( j1 == -1 || j2 == -1 || unobserved1.contains(j1) || unobserved2.contains(j2) ) { |
1103 |
continue; |
1104 |
} |
1105 |
|
1106 |
// make the edges |
1107 |
Pair<Integer> e1 = new Pair<Integer>(i1,j1); |
1108 |
Pair<Integer> e2 = new Pair<Integer>(i2,j2); |
1109 |
|
1110 |
alignedDistMatrix.put(new Pair<Integer>(i+1,j+1),Math.abs(thisDistMatrix.get(e1)-otherDistMatrix.get(e2))); |
1111 |
} |
1112 |
} |
1113 |
return alignedDistMatrix; |
1114 |
} |
1115 |
// TODO: Version of this where already buffered distance matrices are passed as paremeters |
1116 |
|
1117 |
/** Returns the number of neighbours of this grid cell */ |
1118 |
private int getNumGridNbs(HashMap<Point3i,Box> boxes, Point3i floor, int boxSize) { |
1119 |
Point3i neighbor; |
1120 |
int nbs = 0; |
1121 |
for (int x=floor.x-boxSize;x<=floor.x+boxSize;x+=boxSize){ |
1122 |
for (int y=floor.y-boxSize;y<=floor.y+boxSize;y+=boxSize){ |
1123 |
for (int z=floor.z-boxSize;z<=floor.z+boxSize;z+=boxSize){ |
1124 |
neighbor = new Point3i(x,y,z); |
1125 |
if (boxes.containsKey(neighbor)) nbs++; |
1126 |
} |
1127 |
} |
1128 |
} |
1129 |
// compensate for counting myself as a neighbour |
1130 |
if(boxes.containsKey(floor)) nbs--; |
1131 |
return nbs; |
1132 |
} |
1133 |
|
1134 |
/** |
1135 |
* Gets the Consurf-HSSP score given an internal residue serial |
1136 |
* @param resser |
1137 |
* @return |
1138 |
*/ |
1139 |
public Double getConsurfhsspScoreFromResSerial(int resser){ |
1140 |
if (resser2consurfhsspscore != null) { |
1141 |
if (resser2consurfhsspscore.get(resser) != null) { |
1142 |
return resser2consurfhsspscore.get(resser); |
1143 |
} else { |
1144 |
return null; |
1145 |
} |
1146 |
} else { |
1147 |
return null; |
1148 |
} |
1149 |
} |
1150 |
|
1151 |
/** |
1152 |
* Gets the Consurf-HSSP color rsa given an internal residue serial |
1153 |
* @param resser |
1154 |
* @return |
1155 |
*/ |
1156 |
public Integer getConsurfhsspColorFromResSerial(int resser){ |
1157 |
if (resser2consurfhsspcolor != null) { |
1158 |
if (resser2consurfhsspcolor.get(resser) != null) { |
1159 |
return resser2consurfhsspcolor.get(resser); |
1160 |
} else { |
1161 |
return null; |
1162 |
} |
1163 |
} else { |
1164 |
return null; |
1165 |
} |
1166 |
} |
1167 |
|
1168 |
/** |
1169 |
* Gets the all atoms rsa given an internal residue serial |
1170 |
* @param resser |
1171 |
* @return |
1172 |
*/ |
1173 |
public Double getAllRsaFromResSerial(int resser){ |
1174 |
if (resser2allrsa != null) { |
1175 |
if (resser2allrsa.get(resser) != null) { |
1176 |
return resser2allrsa.get(resser); |
1177 |
} else { |
1178 |
return null; |
1179 |
} |
1180 |
} else { |
1181 |
return null; |
1182 |
} |
1183 |
} |
1184 |
|
1185 |
/** |
1186 |
* Gets the sc rsa given an internal residue serial |
1187 |
* @param resser |
1188 |
* @return |
1189 |
*/ |
1190 |
public Double getScRsaFromResSerial(int resser){ |
1191 |
if (resser2scrsa != null) { |
1192 |
if (resser2scrsa.get(resser) != null) { |
1193 |
return resser2scrsa.get(resser); |
1194 |
} else { |
1195 |
return null; |
1196 |
} |
1197 |
} else { |
1198 |
return null; |
1199 |
} |
1200 |
} |
1201 |
|
1202 |
/** |
1203 |
* Gets the internal residue serial (cif) given a pdb residue serial (author assignment) |
1204 |
* TODO refactor |
1205 |
* @param pdbresser |
1206 |
* @return |
1207 |
*/ |
1208 |
public int get_resser_from_pdbresser (String pdbresser){ |
1209 |
return pdbresser2resser.get(pdbresser); |
1210 |
} |
1211 |
|
1212 |
/** |
1213 |
* Gets the pdb residue serial (author assignment) given an internal residue serial (cif) |
1214 |
* TODO refactor |
1215 |
* @param resser |
1216 |
* @return |
1217 |
*/ |
1218 |
public String get_pdbresser_from_resser (int resser){ |
1219 |
return resser2pdbresser.get(resser); |
1220 |
} |
1221 |
|
1222 |
/** |
1223 |
* Gets the residue serial given an atom serial |
1224 |
* TODO refactor |
1225 |
* @param atomser |
1226 |
* @return |
1227 |
*/ |
1228 |
public int get_resser_from_atomser(int atomser){ |
1229 |
return atomser2resser.get(atomser); |
1230 |
} |
1231 |
|
1232 |
public String getResTypeFromResSerial(int resser) { |
1233 |
return resser2restype.get(resser); |
1234 |
} |
1235 |
|
1236 |
/** |
1237 |
* Gets the atom serial given the residue serial and atom name |
1238 |
* @param resser |
1239 |
* @param atom |
1240 |
* @return |
1241 |
*/ |
1242 |
public int getAtomSerFromResSerAndAtom(int resser, String atom) { |
1243 |
return resser_atom2atomserial.get(resser+"_"+atom); |
1244 |
} |
1245 |
|
1246 |
/** |
1247 |
* Checks whether the given residue serial has any associated coordinates. |
1248 |
* @param ser the residue serial |
1249 |
* @return true if there is at least one atom with valid coordinates, else false |
1250 |
*/ |
1251 |
public boolean hasCoordinates(int resser) { |
1252 |
return atomser2resser.values().contains(resser); |
1253 |
} |
1254 |
|
1255 |
/** |
1256 |
* Returns true if this Pdb has been restricted to a specific SCOP domain |
1257 |
* @return |
1258 |
*/ |
1259 |
public boolean isRestrictedToScopDomain() { |
1260 |
return sid!=null; |
1261 |
} |
1262 |
|
1263 |
/** |
1264 |
* Returns the sid of this Pdb |
1265 |
* It is set when restrictToScopDomain is run |
1266 |
*/ |
1267 |
public String getSid() { |
1268 |
return sid; |
1269 |
} |
1270 |
|
1271 |
/** |
1272 |
* Gets the atom coordinates (Point3d object) given the atom serial |
1273 |
* @param atomser |
1274 |
* @return |
1275 |
*/ |
1276 |
public Point3d getAtomCoord(int atomser) { |
1277 |
return this.atomser2coord.get(atomser); |
1278 |
} |
1279 |
|
1280 |
/** |
1281 |
* Gets all atom serials in a Set |
1282 |
* @return |
1283 |
*/ |
1284 |
public Set<Integer> getAllAtomSerials() { |
1285 |
return this.atomser2resser.keySet(); |
1286 |
} |
1287 |
|
1288 |
/** |
1289 |
* Gets the 4 letter pdb code identifying this structure |
1290 |
* @return |
1291 |
*/ |
1292 |
public String getPdbCode() { |
1293 |
return this.pdbCode; |
1294 |
} |
1295 |
|
1296 |
/** |
1297 |
* Gets the internal chain code (cif) |
1298 |
* @return |
1299 |
*/ |
1300 |
public String getChainCode(){ |
1301 |
return this.chainCode; |
1302 |
} |
1303 |
|
1304 |
/** |
1305 |
* Gets the pdb chain code (author assignment) |
1306 |
* @return |
1307 |
*/ |
1308 |
public String getPdbChainCode(){ |
1309 |
return this.pdbChainCode; |
1310 |
} |
1311 |
|
1312 |
/** |
1313 |
* Gets the sequence |
1314 |
* @return |
1315 |
*/ |
1316 |
public String getSequence() { |
1317 |
return sequence; |
1318 |
} |
1319 |
|
1320 |
/** |
1321 |
* True if this Pdb has the sequence field set to not blank |
1322 |
* @return |
1323 |
*/ |
1324 |
public boolean hasSequence() { |
1325 |
if (sequence==null) return false; |
1326 |
return !sequence.equals(""); |
1327 |
} |
1328 |
|
1329 |
// csa related methods |
1330 |
|
1331 |
/** |
1332 |
* Returns true if csa information is available, false otherwise. |
1333 |
*/ |
1334 |
public boolean hasCSA() { |
1335 |
if (catalSiteSet == null) { |
1336 |
return false; |
1337 |
} else if (catalSiteSet.isEmpty()) { |
1338 |
return false; |
1339 |
} else { |
1340 |
return true; |
1341 |
} |
1342 |
} |
1343 |
|
1344 |
/** |
1345 |
* Returns the csa annotation object of this graph. |
1346 |
*/ |
1347 |
public CatalSiteSet getCSA() { |
1348 |
return catalSiteSet; |
1349 |
} |
1350 |
|
1351 |
// ec related methods |
1352 |
|
1353 |
/** |
1354 |
* Returns true if ec information is available, false otherwise. |
1355 |
*/ |
1356 |
public boolean hasEC() { |
1357 |
if (ec == null) { |
1358 |
return false; |
1359 |
} else if (ec.isEmpty()) { |
1360 |
return false; |
1361 |
} else { |
1362 |
return true; |
1363 |
} |
1364 |
} |
1365 |
|
1366 |
/** |
1367 |
* Returns the ec annotation object of this graph. |
1368 |
*/ |
1369 |
public EC getEC() { |
1370 |
return ec; |
1371 |
} |
1372 |
|
1373 |
// end of secop related methods |
1374 |
|
1375 |
// scop related methods |
1376 |
|
1377 |
/** |
1378 |
* Returns true if scop information is available, false otherwise. |
1379 |
*/ |
1380 |
public boolean hasScop() { |
1381 |
if (scop == null) { |
1382 |
return false; |
1383 |
} else if (scop.isEmpty()) { |
1384 |
return false; |
1385 |
} else { |
1386 |
return true; |
1387 |
} |
1388 |
} |
1389 |
|
1390 |
/** |
1391 |
* Returns the scop annotation object of this graph. |
1392 |
*/ |
1393 |
public Scop getScop() { |
1394 |
return scop; |
1395 |
} |
1396 |
|
1397 |
// end of secop related methods |
1398 |
|
1399 |
// secondary structure related methods |
1400 |
|
1401 |
/** |
1402 |
* Returns true if secondary structure information is available, false otherwise. |
1403 |
*/ |
1404 |
public boolean hasSecondaryStructure() { |
1405 |
return !this.secondaryStructure.isEmpty(); |
1406 |
} |
1407 |
|
1408 |
/** |
1409 |
* Returns the secondary structure annotation object of this graph. |
1410 |
*/ |
1411 |
public SecondaryStructure getSecondaryStructure() { |
1412 |
return this.secondaryStructure; |
1413 |
} |
1414 |
|
1415 |
// end of secondary structure related methods |
1416 |
|
1417 |
/** |
1418 |
* Calculates rmsd (on atoms given by ct) of this Pdb object to otherPdb object |
1419 |
* Both objects must represent structures with same sequence (save unobserved residues or missing atoms) |
1420 |
* |
1421 |
* @param otherPdb |
1422 |
* @param ct the contact type (crossed contact types don't make sense here) |
1423 |
* @return |
1424 |
* @throws ConformationsNotSameSizeError |
1425 |
*/ |
1426 |
public double rmsd(Pdb otherPdb, String ct) throws ConformationsNotSameSizeError { |
1427 |
TreeMap<String,Point3d> thiscoords = this.get_coords_for_ct_4rmsd(ct); |
1428 |
TreeMap<String,Point3d> othercoords = otherPdb.get_coords_for_ct_4rmsd(ct); |
1429 |
|
1430 |
// there might be unobserved residues or some missing atoms for a residue |
1431 |
// here we get the ones that are in common |
1432 |
ArrayList<String> common = new ArrayList<String>(); |
1433 |
for (String resser_atom:thiscoords.keySet()){ |
1434 |
if (othercoords.containsKey(resser_atom)){ |
1435 |
common.add(resser_atom); |
1436 |
} |
1437 |
} |
1438 |
|
1439 |
// converting the TreeMaps to Vector3d arrays (input format for calculate_rmsd) |
1440 |
Vector3d[] conformation1 = new Vector3d[common.size()]; |
1441 |
Vector3d[] conformation2 = new Vector3d[common.size()]; |
1442 |
int i = 0; |
1443 |
for (String resser_atom:common){ |
1444 |
conformation1[i] = new Vector3d(thiscoords.get(resser_atom).x,thiscoords.get(resser_atom).y,thiscoords.get(resser_atom).z); |
1445 |
conformation2[i] = new Vector3d(othercoords.get(resser_atom).x,othercoords.get(resser_atom).y,othercoords.get(resser_atom).z); |
1446 |
i++; |
1447 |
} |
1448 |
|
1449 |
// this as well as calculating the rmsd, changes conformation1 and conformation2 to be optimally superposed |
1450 |
double rmsd = calculate_rmsd(conformation1, conformation2); |
1451 |
|
1452 |
// // printing out individual distances (conformation1 and conformation2 are now optimally superposed) |
1453 |
// for (i=0;i<conformation1.length;i++){ |
1454 |
// Point3d point1 = new Point3d(conformation1[i].x,conformation1[i].y,conformation1[i].z); |
1455 |
// Point3d point2 = new Point3d(conformation2[i].x,conformation2[i].y,conformation2[i].z); |
1456 |
// System.out.println(point1.distance(point2)); |
1457 |
// } |
1458 |
|
1459 |
return rmsd; |
1460 |
|
1461 |
} |
1462 |
|
1463 |
/** |
1464 |
* Calculates the RMSD between two conformations. |
1465 |
* conformation1: Vector3d array (matrix of dimensions [N,3]) |
1466 |
* conformation2: Vector3d array (matrix of dimensions [N,3]) |
1467 |
* |
1468 |
* Both conformation1 and conformation2 are modified to be optimally superposed |
1469 |
* |
1470 |
* Implementation taken (python) from http://bosco.infogami.com/Root_Mean_Square_Deviation, |
1471 |
* then ported to java using Jama matrix package |
1472 |
* (page has moved to: http://boscoh.com/protein/rmsd-root-mean-square-deviation) |
1473 |
* @param conformation1 |
1474 |
* @param conformation2 |
1475 |
* @return |
1476 |
* @throws ConformationsNotSameSizeError |
1477 |
*/ |
1478 |
private double calculate_rmsd(Vector3d[] conformation1, Vector3d[] conformation2) throws ConformationsNotSameSizeError{ |
1479 |
if (conformation1.length!=conformation2.length) { |
1480 |
//System.err.println("Conformations not the same size"); |
1481 |
throw new ConformationsNotSameSizeError( |
1482 |
"Given conformations have different size: conformation1: "+conformation1.length+", conformation2: "+conformation2.length); |
1483 |
} |
1484 |
int n_vec = conformation1.length; |
1485 |
|
1486 |
// 1st we bring both conformations to the same centre by subtracting their respectives centres |
1487 |
Vector3d center1 = new Vector3d(); |
1488 |
Vector3d center2 = new Vector3d(); |
1489 |
for (int i=0;i<n_vec;i++){ // summing all vectors in each conformation |
1490 |
center1.add(conformation1[i]); |
1491 |
center2.add(conformation2[i]); |
1492 |
} |
1493 |
// dividing by n_vec (average) |
1494 |
center1.scale((double)1/n_vec); |
1495 |
center2.scale((double)1/n_vec); |
1496 |
// translating our conformations to the same coordinate system by subtracting centers |
1497 |
for (Vector3d vec:conformation1){ |
1498 |
vec.sub(center1); |
1499 |
} |
1500 |
for (Vector3d vec:conformation2){ |
1501 |
vec.sub(center2); |
1502 |
} |
1503 |
|
1504 |
//E0: initial sum of squared lengths of both conformations |
1505 |
double sum1 = 0.0; |
1506 |
double sum2 = 0.0; |
1507 |
for (int i=0;i<n_vec;i++){ |
1508 |
sum1 += conformation1[i].lengthSquared(); |
1509 |
sum2 += conformation2[i].lengthSquared(); |
1510 |
} |
1511 |
double E0 = sum1 + sum2; |
1512 |
|
1513 |
// singular value decomposition |
1514 |
Matrix vecs1 = vector3dAr2matrix(conformation1); |
1515 |
Matrix vecs2 = vector3dAr2matrix(conformation2); |
1516 |
|
1517 |
Matrix correlation_matrix = vecs2.transpose().times(vecs1); //gives a 3x3 matrix |
1518 |
|
1519 |
SingularValueDecomposition svd = correlation_matrix.svd(); |
1520 |
Matrix U = svd.getU(); |
1521 |
Matrix V_trans = svd.getV().transpose(); |
1522 |
double[] singularValues = svd.getSingularValues(); |
1523 |
|
1524 |
boolean is_reflection = false; |
1525 |
if (U.det()*V_trans.det()<0.0){ |
1526 |
is_reflection = true; |
1527 |
} |
1528 |
if (is_reflection){ |
1529 |
// reflect along smallest principal axis: |
1530 |
// we change sign of last coordinate (smallest singular value) |
1531 |
singularValues[singularValues.length-1]=(-1)*singularValues[singularValues.length-1]; |
1532 |
} |
1533 |
|
1534 |
// getting sum of singular values |
1535 |
double sumSV = 0.0; |
1536 |
for (int i=0;i<singularValues.length;i++){ |
1537 |
sumSV += singularValues[i]; |
1538 |
} |
1539 |
|
1540 |
// rmsd square: Kabsch formula |
1541 |
double rmsd_sq = (E0 - 2.0*sumSV)/((double) n_vec); |
1542 |
rmsd_sq = Math.max(rmsd_sq, 0.0); |
1543 |
|
1544 |
// finally we modify conformation2 to be aligned to conformation1 |
1545 |
if (is_reflection) { // first we check if we are in is_reflection case: we need to change sign to last row of U |
1546 |
for (int j=0;j<U.getColumnDimension();j++){ |
1547 |
// we change sign to last row of U |
1548 |
int lastRow = U.getRowDimension()-1; |
1549 |
U.set(lastRow, j, (-1)*U.get(lastRow,j)); |
1550 |
} |
1551 |
} |
1552 |
Matrix optimal_rotation = U.times(V_trans); |
1553 |
Matrix conf2 = vecs2.times(optimal_rotation); |
1554 |
for (int i=0;i<n_vec;i++){ |
1555 |
conformation2[i].x = conf2.get(i,0); |
1556 |
conformation2[i].y = conf2.get(i,1); |
1557 |
conformation2[i].z = conf2.get(i,2); |
1558 |
} |
1559 |
|
1560 |
return Math.sqrt(rmsd_sq); |
1561 |
} |
1562 |
|
1563 |
/** Gets a Jama.Matrix object from a Vector3d[] (deep copies) */ |
1564 |
private Matrix vector3dAr2matrix(Vector3d[] vecArray) { |
1565 |
double[][] array = new double[vecArray.length][3]; |
1566 |
for (int i=0;i<vecArray.length;i++){ |
1567 |
vecArray[i].get(array[i]); |
1568 |
} |
1569 |
return new Matrix(array); |
1570 |
} |
1571 |
|
1572 |
/** |
1573 |
* Write residue info to given db, using our db graph aglappe format, |
1574 |
* i.e. tables: residue_info |
1575 |
* @param conn |
1576 |
* @param db |
1577 |
* @throws SQLException |
1578 |
*/ |
1579 |
public void writeToDb(MySQLConnection conn, String db) throws SQLException{ |
1580 |
|
1581 |
Statement stmt; |
1582 |
String sql = ""; |
1583 |
|
1584 |
conn.setSqlMode("NO_UNSIGNED_SUBTRACTION,TRADITIONAL"); |
1585 |
|
1586 |
for (int resser:resser2restype.keySet()) { |
1587 |
String resType = AAinfo.threeletter2oneletter(getResTypeFromResSerial(resser)); |
1588 |
String pdbresser = get_pdbresser_from_resser(resser); |
1589 |
|
1590 |
String secStructType = null; |
1591 |
String secStructId = null; |
1592 |
if (secondaryStructure != null) { |
1593 |
if (secondaryStructure.getSecStrucElement(resser) != null) { |
1594 |
secStructType = quote(String.valueOf(secondaryStructure.getSecStrucElement(resser).getType())); |
1595 |
secStructId = quote(secondaryStructure.getSecStrucElement(resser).getId()); |
1596 |
} |
1597 |
} |
1598 |
|
1599 |
String scopId = null; |
1600 |
String sccs = null; |
1601 |
String sunid = null; |
1602 |
String orderIn = null; |
1603 |
String domainType = null; |
1604 |
String domainNumReg = null; |
1605 |
if (scop != null) { |
1606 |
if (scop.getScopRegion(resser)!=null) { |
1607 |
ScopRegion sr = scop.getScopRegion(resser); |
1608 |
scopId = quote(sr.getSId()); |
1609 |
sccs = quote(sr.getSccs()); |
1610 |
sunid = String.valueOf(sr.getSunid()); |
1611 |
orderIn = String.valueOf(sr.getOrder()); |
1612 |
domainType = quote(String.valueOf(sr.getDomainType())); |
1613 |
domainNumReg = String.valueOf(sr.getNumRegions()); |
1614 |
} |
1615 |
} |
1616 |
|
1617 |
Double allRsa = getAllRsaFromResSerial(resser); |
1618 |
Double scRsa = getScRsaFromResSerial(resser); |
1619 |
|
1620 |
Double consurfhsspScore = getConsurfhsspScoreFromResSerial(resser); |
1621 |
Integer consurfhsspColor = getConsurfhsspColorFromResSerial(resser); |
1622 |
|
1623 |
String ecId = null; |
1624 |
if (ec != null) { |
1625 |
if (ec.getECRegion(resser) != null) { |
1626 |
ecId = quote(ec.getECNum(resser)); |
1627 |
} |
1628 |
} |
1629 |
|
1630 |
String csaNums = null; |
1631 |
String csaChemFuncs = null; |
1632 |
String csaEvids = null; |
1633 |
if (catalSiteSet != null) { |
1634 |
if (catalSiteSet.getCatalSite(resser) != null) { |
1635 |
csaNums = quote(catalSiteSet.getCatalSiteNum(resser)); |
1636 |
csaChemFuncs = quote(catalSiteSet.getCatalSiteChemFunc(resser)); |
1637 |
csaEvids = quote(catalSiteSet.getCatalSiteEvid(resser)); |
1638 |
} |
1639 |
} |
1640 |
|
1641 |
sql = "INSERT IGNORE INTO "+db+".pdb_residue_info (pdb_code, chain_code, pdb_chain_code, res_ser, pdb_res_ser, res_type, sstype, ssid, scop_id, sccs, sunid, order_in, domain_type, domain_num_reg, all_rsa, sc_rsa, consurfhssp_score, consurfhssp_color, ec, csa_site_nums, csa_chem_funcs, csa_evid) " + |
1642 |
" VALUES ("+quote(pdbCode)+", "+quote(chainCode)+", "+(pdbChainCode.equals("NULL")?quote("-"):quote(pdbChainCode))+","+resser+", "+quote(pdbresser)+", "+quote(resType)+", "+secStructType+", "+secStructId+", "+scopId+", "+sccs+", "+sunid+", "+orderIn+", "+domainType+", "+domainNumReg+", "+allRsa+", "+scRsa+", "+consurfhsspScore+","+consurfhsspColor+","+ecId+","+csaNums+","+csaChemFuncs+","+csaEvids+")"; |
1643 |
//System.out.println(sql); |
1644 |
stmt = conn.createStatement(); |
1645 |
stmt.executeUpdate(sql); |
1646 |
stmt.close(); |
1647 |
} |
1648 |
} |
1649 |
|
1650 |
/** |
1651 |
* Write residue info to given db, using our db graph aglappe format, |
1652 |
* i.e. tables: residue_info |
1653 |
* @param conn |
1654 |
* @param db |
1655 |
* @throws SQLException |
1656 |
*/ |
1657 |
public void writeToDbFast(MySQLConnection conn, String db) throws SQLException, IOException { |
1658 |
|
1659 |
Statement stmt; |
1660 |
String sql = ""; |
1661 |
|
1662 |
conn.setSqlMode("NO_UNSIGNED_SUBTRACTION,TRADITIONAL"); |
1663 |
|
1664 |
PrintStream resOut = new PrintStream(new FileOutputStream(pdbCode+chainCode+"_residues.txt")); |
1665 |
|
1666 |
for (int resser:resser2restype.keySet()) { |
1667 |
String resType = AAinfo.threeletter2oneletter(getResTypeFromResSerial(resser)); |
1668 |
String pdbresser = get_pdbresser_from_resser(resser); |
1669 |
|
1670 |
String secStructType = "\\N"; |
1671 |
String secStructId = "\\N"; |
1672 |
if (secondaryStructure != null) { |
1673 |
if (secondaryStructure.getSecStrucElement(resser) != null) { |
1674 |
secStructType = String.valueOf(secondaryStructure.getSecStrucElement(resser).getType()); |
1675 |
secStructId = secondaryStructure.getSecStrucElement(resser).getId(); |
1676 |
} |
1677 |
} |
1678 |
|
1679 |
String scopId = "\\N"; |
1680 |
String sccs = "\\N"; |
1681 |
String sunid = "\\N"; |
1682 |
String orderIn = "\\N"; |
1683 |
String domainType = "\\N"; |
1684 |
String domainNumReg = "\\N"; |
1685 |
if (scop != null) { |
1686 |
if (scop.getScopRegion(resser)!=null) { |
1687 |
ScopRegion sr = scop.getScopRegion(resser); |
1688 |
scopId = sr.getSId(); |
1689 |
sccs = sr.getSccs(); |
1690 |
sunid = String.valueOf(sr.getSunid()); |
1691 |
orderIn = String.valueOf(sr.getOrder()); |
1692 |
domainType = String.valueOf(sr.getDomainType()); |
1693 |
domainNumReg = String.valueOf(sr.getNumRegions()); |
1694 |
} |
1695 |
} |
1696 |
|
1697 |
Double allRsa = getAllRsaFromResSerial(resser); |
1698 |
Double scRsa = getScRsaFromResSerial(resser); |
1699 |
|
1700 |
Double consurfhsspScore = getConsurfhsspScoreFromResSerial(resser); |
1701 |
Integer consurfhsspColor = getConsurfhsspColorFromResSerial(resser); |
1702 |
|
1703 |
String ecId = "\\N"; |
1704 |
if (ec != null) { |
1705 |
if (ec.getECRegion(resser) != null) { |
1706 |
ecId = ec.getECNum(resser); |
1707 |
} |
1708 |
} |
1709 |
|
1710 |
String csaNums = "\\N"; |
1711 |
String csaChemFuncs = "\\N"; |
1712 |
String csaEvids = "\\N"; |
1713 |
if (catalSiteSet != null) { |
1714 |
if (catalSiteSet.getCatalSite(resser) != null) { |
1715 |
csaNums = catalSiteSet.getCatalSiteNum(resser); |
1716 |
csaChemFuncs = catalSiteSet.getCatalSiteChemFunc(resser); |
1717 |
csaEvids = catalSiteSet.getCatalSiteEvid(resser); |
1718 |
} |
1719 |
} |
1720 |
|
1721 |
resOut.println(pdbCode+"\t"+chainCode+"\t"+(pdbChainCode.equals("NULL")?"-":pdbChainCode)+"\t"+resser+"\t"+pdbresser+"\t"+resType+"\t"+secStructType+"\t"+secStructId+"\t"+scopId+"\t"+sccs+"\t"+sunid+"\t"+orderIn+"\t"+domainType+"\t"+domainNumReg+"\t"+allRsa+"\t"+scRsa+"\t"+consurfhsspScore+"\t"+consurfhsspColor+"\t"+ecId+"\t"+csaNums+"\t"+csaChemFuncs+"\t"+csaEvids); |
1722 |
|
1723 |
} |
1724 |
resOut.close(); |
1725 |
sql = "LOAD DATA LOCAL INFILE '"+pdbCode+chainCode+"_residues.txt' INTO TABLE "+db+".pdb_residue_info (pdb_code, chain_code, pdb_chain_code, res_ser, pdb_res_ser, res_type, sstype, ssid, scop_id, sccs, sunid, order_in, domain_type, domain_num_reg, all_rsa, sc_rsa, consurfhssp_score, consurfhssp_color, ec, csa_site_nums, csa_chem_funcs, csa_evid);"; |
1726 |
//System.out.println(sql); |
1727 |
stmt = conn.createStatement(); |
1728 |
stmt.executeUpdate(sql); |
1729 |
stmt.close(); |
1730 |
File fileToDelete = new File(pdbCode+chainCode+"_residues.txt"); |
1731 |
if (fileToDelete.exists()) { |
1732 |
fileToDelete.delete(); |
1733 |
} |
1734 |
} |
1735 |
|
1736 |
private static String quote(String s) { |
1737 |
return ("'"+s+"'"); |
1738 |
} |
1739 |
|
1740 |
/** |
1741 |
* Restricts thisPdb object to residues that belong to the given sunid |
1742 |
* Can only be used after calling checkScop() |
1743 |
* @param sunid |
1744 |
*/ |
1745 |
public void restrictToScopDomain (int sunid) { |
1746 |
Vector<ScopRegion> scopRegions = this.scop.getScopRegions(sunid); |
1747 |
if (scopRegions.size()!=0) { |
1748 |
this.sid = scopRegions.get(0).getSId(); |
1749 |
if (scopRegions.get(0).getDomainType() != ScopRegion.DomainType.WHOLECHAIN) { |
1750 |
restrictToScopRegions(scopRegions); |
1751 |
|
1752 |
Iterator<ScopRegion> allScopRegions = this.scop.getIterator(); |
1753 |
while (allScopRegions.hasNext()) { |
1754 |
ScopRegion scopRegion = allScopRegions.next(); |
1755 |
if (!scopRegion.getSId().equals(sid)) { |
1756 |
allScopRegions.remove(); |
1757 |
} |
1758 |
} |
1759 |
} |
1760 |
} |
1761 |
} |
1762 |
|
1763 |
/** |
1764 |
* Restricts thisPdb object to residues that belong to the given sid |
1765 |
* Can only be used after calling checkScop() |
1766 |
* @param sid |
1767 |
*/ |
1768 |
public void restrictToScopDomain (String sid) { |
1769 |
|
1770 |
Vector<ScopRegion> scopRegions = this.scop.getScopRegions(sid); |
1771 |
if (scopRegions.size()!=0) { |
1772 |
this.sid = sid; |
1773 |
if (scopRegions.get(0).getDomainType() != ScopRegion.DomainType.WHOLECHAIN) { |
1774 |
restrictToScopRegions(scopRegions); |
1775 |
|
1776 |
Iterator<ScopRegion> allScopRegions = this.scop.getIterator(); |
1777 |
while (allScopRegions.hasNext()) { |
1778 |
ScopRegion scopRegion = allScopRegions.next(); |
1779 |
if (!scopRegion.getSId().equals(sid)) { |
1780 |
allScopRegions.remove(); |
1781 |
} |
1782 |
} |
1783 |
} |
1784 |
} |
1785 |
} |
1786 |
|
1787 |
/** |
1788 |
* Restricts this Pdb object to residues within the given ScopRegions |
1789 |
* @param scopRegions |
1790 |
*/ |
1791 |
private void restrictToScopRegions (Vector<ScopRegion> scopRegions) { |
1792 |
IntervalSet intervSet = new IntervalSet(); |
1793 |
Iterator<ScopRegion> it = scopRegions.iterator(); |
1794 |
while(it.hasNext()) { |
1795 |
ScopRegion scopRegion = it.next(); |
1796 |
intervSet.add(scopRegion.getInterval()); |
1797 |
} |
1798 |
restrictToIntervalSet(intervSet); |
1799 |
} |
1800 |
|
1801 |
/** |
1802 |
* Restricts this Pdb object to residues within the given IntervalSet |
1803 |
* @param intervSet a set of internal residue serials |
1804 |
*/ |
1805 |
private void restrictToIntervalSet(IntervalSet intervSet) { |
1806 |
|
1807 |
// getting list of the residue serials to keep |
1808 |
TreeSet<Integer> resSersToKeep = intervSet.getIntegerSet(); |
1809 |
|
1810 |
// removing residues from resser2restype and resser2pdbresser |
1811 |
Iterator<Integer> itressers = resser2restype.keySet().iterator(); |
1812 |
while (itressers.hasNext()) { |
1813 |
int resSer = itressers.next(); |
1814 |
if (!resSersToKeep.contains(resSer)) { |
1815 |
itressers.remove(); |
1816 |
resser2pdbresser.remove(resSer); |
1817 |
if (resser2allrsa != null) { |
1818 |
resser2allrsa.remove(resSer); |
1819 |
resser2scrsa.remove(resSer); |
1820 |
} |
1821 |
if (resser2consurfhsspscore != null) { |
1822 |
resser2consurfhsspscore.remove(resSer); |
1823 |
resser2consurfhsspcolor.remove(resSer); |
1824 |
} |
1825 |
if (catalSiteSet != null) { |
1826 |
catalSiteSet.removeCatalSiteRes(resSer); |
1827 |
} |
1828 |
} |
1829 |
} |
1830 |
|
1831 |
// removing residues from pdbresser2resser |
1832 |
Iterator<String> pdbressers = pdbresser2resser.keySet().iterator(); |
1833 |
while (pdbressers.hasNext()) { |
1834 |
String pdbresser = pdbressers.next(); |
1835 |
int resSer = pdbresser2resser.get(pdbresser); |
1836 |
if (!resSersToKeep.contains(resSer)) { |
1837 |
pdbressers.remove(); |
1838 |
} |
1839 |
} |
1840 |
|
1841 |
// removing residues(atoms) from resser_atom2atomserial |
1842 |
for (int atomSer:atomser2resser.keySet()) { |
1843 |
String atom = atomser2atom.get(atomSer); |
1844 |
int resSer = atomser2resser.get(atomSer); |
1845 |
String resser_atom = resSer+"_"+atom; |
1846 |
if (!resSersToKeep.contains(resSer)) { |
1847 |
resser_atom2atomserial.remove(resser_atom); |
1848 |
} |
1849 |
} |
1850 |
|
1851 |
// removing atoms belonging to unwanted residues from atomser2resser, atomser2atom and atomser2coord |
1852 |
Iterator<Integer> itatomsers = atomser2resser.keySet().iterator(); |
1853 |
while (itatomsers.hasNext()) { |
1854 |
int atomSer = itatomsers.next(); |
1855 |
if (!resSersToKeep.contains(atomser2resser.get(atomSer))) { |
1856 |
itatomsers.remove(); |
1857 |
atomser2atom.remove(atomSer); |
1858 |
atomser2coord.remove(atomSer); |
1859 |
} |
1860 |
} |
1861 |
|
1862 |
// setting sequence to scop sequence and obsLength and fullLength respectively |
1863 |
Iterator<Interval> regionsToKeep = intervSet.iterator(); |
1864 |
String newSequence = ""; |
1865 |
while (regionsToKeep.hasNext()) { |
1866 |
Interval region = regionsToKeep.next(); |
1867 |
newSequence += sequence.substring((region.beg-1),region.end); |
1868 |
} |
1869 |
sequence = newSequence; |
1870 |
fullLength = sequence.length(); |
1871 |
obsLength = resser2restype.size(); |
1872 |
|
1873 |
} |
1874 |
} |