ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 282
Committed: Tue Aug 14 16:15:59 2007 UTC (17 years, 6 months ago) by duarte
File size: 28054 byte(s)
Log Message:
Made get_graph a bit less memory hungry, by using a float[][] instead of double[][]
Not printing warnings about missing atoms anymore
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3 stehr 257 import java.io.*;
4 duarte 123 import java.util.HashMap;
5 duarte 216 import java.util.Locale;
6 stehr 250 import java.util.Map;
7 duarte 123 import java.util.TreeMap;
8     import java.util.ArrayList;
9    
10 duarte 226 import javax.vecmath.Point3d;
11     import javax.vecmath.Point3i;
12 duarte 235 import javax.vecmath.Vector3d;
13 duarte 226
14 duarte 235 import Jama.Matrix;
15     import Jama.SingularValueDecomposition;
16    
17 duarte 207 /**
18     * A single chain pdb protein structure
19     *
20     * @author Jose Duarte
21     * Class: Pdb
22     * Package: proteinstructure
23     */
24     public abstract class Pdb {
25 duarte 123
26 duarte 208 protected final static int DEFAULT_MODEL=1; // default model serial (NMR structures)
27     public final static String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence
28 duarte 153
29 duarte 208 protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials
30     protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type
31 duarte 226 protected HashMap<Integer,Point3d> atomser2coord; // atom serials to 3D coordinates
32 duarte 208 protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials
33 duarte 237 protected HashMap<Integer,String> atomser2atom; // atom serials to atom names
34 duarte 208 protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials
35     protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings)
36 duarte 190
37 stehr 274 protected SecondaryStructure secondaryStructure; // the secondary structure annotation for this pdb object (should never be null)
38 duarte 219
39 duarte 208 protected HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms(); // contains atom names for each aminoacid
40 duarte 135
41 duarte 208 protected String sequence; // full sequence as it appears in SEQRES field
42     protected String pdbCode;
43     // given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file)
44     protected String pdbChainCode;
45     // Our internal chain identifier:
46 duarte 135 // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
47 duarte 208 // - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A"
48 duarte 207 protected String chainCode;
49 duarte 208 protected int model; // the model serial for NMR structures
50 duarte 123
51 duarte 278 // in case we read from pdb file, 2 possibilities:
52     // 1) SEQRES field is present: fullLength coincides with sequence length
53     // 2) SEQRES not present: fullLength is maximum observed residue (so if residue numbering is correct that only misses possible non-observed residues at end of chain)
54     protected int fullLength; // length of full sequence as it appears in SEQRES field
55     protected int obsLength; // length without unobserved, non standard aas
56    
57 duarte 208 protected String db; // the db from which we have taken the data (if applies)
58 duarte 153
59 stehr 257 /** Run DSSP externally and (re)assign the secondary structure annotation from the output */
60 stehr 259 public void runDssp(String dsspExecutable, String dsspParameters) throws IOException {
61 stehr 257 String startLine = " # RESIDUE AA STRUCTURE BP1 BP2 ACC";
62     String line;
63 stehr 280 int lineCount = 0;
64     char ssType;
65     TreeMap<Integer, Character> ssTypes;
66 duarte 260 int resNum;
67 stehr 259 String resNumStr;
68 stehr 257 File test = new File(dsspExecutable);
69     if(!test.canRead()) throw new IOException("DSSP Executable is not readable");
70 stehr 259 Process myDssp = Runtime.getRuntime().exec(dsspExecutable + " " + dsspParameters);
71 stehr 257 PrintWriter dsspInput = new PrintWriter(myDssp.getOutputStream());
72     BufferedReader dsspOutput = new BufferedReader(new InputStreamReader(myDssp.getInputStream()));
73     BufferedReader dsspError = new BufferedReader(new InputStreamReader(myDssp.getErrorStream()));
74     writeAtomLines(dsspInput); // pipe atom lines to dssp
75     dsspInput.close();
76 stehr 280 ssTypes = new TreeMap<Integer,Character>();
77 stehr 257 while((line = dsspOutput.readLine()) != null) {
78 stehr 259 lineCount++;
79 stehr 257 if(line.startsWith(startLine)) {
80 stehr 259 //System.out.println("Dssp Output: ");
81 stehr 257 break;
82     }
83     }
84     while((line = dsspOutput.readLine()) != null) {
85 stehr 259 lineCount++;
86     resNumStr = line.substring(5,10).trim();
87     ssType = line.charAt(16);
88     if(!resNumStr.equals("")) { // this should only happen if dssp inserts a line indicating a chain break
89     try {
90     resNum = Integer.valueOf(resNumStr);
91 stehr 280 ssTypes.put(resNum, ssType);
92 stehr 259 } catch (NumberFormatException e) {
93     System.err.println("Error while parsing DSSP output. Expected residue number, found '" + resNumStr + "' in line " + lineCount);
94     }
95     }
96 stehr 257 }
97 stehr 259 //for(char c:ssTypes) {System.out.print(c);}; System.out.println(".");
98 stehr 257 dsspOutput.close();
99     dsspError.close();
100 stehr 259
101 stehr 280 if(ssTypes.size() == 0) {
102 stehr 259 throw new IOException("No DSSP output found.");
103     }
104    
105 stehr 280 if(ssTypes.size() != get_length()) { // compare with number of observed residues
106     System.err.println("Error: DSSP output size (" + ssTypes.size() + ") does not match number of observed residues in structure (" + get_length() + ").");
107 stehr 259 }
108    
109     // assign secondary structure
110 stehr 274 this.secondaryStructure = new SecondaryStructure(); // forget the old annotation
111 stehr 280 char lastType = SecStrucElement.getFourStateTypeFromDsspType(ssTypes.get(ssTypes.firstKey()));
112     int lastResSer = ssTypes.firstKey();
113 stehr 259 char thisType;
114 stehr 274 int start = 0;
115 stehr 259 int elementCount = 0;
116 stehr 274 SecStrucElement ssElem;
117 stehr 259 String ssId;
118 stehr 280 for(int resSer:ssTypes.keySet()) {
119     thisType = SecStrucElement.getFourStateTypeFromDsspType(ssTypes.get(resSer));
120     if(thisType != lastType || resSer > lastResSer+1) {
121 stehr 259 // finish previous element, start new one
122 stehr 274 if(lastType != SecStrucElement.OTHER) {
123 stehr 259 elementCount++;
124     ssId = new Character(lastType).toString() + new Integer(elementCount).toString();
125 stehr 280 ssElem = new SecStrucElement(lastType, start,lastResSer,ssId);
126 stehr 274 secondaryStructure.add(ssElem);
127 stehr 259 }
128 stehr 280 start = resSer;
129 stehr 259 lastType = thisType;
130     }
131 stehr 280 lastResSer = resSer;
132 stehr 259 }
133     // finish last element
134 stehr 280 if(lastType != SecStrucElement.OTHER) {
135 stehr 259 elementCount++;
136     ssId = new Character(lastType).toString() + new Integer(elementCount).toString();
137 stehr 280 ssElem = new SecStrucElement(lastType, start, ssTypes.lastKey(), ssId);
138 stehr 274 secondaryStructure.add(ssElem);
139 stehr 259 }
140 stehr 274 secondaryStructure.setComment("DSSP");
141 stehr 257 }
142 stehr 259
143    
144 stehr 257 /** Writes atom lines for this structure to the given output stream */
145     private void writeAtomLines(PrintWriter Out) {
146 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
147 duarte 206 Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'");
148 duarte 123 for (String resser_atom:resser_atom2atomserial.keySet()){
149     int atomserial = resser_atom2atomserial.get(resser_atom);
150     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
151     String atom = resser_atom.split("_")[1];
152     String res_type = resser2restype.get(res_serial);
153 duarte 226 Point3d coords = atomser2coord.get(atomserial);
154 stehr 257 String atomType = atom.substring(0,1);
155     Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords.x, coords.y, coords.z, atomType};
156 duarte 130 lines.put(atomserial, fields);
157 duarte 123 }
158 duarte 130 for (int atomserial:lines.keySet()){
159 duarte 216 // Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales)
160 stehr 257 Out.printf(Locale.US,"ATOM %5d %-3s %3s %1s%4d %8.3f%8.3f%8.3f 1.00 0.00 %s\n",lines.get(atomserial));
161 duarte 130 }
162 duarte 123 Out.println("END");
163 stehr 257 Out.close();
164 duarte 123 }
165 stehr 257
166     /**
167     * Dumps coordinate data into a file in pdb format (ATOM lines only)
168     * The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects
169     * @param outfile
170     * @throws IOException
171     */
172     public void dump2pdbfile(String outfile) throws IOException {
173     PrintWriter Out = new PrintWriter(new FileOutputStream(outfile));
174     writeAtomLines(Out);
175     }
176 duarte 123
177 duarte 237 /**
178     * Dump the full sequence of this Pdb object in fasta file format
179     * @param seqfile
180     * @throws IOException
181     */
182 duarte 123 public void dumpseq(String seqfile) throws IOException {
183     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
184 duarte 206 Out.println(">"+pdbCode+"_"+pdbChainCode);
185 duarte 123 Out.println(sequence);
186     Out.close();
187     }
188    
189 duarte 256 /**
190 stehr 259 * Returns the number of observed standard residues.
191     * TODO: Refactor method name
192 duarte 256 */
193 duarte 123 public int get_length(){
194 duarte 278 return obsLength;
195 duarte 123 }
196    
197 stehr 259 /** Returns the number of residues in the sequence of this protein. */
198     public int getFullLength() {
199 duarte 278 return fullLength;
200 stehr 259 }
201    
202 duarte 237 /**
203 duarte 256 * Returns number of (non-Hydrogen) atoms in the protein
204     * @return
205     */
206     public int getNumAtoms() {
207     return atomser2atom.size();
208     }
209    
210     /**
211 duarte 237 * Gets a TreeMap with atom serials as keys and their coordinates as values for the given contact type
212     * The contact type can't be a cross contact type, it doesn't make sense here
213     * @param ct
214     * @return
215     */
216 duarte 235 private TreeMap<Integer,Point3d> get_coords_for_ct(String ct) {
217 duarte 226 TreeMap<Integer,Point3d> coords = new TreeMap<Integer,Point3d>();
218 duarte 123 HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
219     for (int resser:resser2restype.keySet()){
220     String[] atoms = restype2atoms.get(resser2restype.get(resser));
221     for (String atom:atoms){
222     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
223     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
224 duarte 226 Point3d coord = atomser2coord.get(atomser);
225 duarte 123 coords.put(atomser, coord);
226     }
227     else {
228 duarte 282 //System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
229 duarte 123 }
230     }
231 duarte 275 // in ct="ALL" we still miss the OXT, we need to add it now if it is there (it will be there when this resser is the last residue)
232     if (ct.equals("ALL") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
233     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
234     Point3d coord = atomser2coord.get(atomser);
235     coords.put(atomser, coord);
236     }
237 duarte 123 }
238     return coords;
239     }
240 duarte 229
241     /**
242 duarte 237 * Gets a TreeMap with residue serials+"_"+atomname as keys and their coordinates as values for the given contact type
243     * The contact type can't be a cross contact type, it doesn't make sense here
244     * Used in rmsd method
245     * @param ct
246     * @return
247     */
248     private TreeMap<String,Point3d> get_coords_for_ct_4rmsd(String ct) {
249     TreeMap<String,Point3d> coords = new TreeMap<String,Point3d>();
250     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
251     for (int resser:resser2restype.keySet()){
252     String[] atoms = restype2atoms.get(resser2restype.get(resser));
253     for (String atom:atoms){
254     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
255     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
256     Point3d coord = atomser2coord.get(atomser);
257     coords.put(resser+"_"+atom, coord);
258     }
259     }
260 duarte 275 // in ct="ALL" we still miss the OXT, we need to add it now if it is there (it will be there when this resser is the last residue)
261     if (ct.equals("ALL") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
262     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
263     Point3d coord = atomser2coord.get(atomser);
264     coords.put(resser+"_"+"OXT", coord);
265     }
266 duarte 237 }
267     return coords;
268     }
269    
270     /**
271 duarte 229 * Returns the distance matrix as a HashMap with Contacts (residue serial pairs) as keys
272     * It doesn't make sense to call this method for multi atom contact
273     * types (for each residue serial pair there's more than 1 distance)
274     * Thus before calling this we should check AA.isValidSingleAtomCT(ct)
275     * @param ct the contact type
276     * @return
277     */
278 duarte 234 public HashMap<Edge, Double> calculate_dist_matrix(String ct){
279     HashMap<Edge,Double> distMatrixAtoms = new HashMap<Edge,Double>();
280 duarte 123 if (!ct.contains("/")){
281 duarte 226 TreeMap<Integer,Point3d> coords = get_coords_for_ct(ct);
282 duarte 123 for (int i_atomser:coords.keySet()){
283     for (int j_atomser:coords.keySet()){
284     if (j_atomser>i_atomser) {
285 duarte 234 Edge pair = new Edge(i_atomser,j_atomser);
286 duarte 229 distMatrixAtoms.put(pair, coords.get(i_atomser).distance(coords.get(j_atomser)));
287 duarte 123 }
288     }
289     }
290     } else {
291     String i_ct = ct.split("/")[0];
292     String j_ct = ct.split("/")[1];
293 duarte 226 TreeMap<Integer,Point3d> i_coords = get_coords_for_ct(i_ct);
294     TreeMap<Integer,Point3d> j_coords = get_coords_for_ct(j_ct);
295 duarte 123 for (int i_atomser:i_coords.keySet()){
296     for (int j_atomser:j_coords.keySet()){
297     if (j_atomser!=i_atomser){
298 duarte 234 Edge pair = new Edge(i_atomser,j_atomser);
299 duarte 229 distMatrixAtoms.put(pair, i_coords.get(i_atomser).distance(j_coords.get(j_atomser)));
300 duarte 123 }
301     }
302     }
303     }
304 duarte 229
305 duarte 234 HashMap<Edge,Double> distMatrixRes = new HashMap<Edge, Double>();
306     for (Edge cont: distMatrixAtoms.keySet()){
307 duarte 229 int i_resser = get_resser_from_atomser(cont.i);
308     int j_resser = get_resser_from_atomser(cont.j);
309 duarte 234 distMatrixRes.put(new Edge(i_resser,j_resser), distMatrixAtoms.get(cont));
310 duarte 229 }
311    
312     return distMatrixRes;
313 duarte 123 }
314    
315 duarte 129 /**
316 duarte 226 * Get the graph for given contact type and cutoff for this Pdb object.
317     * Returns a Graph object with the contacts
318     * We do geometric hashing for fast contact computation (without needing to calculate full distance matrix)
319     * @param ct
320     * @param cutoff
321     * @return
322     */
323 duarte 228 public Graph get_graph(String ct, double cutoff){
324     TreeMap<Integer,Point3d> i_coords = null;
325 stehr 250 TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymetric edge types
326 duarte 228 boolean directed = false;
327     if (!ct.contains("/")){
328     i_coords = get_coords_for_ct(ct);
329     directed = false;
330     } else {
331     String i_ct = ct.split("/")[0];
332     String j_ct = ct.split("/")[1];
333     i_coords = get_coords_for_ct(i_ct);
334     j_coords = get_coords_for_ct(j_ct);
335     directed = true;
336     }
337     int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials
338     int[] j_atomserials = null;
339 duarte 226
340     int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A)
341    
342     int boxSize = (int) Math.floor(cutoff*SCALE);
343    
344     HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>();
345     int i=0;
346 duarte 228 for (int i_atomser:i_coords.keySet()){
347 duarte 226 //coordinates for atom serial atomser, we will use i as its identifier below
348 duarte 228 Point3d coord = i_coords.get(i_atomser);
349 duarte 226 int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
350     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
351     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
352     Point3i floor = new Point3i(floorX,floorY,floorZ);
353     if (boxes.containsKey(floor)){
354     // we put the coords for atom i in its corresponding box (identified by floor)
355 duarte 228 boxes.get(floor).put_i_Point(i, coord);
356     if (!directed){
357     boxes.get(floor).put_j_Point(i, coord);
358     }
359 duarte 226 } else {
360     Box box = new Box(floor);
361 duarte 228 box.put_i_Point(i, coord);
362     if (!directed){
363     box.put_j_Point(i, coord);
364     }
365 duarte 226 boxes.put(floor,box);
366     }
367 duarte 228 i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
368 duarte 226 i++;
369     }
370 duarte 228 int j=0;
371     if (directed) {
372     j_atomserials = new int[j_coords.size()];
373     for (int j_atomser:j_coords.keySet()){
374     //coordinates for atom serial atomser, we will use j as its identifier below
375     Point3d coord = j_coords.get(j_atomser);
376     int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
377     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
378     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
379     Point3i floor = new Point3i(floorX,floorY,floorZ);
380     if (boxes.containsKey(floor)){
381     // we put the coords for atom j in its corresponding box (identified by floor)
382     boxes.get(floor).put_j_Point(j, coord);
383     } else {
384     Box box = new Box(floor);
385     box.put_j_Point(j, coord);
386     boxes.put(floor,box);
387     }
388     j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
389     j++;
390     }
391     } else {
392     j_atomserials = i_atomserials;
393     }
394    
395 duarte 226
396 duarte 282 float[][]distMatrix = new float[i_atomserials.length][j_atomserials.length];
397 duarte 226
398     for (Point3i floor:boxes.keySet()){ // for each box
399     // distances of points within this box
400 duarte 228 boxes.get(floor).getDistancesWithinBox(distMatrix,directed);
401 duarte 226
402 duarte 228 //TODO should iterate only through half of the neighbours here
403 duarte 226 // distances of points from this box to all neighbouring boxes: 26 iterations (26 neighbouring boxes)
404     for (int x=floor.x-boxSize;x<=floor.x+boxSize;x+=boxSize){
405     for (int y=floor.y-boxSize;y<=floor.y+boxSize;y+=boxSize){
406     for (int z=floor.z-boxSize;z<=floor.z+boxSize;z+=boxSize){
407     if (!((x==floor.x)&&(y==floor.y)&&(z==floor.z))) { // skip this box
408     Point3i neighbor = new Point3i(x,y,z);
409     if (boxes.containsKey(neighbor)){
410 duarte 228 boxes.get(floor).getDistancesToNeighborBox(boxes.get(neighbor),distMatrix,directed);
411 duarte 226 }
412     }
413     }
414     }
415     }
416     }
417    
418 duarte 228 // getting the contacts (in residue serials) from the atom serials (partial) distance matrix
419 duarte 234 EdgeSet contacts = new EdgeSet();
420 duarte 226 for (i=0;i<distMatrix.length;i++){
421 duarte 228 for (j=0;j<distMatrix[i].length;j++){
422     // the condition distMatrix[i][j]!=0.0 takes care of skipping several things:
423     // - diagonal of the matrix in case of undirected
424     // - lower half of matrix in case of undirected
425 duarte 235 // - cells for which we didn't calculate a distance because the 2 points were not in same or neighbouring boxes (i.e. too far apart)
426 duarte 282 if (distMatrix[i][j]!=0.0f && distMatrix[i][j]<=cutoff){
427 duarte 228 int i_resser = atomser2resser.get(i_atomserials[i]);
428     int j_resser = atomser2resser.get(j_atomserials[j]);
429 duarte 234 Edge resser_pair = new Edge(i_resser,j_resser);
430 duarte 228 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
431 duarte 235 if (i_resser!=j_resser){ // duplicates are automatically taken care by the EdgeSet class which is a TreeSet and doesn't allow duplicates
432 duarte 226 contacts.add(resser_pair);
433     }
434     }
435 duarte 228
436 duarte 226 }
437     }
438 duarte 228
439     // creating and returning the graph object
440 duarte 226 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
441     for (int resser:resser2restype.keySet()){
442     nodes.put(resser,resser2restype.get(resser));
443     }
444 stehr 274 Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode,model, secondaryStructure);
445 duarte 226 return graph;
446     }
447    
448 stehr 250 public void calcGridDensity(String ct, double cutoff, Map<Integer, Integer> densityCount) {
449     TreeMap<Integer,Point3d> i_coords = null;
450     TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymetric edge types
451     boolean directed = false;
452     if (!ct.contains("/")){
453     i_coords = get_coords_for_ct(ct);
454     directed = false;
455     } else {
456     String i_ct = ct.split("/")[0];
457     String j_ct = ct.split("/")[1];
458     i_coords = get_coords_for_ct(i_ct);
459     j_coords = get_coords_for_ct(j_ct);
460     directed = true;
461     }
462     int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials
463     int[] j_atomserials = null;
464    
465     int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A)
466    
467     int boxSize = (int) Math.floor(cutoff*SCALE);
468    
469     HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>();
470     int i=0;
471     for (int i_atomser:i_coords.keySet()){
472     //coordinates for atom serial atomser, we will use i as its identifier below
473     Point3d coord = i_coords.get(i_atomser);
474     int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
475     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
476     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
477     Point3i floor = new Point3i(floorX,floorY,floorZ);
478     if (boxes.containsKey(floor)){
479     // we put the coords for atom i in its corresponding box (identified by floor)
480     boxes.get(floor).put_i_Point(i, coord);
481     if (!directed){
482     boxes.get(floor).put_j_Point(i, coord);
483     }
484     } else {
485     Box box = new Box(floor);
486     box.put_i_Point(i, coord);
487     if (!directed){
488     box.put_j_Point(i, coord);
489     }
490     boxes.put(floor,box);
491     }
492     i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
493     i++;
494     }
495     int j=0;
496     if (directed) {
497     j_atomserials = new int[j_coords.size()];
498     for (int j_atomser:j_coords.keySet()){
499     //coordinates for atom serial atomser, we will use j as its identifier below
500     Point3d coord = j_coords.get(j_atomser);
501     int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
502     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
503     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
504     Point3i floor = new Point3i(floorX,floorY,floorZ);
505     if (boxes.containsKey(floor)){
506     // we put the coords for atom j in its corresponding box (identified by floor)
507     boxes.get(floor).put_j_Point(j, coord);
508     } else {
509     Box box = new Box(floor);
510     box.put_j_Point(j, coord);
511     boxes.put(floor,box);
512     }
513     j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
514     j++;
515     }
516     } else {
517     j_atomserials = i_atomserials;
518     }
519    
520     for(Point3i floor:boxes.keySet()) {
521     int size = boxes.get(floor).size();
522     if(densityCount.containsKey(size)) {
523     int old = densityCount.get(size);
524     densityCount.put(size, ++old);
525     } else {
526     densityCount.put(size, 1);
527     }
528     }
529     }
530    
531 duarte 190 public int get_resser_from_pdbresser (String pdbresser){
532     return pdbresser2resser.get(pdbresser);
533     }
534    
535     public String get_pdbresser_from_resser (int resser){
536     return resser2pdbresser.get(resser);
537     }
538    
539 duarte 200 public int get_resser_from_atomser(int atomser){
540     return atomser2resser.get(atomser);
541     }
542 duarte 206
543 duarte 275 public String getPdbCode() {
544     return this.pdbCode;
545     }
546    
547 duarte 206 public String getChainCode(){
548     return this.chainCode;
549     }
550    
551     public String getPdbChainCode(){
552     return this.pdbChainCode;
553     }
554 stehr 274
555     // secondary structure related methods
556 duarte 219
557 stehr 274 /**
558     * Returns true if secondary structure information is available, false otherwise.
559 stehr 259 */
560 stehr 257 public boolean hasSecondaryStructure() {
561 stehr 274 return !this.secondaryStructure.isEmpty();
562 stehr 257 }
563    
564 stehr 274 /**
565     * Returns the secondary structure annotation object of this graph.
566     */
567     public SecondaryStructure getSecondaryStructure() {
568     return this.secondaryStructure;
569 stehr 259 }
570    
571 stehr 274 // end of secondary structure related methods
572    
573 duarte 235 /**
574     * Calculates rmsd (on atoms given by ct) of this Pdb object to otherPdb object
575 duarte 237 * Both objects must represent structures with same sequence (save unobserved residues or missing atoms)
576     *
577 duarte 235 * @param otherPdb
578 duarte 237 * @param ct the contact type (crossed contact types don't make sense here)
579 duarte 235 * @return
580 duarte 237 * @throws ConformationsNotSameSizeError
581 duarte 235 */
582 duarte 237 public double rmsd(Pdb otherPdb, String ct) throws ConformationsNotSameSizeError {
583     TreeMap<String,Point3d> thiscoords = this.get_coords_for_ct_4rmsd(ct);
584     TreeMap<String,Point3d> othercoords = otherPdb.get_coords_for_ct_4rmsd(ct);
585 duarte 235
586 duarte 237 // there might be unobserved residues or some missing atoms for a residue
587     // here we get the ones that are in common
588     ArrayList<String> common = new ArrayList<String>();
589     for (String resser_atom:thiscoords.keySet()){
590     if (othercoords.containsKey(resser_atom)){
591     common.add(resser_atom);
592     }
593     }
594    
595 duarte 235 // converting the TreeMaps to Vector3d arrays (input format for calculate_rmsd)
596 duarte 237 Vector3d[] conformation1 = new Vector3d[common.size()];
597     Vector3d[] conformation2 = new Vector3d[common.size()];
598     int i = 0;
599     for (String resser_atom:common){
600     conformation1[i] = new Vector3d(thiscoords.get(resser_atom).x,thiscoords.get(resser_atom).y,thiscoords.get(resser_atom).z);
601     conformation2[i] = new Vector3d(othercoords.get(resser_atom).x,othercoords.get(resser_atom).y,othercoords.get(resser_atom).z);
602 duarte 235 i++;
603     }
604 duarte 237
605     // this as well as calculating the rmsd, changes conformation1 and conformation2 to be optimally superposed
606 duarte 235 double rmsd = calculate_rmsd(conformation1, conformation2);
607    
608     // // printing out individual distances (conformation1 and conformation2 are now optimally superposed)
609     // for (i=0;i<conformation1.length;i++){
610     // Point3d point1 = new Point3d(conformation1[i].x,conformation1[i].y,conformation1[i].z);
611     // Point3d point2 = new Point3d(conformation2[i].x,conformation2[i].y,conformation2[i].z);
612     // System.out.println(point1.distance(point2));
613     // }
614    
615     return rmsd;
616    
617     }
618    
619     /**
620     * Calculates the RMSD between two conformations.
621     * conformation1: Vector3d array (matrix of dimensions [N,3])
622     * conformation2: Vector3d array (matrix of dimensions [N,3])
623     *
624     * Both conformation1 and conformation2 are modified to be optimally superposed
625     *
626     * Implementation taken (python) from http://bosco.infogami.com/Root_Mean_Square_Deviation,
627     * then ported to java using Jama matrix package
628     * (page has moved to: http://boscoh.com/protein/rmsd-root-mean-square-deviation)
629     * @param conformation1
630     * @param conformation2
631     * @return
632 duarte 237 * @throws ConformationsNotSameSizeError
633 duarte 235 */
634 duarte 237 private double calculate_rmsd(Vector3d[] conformation1, Vector3d[] conformation2) throws ConformationsNotSameSizeError{
635 duarte 235 if (conformation1.length!=conformation2.length) {
636 duarte 237 //System.err.println("Conformations not the same size");
637     throw new ConformationsNotSameSizeError(
638     "Given conformations have different size: conformation1: "+conformation1.length+", conformation2: "+conformation2.length);
639 duarte 235 }
640     int n_vec = conformation1.length;
641    
642     // 1st we bring both conformations to the same centre by subtracting their respectives centres
643     Vector3d center1 = new Vector3d();
644     Vector3d center2 = new Vector3d();
645     for (int i=0;i<n_vec;i++){ // summing all vectors in each conformation
646     center1.add(conformation1[i]);
647     center2.add(conformation2[i]);
648     }
649     // dividing by n_vec (average)
650     center1.scale((double)1/n_vec);
651     center2.scale((double)1/n_vec);
652     // translating our conformations to the same coordinate system by subtracting centers
653     for (Vector3d vec:conformation1){
654     vec.sub(center1);
655     }
656     for (Vector3d vec:conformation2){
657     vec.sub(center2);
658     }
659    
660     //E0: initial sum of squared lengths of both conformations
661     double sum1 = 0.0;
662     double sum2 = 0.0;
663     for (int i=0;i<n_vec;i++){
664     sum1 += conformation1[i].lengthSquared();
665     sum2 += conformation2[i].lengthSquared();
666     }
667     double E0 = sum1 + sum2;
668    
669     // singular value decomposition
670     Matrix vecs1 = vector3dAr2matrix(conformation1);
671     Matrix vecs2 = vector3dAr2matrix(conformation2);
672    
673     Matrix correlation_matrix = vecs2.transpose().times(vecs1); //gives a 3x3 matrix
674    
675     SingularValueDecomposition svd = correlation_matrix.svd();
676     Matrix U = svd.getU();
677     Matrix V_trans = svd.getV().transpose();
678     double[] singularValues = svd.getSingularValues();
679    
680     boolean is_reflection = false;
681     if (U.det()*V_trans.det()<0.0){
682     is_reflection = true;
683     }
684     if (is_reflection){
685     // reflect along smallest principal axis:
686     // we change sign of last coordinate (smallest singular value)
687 duarte 236 singularValues[singularValues.length-1]=(-1)*singularValues[singularValues.length-1];
688 duarte 235 }
689    
690     // getting sum of singular values
691     double sumSV = 0.0;
692     for (int i=0;i<singularValues.length;i++){
693     sumSV += singularValues[i];
694     }
695    
696     // rmsd square: Kabsch formula
697     double rmsd_sq = (E0 - 2.0*sumSV)/((double) n_vec);
698     rmsd_sq = Math.max(rmsd_sq, 0.0);
699    
700     // finally we modify conformation2 to be aligned to conformation1
701     if (is_reflection) { // first we check if we are in is_reflection case: we need to change sign to last row of U
702     for (int j=0;j<U.getColumnDimension();j++){
703     // we change sign to last row of U
704     int lastRow = U.getRowDimension()-1;
705     U.set(lastRow, j, (-1)*U.get(lastRow,j));
706     }
707     }
708     Matrix optimal_rotation = U.times(V_trans);
709     Matrix conf2 = vecs2.times(optimal_rotation);
710     for (int i=0;i<n_vec;i++){
711     conformation2[i].x = conf2.get(i,0);
712     conformation2[i].y = conf2.get(i,1);
713     conformation2[i].z = conf2.get(i,2);
714     }
715    
716     return Math.sqrt(rmsd_sq);
717     }
718    
719     /** Gets a Jama.Matrix object from a Vector3d[] (deep copies) */
720     private Matrix vector3dAr2matrix(Vector3d[] vecArray) {
721     double[][] array = new double[vecArray.length][3];
722     for (int i=0;i<vecArray.length;i++){
723     vecArray[i].get(array[i]);
724     }
725     return new Matrix(array);
726     }
727    
728 duarte 123 }