ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 250
Committed: Mon Aug 6 14:48:37 2007 UTC (17 years, 6 months ago) by stehr
File size: 23249 byte(s)
Log Message:
added application to count grid density, suppressed warning for secondary structure reassignment
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3     import java.io.FileOutputStream;
4     import java.io.PrintStream;
5     import java.io.IOException;
6     import java.util.HashMap;
7 duarte 216 import java.util.Locale;
8 stehr 250 import java.util.Map;
9 duarte 123 import java.util.TreeMap;
10     import java.util.ArrayList;
11    
12 duarte 226 import javax.vecmath.Point3d;
13     import javax.vecmath.Point3i;
14 duarte 235 import javax.vecmath.Vector3d;
15 duarte 226
16 duarte 235 import Jama.Matrix;
17     import Jama.SingularValueDecomposition;
18    
19 duarte 207 /**
20     * A single chain pdb protein structure
21     *
22     * @author Jose Duarte
23     * Class: Pdb
24     * Package: proteinstructure
25     */
26     public abstract class Pdb {
27 duarte 123
28 duarte 208 protected final static int DEFAULT_MODEL=1; // default model serial (NMR structures)
29     public final static String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence
30 duarte 153
31 duarte 208 protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials
32     protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type
33 duarte 226 protected HashMap<Integer,Point3d> atomser2coord; // atom serials to 3D coordinates
34 duarte 208 protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials
35 duarte 237 protected HashMap<Integer,String> atomser2atom; // atom serials to atom names
36 duarte 208 protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials
37     protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings)
38 duarte 190
39 duarte 222 protected HashMap<Integer,String> resser2secstruct; // residue serials to secondary structure
40     protected TreeMap<String,Interval> secstruct2resinterval;// secondary structure element to residue serial intervals
41 duarte 219
42 duarte 208 protected HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms(); // contains atom names for each aminoacid
43 duarte 135
44 duarte 208 protected String sequence; // full sequence as it appears in SEQRES field
45     protected String pdbCode;
46     // given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file)
47     protected String pdbChainCode;
48     // Our internal chain identifier:
49 duarte 135 // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
50 duarte 208 // - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A"
51 duarte 207 protected String chainCode;
52 duarte 208 protected int model; // the model serial for NMR structures
53 duarte 123
54 duarte 208 protected String db; // the db from which we have taken the data (if applies)
55 duarte 153
56 duarte 208
57    
58 duarte 133
59 duarte 153 /**
60     * Dumps coordinate data into a file in pdb format (ATOM lines only)
61 duarte 206 * The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects
62 duarte 153 * @param outfile
63     * @throws IOException
64     */
65 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
66 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
67 duarte 123 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
68 duarte 206 Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'");
69 duarte 123 for (String resser_atom:resser_atom2atomserial.keySet()){
70     int atomserial = resser_atom2atomserial.get(resser_atom);
71     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
72     String atom = resser_atom.split("_")[1];
73     String res_type = resser2restype.get(res_serial);
74 duarte 226 Point3d coords = atomser2coord.get(atomserial);
75     Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords.x, coords.y, coords.z};
76 duarte 130 lines.put(atomserial, fields);
77 duarte 123 }
78 duarte 130 for (int atomserial:lines.keySet()){
79 duarte 216 // Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales)
80     Out.printf(Locale.US,"ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial));
81 duarte 130 }
82 duarte 123 Out.println("END");
83     Out.close();
84     }
85    
86 duarte 237 /**
87     * Dump the full sequence of this Pdb object in fasta file format
88     * @param seqfile
89     * @throws IOException
90     */
91 duarte 123 public void dumpseq(String seqfile) throws IOException {
92     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
93 duarte 206 Out.println(">"+pdbCode+"_"+pdbChainCode);
94 duarte 123 Out.println(sequence);
95     Out.close();
96     }
97    
98     public int get_length(){
99     return resser2restype.size();
100     }
101    
102 duarte 237 /**
103     * Gets a TreeMap with atom serials as keys and their coordinates as values for the given contact type
104     * The contact type can't be a cross contact type, it doesn't make sense here
105     * @param ct
106     * @return
107     */
108 duarte 235 private TreeMap<Integer,Point3d> get_coords_for_ct(String ct) {
109 duarte 226 TreeMap<Integer,Point3d> coords = new TreeMap<Integer,Point3d>();
110 duarte 123 HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
111     for (int resser:resser2restype.keySet()){
112     String[] atoms = restype2atoms.get(resser2restype.get(resser));
113     for (String atom:atoms){
114     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
115     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
116 duarte 226 Point3d coord = atomser2coord.get(atomser);
117 duarte 123 coords.put(atomser, coord);
118     }
119     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
120     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
121 duarte 226 Point3d coord = atomser2coord.get(atomser);
122 duarte 123 coords.put(atomser, coord);
123     }
124     else {
125     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
126     }
127     }
128     }
129     return coords;
130     }
131 duarte 229
132     /**
133 duarte 237 * Gets a TreeMap with residue serials+"_"+atomname as keys and their coordinates as values for the given contact type
134     * The contact type can't be a cross contact type, it doesn't make sense here
135     * Used in rmsd method
136     * @param ct
137     * @return
138     */
139     private TreeMap<String,Point3d> get_coords_for_ct_4rmsd(String ct) {
140     TreeMap<String,Point3d> coords = new TreeMap<String,Point3d>();
141     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
142     for (int resser:resser2restype.keySet()){
143     String[] atoms = restype2atoms.get(resser2restype.get(resser));
144     for (String atom:atoms){
145     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
146     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
147     Point3d coord = atomser2coord.get(atomser);
148     coords.put(resser+"_"+atom, coord);
149     }
150     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
151     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
152     Point3d coord = atomser2coord.get(atomser);
153     coords.put(resser+"_"+atom, coord);
154     }
155     }
156     }
157     return coords;
158     }
159    
160     /**
161 duarte 229 * Returns the distance matrix as a HashMap with Contacts (residue serial pairs) as keys
162     * It doesn't make sense to call this method for multi atom contact
163     * types (for each residue serial pair there's more than 1 distance)
164     * Thus before calling this we should check AA.isValidSingleAtomCT(ct)
165     * @param ct the contact type
166     * @return
167     */
168 duarte 234 public HashMap<Edge, Double> calculate_dist_matrix(String ct){
169     HashMap<Edge,Double> distMatrixAtoms = new HashMap<Edge,Double>();
170 duarte 123 if (!ct.contains("/")){
171 duarte 226 TreeMap<Integer,Point3d> coords = get_coords_for_ct(ct);
172 duarte 123 for (int i_atomser:coords.keySet()){
173     for (int j_atomser:coords.keySet()){
174     if (j_atomser>i_atomser) {
175 duarte 234 Edge pair = new Edge(i_atomser,j_atomser);
176 duarte 229 distMatrixAtoms.put(pair, coords.get(i_atomser).distance(coords.get(j_atomser)));
177 duarte 123 }
178     }
179     }
180     } else {
181     String i_ct = ct.split("/")[0];
182     String j_ct = ct.split("/")[1];
183 duarte 226 TreeMap<Integer,Point3d> i_coords = get_coords_for_ct(i_ct);
184     TreeMap<Integer,Point3d> j_coords = get_coords_for_ct(j_ct);
185 duarte 123 for (int i_atomser:i_coords.keySet()){
186     for (int j_atomser:j_coords.keySet()){
187     if (j_atomser!=i_atomser){
188 duarte 234 Edge pair = new Edge(i_atomser,j_atomser);
189 duarte 229 distMatrixAtoms.put(pair, i_coords.get(i_atomser).distance(j_coords.get(j_atomser)));
190 duarte 123 }
191     }
192     }
193     }
194 duarte 229
195 duarte 234 HashMap<Edge,Double> distMatrixRes = new HashMap<Edge, Double>();
196     for (Edge cont: distMatrixAtoms.keySet()){
197 duarte 229 int i_resser = get_resser_from_atomser(cont.i);
198     int j_resser = get_resser_from_atomser(cont.j);
199 duarte 234 distMatrixRes.put(new Edge(i_resser,j_resser), distMatrixAtoms.get(cont));
200 duarte 229 }
201    
202     return distMatrixRes;
203 duarte 123 }
204    
205 duarte 129 /**
206 duarte 226 * Get the graph for given contact type and cutoff for this Pdb object.
207     * Returns a Graph object with the contacts
208     * We do geometric hashing for fast contact computation (without needing to calculate full distance matrix)
209     * @param ct
210     * @param cutoff
211     * @return
212     */
213 duarte 228 public Graph get_graph(String ct, double cutoff){
214     TreeMap<Integer,Point3d> i_coords = null;
215 stehr 250 TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymetric edge types
216 duarte 228 boolean directed = false;
217     if (!ct.contains("/")){
218     i_coords = get_coords_for_ct(ct);
219     directed = false;
220     } else {
221     String i_ct = ct.split("/")[0];
222     String j_ct = ct.split("/")[1];
223     i_coords = get_coords_for_ct(i_ct);
224     j_coords = get_coords_for_ct(j_ct);
225     directed = true;
226     }
227     int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials
228     int[] j_atomserials = null;
229 duarte 226
230     int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A)
231    
232     int boxSize = (int) Math.floor(cutoff*SCALE);
233    
234     HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>();
235     int i=0;
236 duarte 228 for (int i_atomser:i_coords.keySet()){
237 duarte 226 //coordinates for atom serial atomser, we will use i as its identifier below
238 duarte 228 Point3d coord = i_coords.get(i_atomser);
239 duarte 226 int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
240     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
241     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
242     Point3i floor = new Point3i(floorX,floorY,floorZ);
243     if (boxes.containsKey(floor)){
244     // we put the coords for atom i in its corresponding box (identified by floor)
245 duarte 228 boxes.get(floor).put_i_Point(i, coord);
246     if (!directed){
247     boxes.get(floor).put_j_Point(i, coord);
248     }
249 duarte 226 } else {
250     Box box = new Box(floor);
251 duarte 228 box.put_i_Point(i, coord);
252     if (!directed){
253     box.put_j_Point(i, coord);
254     }
255 duarte 226 boxes.put(floor,box);
256     }
257 duarte 228 i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
258 duarte 226 i++;
259     }
260 duarte 228 int j=0;
261     if (directed) {
262     j_atomserials = new int[j_coords.size()];
263     for (int j_atomser:j_coords.keySet()){
264     //coordinates for atom serial atomser, we will use j as its identifier below
265     Point3d coord = j_coords.get(j_atomser);
266     int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
267     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
268     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
269     Point3i floor = new Point3i(floorX,floorY,floorZ);
270     if (boxes.containsKey(floor)){
271     // we put the coords for atom j in its corresponding box (identified by floor)
272     boxes.get(floor).put_j_Point(j, coord);
273     } else {
274     Box box = new Box(floor);
275     box.put_j_Point(j, coord);
276     boxes.put(floor,box);
277     }
278     j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
279     j++;
280     }
281     } else {
282     j_atomserials = i_atomserials;
283     }
284    
285 duarte 226
286 duarte 228 double[][]distMatrix = new double[i_atomserials.length][j_atomserials.length];
287 duarte 226
288     for (Point3i floor:boxes.keySet()){ // for each box
289     // distances of points within this box
290 duarte 228 boxes.get(floor).getDistancesWithinBox(distMatrix,directed);
291 duarte 226
292 duarte 228 //TODO should iterate only through half of the neighbours here
293 duarte 226 // distances of points from this box to all neighbouring boxes: 26 iterations (26 neighbouring boxes)
294     for (int x=floor.x-boxSize;x<=floor.x+boxSize;x+=boxSize){
295     for (int y=floor.y-boxSize;y<=floor.y+boxSize;y+=boxSize){
296     for (int z=floor.z-boxSize;z<=floor.z+boxSize;z+=boxSize){
297     if (!((x==floor.x)&&(y==floor.y)&&(z==floor.z))) { // skip this box
298     Point3i neighbor = new Point3i(x,y,z);
299     if (boxes.containsKey(neighbor)){
300 duarte 228 boxes.get(floor).getDistancesToNeighborBox(boxes.get(neighbor),distMatrix,directed);
301 duarte 226 }
302     }
303     }
304     }
305     }
306     }
307    
308 duarte 228 // getting the contacts (in residue serials) from the atom serials (partial) distance matrix
309 duarte 234 EdgeSet contacts = new EdgeSet();
310 duarte 226 for (i=0;i<distMatrix.length;i++){
311 duarte 228 for (j=0;j<distMatrix[i].length;j++){
312     // the condition distMatrix[i][j]!=0.0 takes care of skipping several things:
313     // - diagonal of the matrix in case of undirected
314     // - lower half of matrix in case of undirected
315 duarte 235 // - cells for which we didn't calculate a distance because the 2 points were not in same or neighbouring boxes (i.e. too far apart)
316 duarte 228 if (distMatrix[i][j]!=0.0 && distMatrix[i][j]<=cutoff){
317     int i_resser = atomser2resser.get(i_atomserials[i]);
318     int j_resser = atomser2resser.get(j_atomserials[j]);
319 duarte 234 Edge resser_pair = new Edge(i_resser,j_resser);
320 duarte 228 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
321 duarte 235 if (i_resser!=j_resser){ // duplicates are automatically taken care by the EdgeSet class which is a TreeSet and doesn't allow duplicates
322 duarte 226 contacts.add(resser_pair);
323     }
324     }
325 duarte 228
326 duarte 226 }
327     }
328 duarte 228
329     // creating and returning the graph object
330 duarte 226 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
331     for (int resser:resser2restype.keySet()){
332     nodes.put(resser,resser2restype.get(resser));
333     }
334     Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
335    
336     return graph;
337     }
338    
339 stehr 250 public void calcGridDensity(String ct, double cutoff, Map<Integer, Integer> densityCount) {
340     TreeMap<Integer,Point3d> i_coords = null;
341     TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymetric edge types
342     boolean directed = false;
343     if (!ct.contains("/")){
344     i_coords = get_coords_for_ct(ct);
345     directed = false;
346     } else {
347     String i_ct = ct.split("/")[0];
348     String j_ct = ct.split("/")[1];
349     i_coords = get_coords_for_ct(i_ct);
350     j_coords = get_coords_for_ct(j_ct);
351     directed = true;
352     }
353     int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials
354     int[] j_atomserials = null;
355    
356     int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A)
357    
358     int boxSize = (int) Math.floor(cutoff*SCALE);
359    
360     HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>();
361     int i=0;
362     for (int i_atomser:i_coords.keySet()){
363     //coordinates for atom serial atomser, we will use i as its identifier below
364     Point3d coord = i_coords.get(i_atomser);
365     int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
366     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
367     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
368     Point3i floor = new Point3i(floorX,floorY,floorZ);
369     if (boxes.containsKey(floor)){
370     // we put the coords for atom i in its corresponding box (identified by floor)
371     boxes.get(floor).put_i_Point(i, coord);
372     if (!directed){
373     boxes.get(floor).put_j_Point(i, coord);
374     }
375     } else {
376     Box box = new Box(floor);
377     box.put_i_Point(i, coord);
378     if (!directed){
379     box.put_j_Point(i, coord);
380     }
381     boxes.put(floor,box);
382     }
383     i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
384     i++;
385     }
386     int j=0;
387     if (directed) {
388     j_atomserials = new int[j_coords.size()];
389     for (int j_atomser:j_coords.keySet()){
390     //coordinates for atom serial atomser, we will use j as its identifier below
391     Point3d coord = j_coords.get(j_atomser);
392     int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
393     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
394     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
395     Point3i floor = new Point3i(floorX,floorY,floorZ);
396     if (boxes.containsKey(floor)){
397     // we put the coords for atom j in its corresponding box (identified by floor)
398     boxes.get(floor).put_j_Point(j, coord);
399     } else {
400     Box box = new Box(floor);
401     box.put_j_Point(j, coord);
402     boxes.put(floor,box);
403     }
404     j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
405     j++;
406     }
407     } else {
408     j_atomserials = i_atomserials;
409     }
410    
411     for(Point3i floor:boxes.keySet()) {
412     int size = boxes.get(floor).size();
413     if(densityCount.containsKey(size)) {
414     int old = densityCount.get(size);
415     densityCount.put(size, ++old);
416     } else {
417     densityCount.put(size, 1);
418     }
419     }
420     }
421    
422    
423 duarte 190 public int get_resser_from_pdbresser (String pdbresser){
424     return pdbresser2resser.get(pdbresser);
425     }
426    
427     public String get_pdbresser_from_resser (int resser){
428     return resser2pdbresser.get(resser);
429     }
430    
431 duarte 200 public int get_resser_from_atomser(int atomser){
432     return atomser2resser.get(atomser);
433     }
434 duarte 206
435     public String getChainCode(){
436     return this.chainCode;
437     }
438    
439     public String getPdbChainCode(){
440     return this.pdbChainCode;
441     }
442 duarte 219
443     public String getSecStructure(int resser){
444     return this.resser2secstruct.get(resser);
445     }
446 duarte 222
447     public TreeMap<String,Interval> getAllSecStructElements(){
448     return secstruct2resinterval;
449     }
450 duarte 235
451     /**
452     * Calculates rmsd (on atoms given by ct) of this Pdb object to otherPdb object
453 duarte 237 * Both objects must represent structures with same sequence (save unobserved residues or missing atoms)
454     *
455 duarte 235 * @param otherPdb
456 duarte 237 * @param ct the contact type (crossed contact types don't make sense here)
457 duarte 235 * @return
458 duarte 237 * @throws ConformationsNotSameSizeError
459 duarte 235 */
460 duarte 237 public double rmsd(Pdb otherPdb, String ct) throws ConformationsNotSameSizeError {
461     TreeMap<String,Point3d> thiscoords = this.get_coords_for_ct_4rmsd(ct);
462     TreeMap<String,Point3d> othercoords = otherPdb.get_coords_for_ct_4rmsd(ct);
463 duarte 235
464 duarte 237 // there might be unobserved residues or some missing atoms for a residue
465     // here we get the ones that are in common
466     ArrayList<String> common = new ArrayList<String>();
467     for (String resser_atom:thiscoords.keySet()){
468     if (othercoords.containsKey(resser_atom)){
469     common.add(resser_atom);
470     }
471     }
472    
473 duarte 235 // converting the TreeMaps to Vector3d arrays (input format for calculate_rmsd)
474 duarte 237 Vector3d[] conformation1 = new Vector3d[common.size()];
475     Vector3d[] conformation2 = new Vector3d[common.size()];
476     int i = 0;
477     for (String resser_atom:common){
478     conformation1[i] = new Vector3d(thiscoords.get(resser_atom).x,thiscoords.get(resser_atom).y,thiscoords.get(resser_atom).z);
479     conformation2[i] = new Vector3d(othercoords.get(resser_atom).x,othercoords.get(resser_atom).y,othercoords.get(resser_atom).z);
480 duarte 235 i++;
481     }
482 duarte 237
483     // this as well as calculating the rmsd, changes conformation1 and conformation2 to be optimally superposed
484 duarte 235 double rmsd = calculate_rmsd(conformation1, conformation2);
485    
486     // // printing out individual distances (conformation1 and conformation2 are now optimally superposed)
487     // for (i=0;i<conformation1.length;i++){
488     // Point3d point1 = new Point3d(conformation1[i].x,conformation1[i].y,conformation1[i].z);
489     // Point3d point2 = new Point3d(conformation2[i].x,conformation2[i].y,conformation2[i].z);
490     // System.out.println(point1.distance(point2));
491     // }
492    
493     return rmsd;
494    
495     }
496    
497     /**
498     * Calculates the RMSD between two conformations.
499     * conformation1: Vector3d array (matrix of dimensions [N,3])
500     * conformation2: Vector3d array (matrix of dimensions [N,3])
501     *
502     * Both conformation1 and conformation2 are modified to be optimally superposed
503     *
504     * Implementation taken (python) from http://bosco.infogami.com/Root_Mean_Square_Deviation,
505     * then ported to java using Jama matrix package
506     * (page has moved to: http://boscoh.com/protein/rmsd-root-mean-square-deviation)
507     * @param conformation1
508     * @param conformation2
509     * @return
510 duarte 237 * @throws ConformationsNotSameSizeError
511 duarte 235 */
512 duarte 237 private double calculate_rmsd(Vector3d[] conformation1, Vector3d[] conformation2) throws ConformationsNotSameSizeError{
513 duarte 235 if (conformation1.length!=conformation2.length) {
514 duarte 237 //System.err.println("Conformations not the same size");
515     throw new ConformationsNotSameSizeError(
516     "Given conformations have different size: conformation1: "+conformation1.length+", conformation2: "+conformation2.length);
517 duarte 235 }
518     int n_vec = conformation1.length;
519    
520     // 1st we bring both conformations to the same centre by subtracting their respectives centres
521     Vector3d center1 = new Vector3d();
522     Vector3d center2 = new Vector3d();
523     for (int i=0;i<n_vec;i++){ // summing all vectors in each conformation
524     center1.add(conformation1[i]);
525     center2.add(conformation2[i]);
526     }
527     // dividing by n_vec (average)
528     center1.scale((double)1/n_vec);
529     center2.scale((double)1/n_vec);
530     // translating our conformations to the same coordinate system by subtracting centers
531     for (Vector3d vec:conformation1){
532     vec.sub(center1);
533     }
534     for (Vector3d vec:conformation2){
535     vec.sub(center2);
536     }
537    
538     //E0: initial sum of squared lengths of both conformations
539     double sum1 = 0.0;
540     double sum2 = 0.0;
541     for (int i=0;i<n_vec;i++){
542     sum1 += conformation1[i].lengthSquared();
543     sum2 += conformation2[i].lengthSquared();
544     }
545     double E0 = sum1 + sum2;
546    
547     // singular value decomposition
548     Matrix vecs1 = vector3dAr2matrix(conformation1);
549     Matrix vecs2 = vector3dAr2matrix(conformation2);
550    
551     Matrix correlation_matrix = vecs2.transpose().times(vecs1); //gives a 3x3 matrix
552    
553     SingularValueDecomposition svd = correlation_matrix.svd();
554     Matrix U = svd.getU();
555     Matrix V_trans = svd.getV().transpose();
556     double[] singularValues = svd.getSingularValues();
557    
558     boolean is_reflection = false;
559     if (U.det()*V_trans.det()<0.0){
560     is_reflection = true;
561     }
562     if (is_reflection){
563     // reflect along smallest principal axis:
564     // we change sign of last coordinate (smallest singular value)
565 duarte 236 singularValues[singularValues.length-1]=(-1)*singularValues[singularValues.length-1];
566 duarte 235 }
567    
568     // getting sum of singular values
569     double sumSV = 0.0;
570     for (int i=0;i<singularValues.length;i++){
571     sumSV += singularValues[i];
572     }
573    
574     // rmsd square: Kabsch formula
575     double rmsd_sq = (E0 - 2.0*sumSV)/((double) n_vec);
576     rmsd_sq = Math.max(rmsd_sq, 0.0);
577    
578     // finally we modify conformation2 to be aligned to conformation1
579     if (is_reflection) { // first we check if we are in is_reflection case: we need to change sign to last row of U
580     for (int j=0;j<U.getColumnDimension();j++){
581     // we change sign to last row of U
582     int lastRow = U.getRowDimension()-1;
583     U.set(lastRow, j, (-1)*U.get(lastRow,j));
584     }
585     }
586     Matrix optimal_rotation = U.times(V_trans);
587     Matrix conf2 = vecs2.times(optimal_rotation);
588     for (int i=0;i<n_vec;i++){
589     conformation2[i].x = conf2.get(i,0);
590     conformation2[i].y = conf2.get(i,1);
591     conformation2[i].z = conf2.get(i,2);
592     }
593    
594     return Math.sqrt(rmsd_sq);
595     }
596    
597     /** Gets a Jama.Matrix object from a Vector3d[] (deep copies) */
598     private Matrix vector3dAr2matrix(Vector3d[] vecArray) {
599     double[][] array = new double[vecArray.length][3];
600     for (int i=0;i<vecArray.length;i++){
601     vecArray[i].get(array[i]);
602     }
603     return new Matrix(array);
604     }
605    
606 duarte 123 }