ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 302
Committed: Mon Aug 27 14:28:23 2007 UTC (17 years, 6 months ago) by stehr
File size: 29133 byte(s)
Log Message:
changes to calcGridDensity, now also counts number of neighbouring non-empty grid cells
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3 stehr 257 import java.io.*;
4 duarte 123 import java.util.HashMap;
5 duarte 216 import java.util.Locale;
6 stehr 250 import java.util.Map;
7 duarte 123 import java.util.TreeMap;
8     import java.util.ArrayList;
9    
10 duarte 226 import javax.vecmath.Point3d;
11     import javax.vecmath.Point3i;
12 duarte 235 import javax.vecmath.Vector3d;
13 duarte 226
14 duarte 235 import Jama.Matrix;
15     import Jama.SingularValueDecomposition;
16    
17 duarte 207 /**
18     * A single chain pdb protein structure
19     *
20     * @author Jose Duarte
21     * Class: Pdb
22     * Package: proteinstructure
23     */
24     public abstract class Pdb {
25 duarte 123
26 duarte 208 protected final static int DEFAULT_MODEL=1; // default model serial (NMR structures)
27     public final static String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence
28 duarte 153
29 duarte 208 protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials
30     protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type
31 duarte 226 protected HashMap<Integer,Point3d> atomser2coord; // atom serials to 3D coordinates
32 duarte 208 protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials
33 duarte 237 protected HashMap<Integer,String> atomser2atom; // atom serials to atom names
34 duarte 208 protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials
35     protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings)
36 duarte 190
37 stehr 274 protected SecondaryStructure secondaryStructure; // the secondary structure annotation for this pdb object (should never be null)
38 duarte 219
39 duarte 208 protected HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms(); // contains atom names for each aminoacid
40 duarte 135
41 duarte 208 protected String sequence; // full sequence as it appears in SEQRES field
42     protected String pdbCode;
43     // given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file)
44     protected String pdbChainCode;
45     // Our internal chain identifier:
46 duarte 135 // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
47 duarte 208 // - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A"
48 duarte 207 protected String chainCode;
49 duarte 208 protected int model; // the model serial for NMR structures
50 duarte 123
51 duarte 278 // in case we read from pdb file, 2 possibilities:
52     // 1) SEQRES field is present: fullLength coincides with sequence length
53     // 2) SEQRES not present: fullLength is maximum observed residue (so if residue numbering is correct that only misses possible non-observed residues at end of chain)
54     protected int fullLength; // length of full sequence as it appears in SEQRES field
55     protected int obsLength; // length without unobserved, non standard aas
56    
57 duarte 208 protected String db; // the db from which we have taken the data (if applies)
58 duarte 153
59 stehr 257 /** Run DSSP externally and (re)assign the secondary structure annotation from the output */
60 stehr 259 public void runDssp(String dsspExecutable, String dsspParameters) throws IOException {
61 stehr 257 String startLine = " # RESIDUE AA STRUCTURE BP1 BP2 ACC";
62     String line;
63 stehr 280 int lineCount = 0;
64     char ssType;
65     TreeMap<Integer, Character> ssTypes;
66 duarte 260 int resNum;
67 stehr 259 String resNumStr;
68 stehr 257 File test = new File(dsspExecutable);
69     if(!test.canRead()) throw new IOException("DSSP Executable is not readable");
70 stehr 259 Process myDssp = Runtime.getRuntime().exec(dsspExecutable + " " + dsspParameters);
71 stehr 257 PrintWriter dsspInput = new PrintWriter(myDssp.getOutputStream());
72     BufferedReader dsspOutput = new BufferedReader(new InputStreamReader(myDssp.getInputStream()));
73     BufferedReader dsspError = new BufferedReader(new InputStreamReader(myDssp.getErrorStream()));
74     writeAtomLines(dsspInput); // pipe atom lines to dssp
75     dsspInput.close();
76 stehr 280 ssTypes = new TreeMap<Integer,Character>();
77 stehr 257 while((line = dsspOutput.readLine()) != null) {
78 stehr 259 lineCount++;
79 stehr 257 if(line.startsWith(startLine)) {
80 stehr 259 //System.out.println("Dssp Output: ");
81 stehr 257 break;
82     }
83     }
84     while((line = dsspOutput.readLine()) != null) {
85 stehr 259 lineCount++;
86     resNumStr = line.substring(5,10).trim();
87     ssType = line.charAt(16);
88     if(!resNumStr.equals("")) { // this should only happen if dssp inserts a line indicating a chain break
89     try {
90     resNum = Integer.valueOf(resNumStr);
91 stehr 280 ssTypes.put(resNum, ssType);
92 stehr 259 } catch (NumberFormatException e) {
93     System.err.println("Error while parsing DSSP output. Expected residue number, found '" + resNumStr + "' in line " + lineCount);
94     }
95     }
96 stehr 257 }
97 stehr 259 //for(char c:ssTypes) {System.out.print(c);}; System.out.println(".");
98 stehr 257 dsspOutput.close();
99     dsspError.close();
100 stehr 259
101 stehr 280 if(ssTypes.size() == 0) {
102 stehr 259 throw new IOException("No DSSP output found.");
103     }
104    
105 stehr 280 if(ssTypes.size() != get_length()) { // compare with number of observed residues
106     System.err.println("Error: DSSP output size (" + ssTypes.size() + ") does not match number of observed residues in structure (" + get_length() + ").");
107 stehr 259 }
108    
109     // assign secondary structure
110 stehr 274 this.secondaryStructure = new SecondaryStructure(); // forget the old annotation
111 stehr 280 char lastType = SecStrucElement.getFourStateTypeFromDsspType(ssTypes.get(ssTypes.firstKey()));
112     int lastResSer = ssTypes.firstKey();
113 stehr 259 char thisType;
114 stehr 274 int start = 0;
115 stehr 259 int elementCount = 0;
116 stehr 274 SecStrucElement ssElem;
117 stehr 259 String ssId;
118 stehr 280 for(int resSer:ssTypes.keySet()) {
119     thisType = SecStrucElement.getFourStateTypeFromDsspType(ssTypes.get(resSer));
120     if(thisType != lastType || resSer > lastResSer+1) {
121 stehr 259 // finish previous element, start new one
122 stehr 274 if(lastType != SecStrucElement.OTHER) {
123 stehr 259 elementCount++;
124     ssId = new Character(lastType).toString() + new Integer(elementCount).toString();
125 stehr 280 ssElem = new SecStrucElement(lastType, start,lastResSer,ssId);
126 stehr 274 secondaryStructure.add(ssElem);
127 stehr 259 }
128 stehr 280 start = resSer;
129 stehr 259 lastType = thisType;
130     }
131 stehr 280 lastResSer = resSer;
132 stehr 259 }
133     // finish last element
134 stehr 280 if(lastType != SecStrucElement.OTHER) {
135 stehr 259 elementCount++;
136     ssId = new Character(lastType).toString() + new Integer(elementCount).toString();
137 stehr 280 ssElem = new SecStrucElement(lastType, start, ssTypes.lastKey(), ssId);
138 stehr 274 secondaryStructure.add(ssElem);
139 stehr 259 }
140 stehr 274 secondaryStructure.setComment("DSSP");
141 stehr 257 }
142 stehr 259
143    
144 stehr 257 /** Writes atom lines for this structure to the given output stream */
145     private void writeAtomLines(PrintWriter Out) {
146 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
147 duarte 206 Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'");
148 duarte 123 for (String resser_atom:resser_atom2atomserial.keySet()){
149     int atomserial = resser_atom2atomserial.get(resser_atom);
150     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
151     String atom = resser_atom.split("_")[1];
152     String res_type = resser2restype.get(res_serial);
153 duarte 226 Point3d coords = atomser2coord.get(atomserial);
154 stehr 257 String atomType = atom.substring(0,1);
155     Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords.x, coords.y, coords.z, atomType};
156 duarte 130 lines.put(atomserial, fields);
157 duarte 123 }
158 duarte 130 for (int atomserial:lines.keySet()){
159 duarte 216 // Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales)
160 stehr 257 Out.printf(Locale.US,"ATOM %5d %-3s %3s %1s%4d %8.3f%8.3f%8.3f 1.00 0.00 %s\n",lines.get(atomserial));
161 duarte 130 }
162 duarte 123 Out.println("END");
163 stehr 257 Out.close();
164 duarte 123 }
165 stehr 257
166     /**
167     * Dumps coordinate data into a file in pdb format (ATOM lines only)
168     * The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects
169     * @param outfile
170     * @throws IOException
171     */
172     public void dump2pdbfile(String outfile) throws IOException {
173     PrintWriter Out = new PrintWriter(new FileOutputStream(outfile));
174     writeAtomLines(Out);
175     }
176 duarte 123
177 duarte 237 /**
178     * Dump the full sequence of this Pdb object in fasta file format
179     * @param seqfile
180     * @throws IOException
181     */
182 duarte 123 public void dumpseq(String seqfile) throws IOException {
183     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
184 duarte 206 Out.println(">"+pdbCode+"_"+pdbChainCode);
185 duarte 123 Out.println(sequence);
186     Out.close();
187     }
188    
189 duarte 256 /**
190 stehr 259 * Returns the number of observed standard residues.
191     * TODO: Refactor method name
192 duarte 256 */
193 duarte 123 public int get_length(){
194 duarte 278 return obsLength;
195 duarte 123 }
196    
197 stehr 259 /** Returns the number of residues in the sequence of this protein. */
198     public int getFullLength() {
199 duarte 278 return fullLength;
200 stehr 259 }
201    
202 duarte 237 /**
203 duarte 256 * Returns number of (non-Hydrogen) atoms in the protein
204     * @return
205     */
206     public int getNumAtoms() {
207     return atomser2atom.size();
208     }
209    
210     /**
211 duarte 237 * Gets a TreeMap with atom serials as keys and their coordinates as values for the given contact type
212     * The contact type can't be a cross contact type, it doesn't make sense here
213     * @param ct
214     * @return
215     */
216 duarte 235 private TreeMap<Integer,Point3d> get_coords_for_ct(String ct) {
217 duarte 226 TreeMap<Integer,Point3d> coords = new TreeMap<Integer,Point3d>();
218 duarte 123 HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
219     for (int resser:resser2restype.keySet()){
220     String[] atoms = restype2atoms.get(resser2restype.get(resser));
221     for (String atom:atoms){
222     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
223     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
224 duarte 226 Point3d coord = atomser2coord.get(atomser);
225 duarte 123 coords.put(atomser, coord);
226     }
227     else {
228 duarte 282 //System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
229 duarte 123 }
230     }
231 duarte 275 // in ct="ALL" we still miss the OXT, we need to add it now if it is there (it will be there when this resser is the last residue)
232     if (ct.equals("ALL") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
233     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
234     Point3d coord = atomser2coord.get(atomser);
235     coords.put(atomser, coord);
236     }
237 duarte 123 }
238     return coords;
239     }
240 duarte 229
241     /**
242 duarte 237 * Gets a TreeMap with residue serials+"_"+atomname as keys and their coordinates as values for the given contact type
243     * The contact type can't be a cross contact type, it doesn't make sense here
244     * Used in rmsd method
245     * @param ct
246     * @return
247     */
248     private TreeMap<String,Point3d> get_coords_for_ct_4rmsd(String ct) {
249     TreeMap<String,Point3d> coords = new TreeMap<String,Point3d>();
250     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
251     for (int resser:resser2restype.keySet()){
252     String[] atoms = restype2atoms.get(resser2restype.get(resser));
253     for (String atom:atoms){
254     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
255     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
256     Point3d coord = atomser2coord.get(atomser);
257     coords.put(resser+"_"+atom, coord);
258     }
259     }
260 duarte 275 // in ct="ALL" we still miss the OXT, we need to add it now if it is there (it will be there when this resser is the last residue)
261     if (ct.equals("ALL") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
262     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
263     Point3d coord = atomser2coord.get(atomser);
264     coords.put(resser+"_"+"OXT", coord);
265     }
266 duarte 237 }
267     return coords;
268     }
269    
270     /**
271 duarte 229 * Returns the distance matrix as a HashMap with Contacts (residue serial pairs) as keys
272     * It doesn't make sense to call this method for multi atom contact
273     * types (for each residue serial pair there's more than 1 distance)
274     * Thus before calling this we should check AA.isValidSingleAtomCT(ct)
275     * @param ct the contact type
276     * @return
277     */
278 duarte 234 public HashMap<Edge, Double> calculate_dist_matrix(String ct){
279     HashMap<Edge,Double> distMatrixAtoms = new HashMap<Edge,Double>();
280 duarte 123 if (!ct.contains("/")){
281 duarte 226 TreeMap<Integer,Point3d> coords = get_coords_for_ct(ct);
282 duarte 123 for (int i_atomser:coords.keySet()){
283     for (int j_atomser:coords.keySet()){
284     if (j_atomser>i_atomser) {
285 duarte 234 Edge pair = new Edge(i_atomser,j_atomser);
286 duarte 229 distMatrixAtoms.put(pair, coords.get(i_atomser).distance(coords.get(j_atomser)));
287 duarte 123 }
288     }
289     }
290     } else {
291     String i_ct = ct.split("/")[0];
292     String j_ct = ct.split("/")[1];
293 duarte 226 TreeMap<Integer,Point3d> i_coords = get_coords_for_ct(i_ct);
294     TreeMap<Integer,Point3d> j_coords = get_coords_for_ct(j_ct);
295 duarte 123 for (int i_atomser:i_coords.keySet()){
296     for (int j_atomser:j_coords.keySet()){
297     if (j_atomser!=i_atomser){
298 duarte 234 Edge pair = new Edge(i_atomser,j_atomser);
299 duarte 229 distMatrixAtoms.put(pair, i_coords.get(i_atomser).distance(j_coords.get(j_atomser)));
300 duarte 123 }
301     }
302     }
303     }
304 duarte 229
305 duarte 234 HashMap<Edge,Double> distMatrixRes = new HashMap<Edge, Double>();
306     for (Edge cont: distMatrixAtoms.keySet()){
307 duarte 229 int i_resser = get_resser_from_atomser(cont.i);
308     int j_resser = get_resser_from_atomser(cont.j);
309 duarte 234 distMatrixRes.put(new Edge(i_resser,j_resser), distMatrixAtoms.get(cont));
310 duarte 229 }
311    
312     return distMatrixRes;
313 duarte 123 }
314    
315 duarte 129 /**
316 duarte 226 * Get the graph for given contact type and cutoff for this Pdb object.
317     * Returns a Graph object with the contacts
318     * We do geometric hashing for fast contact computation (without needing to calculate full distance matrix)
319     * @param ct
320     * @param cutoff
321     * @return
322     */
323 duarte 228 public Graph get_graph(String ct, double cutoff){
324     TreeMap<Integer,Point3d> i_coords = null;
325 stehr 250 TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymetric edge types
326 duarte 228 boolean directed = false;
327     if (!ct.contains("/")){
328     i_coords = get_coords_for_ct(ct);
329     directed = false;
330     } else {
331     String i_ct = ct.split("/")[0];
332     String j_ct = ct.split("/")[1];
333     i_coords = get_coords_for_ct(i_ct);
334     j_coords = get_coords_for_ct(j_ct);
335     directed = true;
336     }
337     int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials
338     int[] j_atomserials = null;
339 duarte 226
340     int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A)
341    
342     int boxSize = (int) Math.floor(cutoff*SCALE);
343    
344     HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>();
345     int i=0;
346 duarte 228 for (int i_atomser:i_coords.keySet()){
347 duarte 226 //coordinates for atom serial atomser, we will use i as its identifier below
348 duarte 228 Point3d coord = i_coords.get(i_atomser);
349 duarte 226 int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
350     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
351     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
352     Point3i floor = new Point3i(floorX,floorY,floorZ);
353     if (boxes.containsKey(floor)){
354     // we put the coords for atom i in its corresponding box (identified by floor)
355 duarte 228 boxes.get(floor).put_i_Point(i, coord);
356     if (!directed){
357     boxes.get(floor).put_j_Point(i, coord);
358     }
359 duarte 226 } else {
360     Box box = new Box(floor);
361 duarte 228 box.put_i_Point(i, coord);
362     if (!directed){
363     box.put_j_Point(i, coord);
364     }
365 duarte 226 boxes.put(floor,box);
366     }
367 duarte 228 i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
368 duarte 226 i++;
369     }
370 duarte 228 int j=0;
371     if (directed) {
372     j_atomserials = new int[j_coords.size()];
373     for (int j_atomser:j_coords.keySet()){
374     //coordinates for atom serial atomser, we will use j as its identifier below
375     Point3d coord = j_coords.get(j_atomser);
376     int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
377     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
378     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
379     Point3i floor = new Point3i(floorX,floorY,floorZ);
380     if (boxes.containsKey(floor)){
381     // we put the coords for atom j in its corresponding box (identified by floor)
382     boxes.get(floor).put_j_Point(j, coord);
383     } else {
384     Box box = new Box(floor);
385     box.put_j_Point(j, coord);
386     boxes.put(floor,box);
387     }
388     j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
389     j++;
390     }
391     } else {
392     j_atomserials = i_atomserials;
393     }
394    
395 duarte 226
396 duarte 282 float[][]distMatrix = new float[i_atomserials.length][j_atomserials.length];
397 duarte 226
398     for (Point3i floor:boxes.keySet()){ // for each box
399     // distances of points within this box
400 duarte 228 boxes.get(floor).getDistancesWithinBox(distMatrix,directed);
401 duarte 226
402 duarte 228 //TODO should iterate only through half of the neighbours here
403 duarte 226 // distances of points from this box to all neighbouring boxes: 26 iterations (26 neighbouring boxes)
404     for (int x=floor.x-boxSize;x<=floor.x+boxSize;x+=boxSize){
405     for (int y=floor.y-boxSize;y<=floor.y+boxSize;y+=boxSize){
406     for (int z=floor.z-boxSize;z<=floor.z+boxSize;z+=boxSize){
407     if (!((x==floor.x)&&(y==floor.y)&&(z==floor.z))) { // skip this box
408     Point3i neighbor = new Point3i(x,y,z);
409     if (boxes.containsKey(neighbor)){
410 duarte 228 boxes.get(floor).getDistancesToNeighborBox(boxes.get(neighbor),distMatrix,directed);
411 duarte 226 }
412     }
413     }
414     }
415     }
416     }
417    
418 duarte 228 // getting the contacts (in residue serials) from the atom serials (partial) distance matrix
419 duarte 234 EdgeSet contacts = new EdgeSet();
420 duarte 284 TreeMap<Edge,Double> weights = new TreeMap<Edge,Double>();
421 duarte 226 for (i=0;i<distMatrix.length;i++){
422 duarte 228 for (j=0;j<distMatrix[i].length;j++){
423     // the condition distMatrix[i][j]!=0.0 takes care of skipping several things:
424     // - diagonal of the matrix in case of undirected
425     // - lower half of matrix in case of undirected
426 duarte 235 // - cells for which we didn't calculate a distance because the 2 points were not in same or neighbouring boxes (i.e. too far apart)
427 duarte 282 if (distMatrix[i][j]!=0.0f && distMatrix[i][j]<=cutoff){
428 duarte 228 int i_resser = atomser2resser.get(i_atomserials[i]);
429     int j_resser = atomser2resser.get(j_atomserials[j]);
430 duarte 234 Edge resser_pair = new Edge(i_resser,j_resser);
431 duarte 228 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
432 duarte 235 if (i_resser!=j_resser){ // duplicates are automatically taken care by the EdgeSet class which is a TreeSet and doesn't allow duplicates
433 duarte 226 contacts.add(resser_pair);
434 duarte 284 // as weights we count the number of atom-edges per residue-edge
435     if (weights.containsKey(resser_pair)) {
436     weights.put(resser_pair, weights.get(resser_pair)+1.0);
437     } else {
438     weights.put(resser_pair, 1.0);
439     }
440 duarte 226 }
441     }
442 duarte 228
443 duarte 226 }
444     }
445 duarte 228
446     // creating and returning the graph object
447 duarte 226 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
448     for (int resser:resser2restype.keySet()){
449     nodes.put(resser,resser2restype.get(resser));
450     }
451 duarte 284 Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode,model, secondaryStructure.copy(), weights);
452 duarte 226 return graph;
453     }
454    
455 stehr 250 public void calcGridDensity(String ct, double cutoff, Map<Integer, Integer> densityCount) {
456     TreeMap<Integer,Point3d> i_coords = null;
457 stehr 302 TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymmetric edge types
458 stehr 250 boolean directed = false;
459     if (!ct.contains("/")){
460 stehr 302 i_coords = get_coords_for_ct(ct); // mapping from atom serials to coordinates
461 stehr 250 directed = false;
462     } else {
463     String i_ct = ct.split("/")[0];
464     String j_ct = ct.split("/")[1];
465     i_coords = get_coords_for_ct(i_ct);
466     j_coords = get_coords_for_ct(j_ct);
467     directed = true;
468     }
469     int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials
470     int[] j_atomserials = null;
471    
472 stehr 302 int SCALE=100; // i.e. we use units of hundredths of Angstroms (thus cutoffs can be specified with a maximum precission of 0.01A)
473 stehr 250
474     int boxSize = (int) Math.floor(cutoff*SCALE);
475    
476     HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>();
477     int i=0;
478     for (int i_atomser:i_coords.keySet()){
479     //coordinates for atom serial atomser, we will use i as its identifier below
480     Point3d coord = i_coords.get(i_atomser);
481     int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
482     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
483     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
484     Point3i floor = new Point3i(floorX,floorY,floorZ);
485     if (boxes.containsKey(floor)){
486     // we put the coords for atom i in its corresponding box (identified by floor)
487     boxes.get(floor).put_i_Point(i, coord);
488     if (!directed){
489     boxes.get(floor).put_j_Point(i, coord);
490     }
491     } else {
492     Box box = new Box(floor);
493     box.put_i_Point(i, coord);
494     if (!directed){
495     box.put_j_Point(i, coord);
496     }
497     boxes.put(floor,box);
498     }
499     i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
500     i++;
501     }
502     int j=0;
503     if (directed) {
504     j_atomserials = new int[j_coords.size()];
505     for (int j_atomser:j_coords.keySet()){
506     //coordinates for atom serial atomser, we will use j as its identifier below
507     Point3d coord = j_coords.get(j_atomser);
508     int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
509     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
510     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
511     Point3i floor = new Point3i(floorX,floorY,floorZ);
512     if (boxes.containsKey(floor)){
513     // we put the coords for atom j in its corresponding box (identified by floor)
514     boxes.get(floor).put_j_Point(j, coord);
515     } else {
516     Box box = new Box(floor);
517     box.put_j_Point(j, coord);
518     boxes.put(floor,box);
519     }
520     j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
521     j++;
522     }
523     } else {
524     j_atomserials = i_atomserials;
525     }
526    
527 stehr 302 // count density
528 stehr 250 for(Point3i floor:boxes.keySet()) {
529 stehr 302 //int size = boxes.get(floor).size();
530     int size = getNumGridNbs(boxes, floor, boxSize); // count number of neighbouring grid cells with points in them
531 stehr 250 if(densityCount.containsKey(size)) {
532     int old = densityCount.get(size);
533     densityCount.put(size, ++old);
534     } else {
535     densityCount.put(size, 1);
536     }
537     }
538 stehr 302
539    
540 stehr 250 }
541    
542 stehr 302 /** Returns the number of neighbours of this grid cell */
543     private int getNumGridNbs(HashMap<Point3i,Box> boxes, Point3i floor, int boxSize) {
544     Point3i neighbor;
545     int nbs = 0;
546     for (int x=floor.x-boxSize;x<=floor.x+boxSize;x+=boxSize){
547     for (int y=floor.y-boxSize;y<=floor.y+boxSize;y+=boxSize){
548     for (int z=floor.z-boxSize;z<=floor.z+boxSize;z+=boxSize){
549     neighbor = new Point3i(x,y,z);
550     if (boxes.containsKey(neighbor)) nbs++;
551     }
552     }
553     }
554     // compensate for counting myself as a neighbour
555     if(boxes.containsKey(floor)) nbs--;
556     return nbs;
557     }
558    
559 duarte 190 public int get_resser_from_pdbresser (String pdbresser){
560     return pdbresser2resser.get(pdbresser);
561     }
562    
563     public String get_pdbresser_from_resser (int resser){
564     return resser2pdbresser.get(resser);
565     }
566    
567 duarte 200 public int get_resser_from_atomser(int atomser){
568     return atomser2resser.get(atomser);
569     }
570 duarte 206
571 duarte 275 public String getPdbCode() {
572     return this.pdbCode;
573     }
574    
575 duarte 206 public String getChainCode(){
576     return this.chainCode;
577     }
578    
579     public String getPdbChainCode(){
580     return this.pdbChainCode;
581     }
582 stehr 274
583     // secondary structure related methods
584 duarte 219
585 stehr 274 /**
586     * Returns true if secondary structure information is available, false otherwise.
587 stehr 259 */
588 stehr 257 public boolean hasSecondaryStructure() {
589 stehr 274 return !this.secondaryStructure.isEmpty();
590 stehr 257 }
591    
592 stehr 274 /**
593     * Returns the secondary structure annotation object of this graph.
594     */
595     public SecondaryStructure getSecondaryStructure() {
596     return this.secondaryStructure;
597 stehr 259 }
598    
599 stehr 274 // end of secondary structure related methods
600    
601 duarte 235 /**
602     * Calculates rmsd (on atoms given by ct) of this Pdb object to otherPdb object
603 duarte 237 * Both objects must represent structures with same sequence (save unobserved residues or missing atoms)
604     *
605 duarte 235 * @param otherPdb
606 duarte 237 * @param ct the contact type (crossed contact types don't make sense here)
607 duarte 235 * @return
608 duarte 237 * @throws ConformationsNotSameSizeError
609 duarte 235 */
610 duarte 237 public double rmsd(Pdb otherPdb, String ct) throws ConformationsNotSameSizeError {
611     TreeMap<String,Point3d> thiscoords = this.get_coords_for_ct_4rmsd(ct);
612     TreeMap<String,Point3d> othercoords = otherPdb.get_coords_for_ct_4rmsd(ct);
613 duarte 235
614 duarte 237 // there might be unobserved residues or some missing atoms for a residue
615     // here we get the ones that are in common
616     ArrayList<String> common = new ArrayList<String>();
617     for (String resser_atom:thiscoords.keySet()){
618     if (othercoords.containsKey(resser_atom)){
619     common.add(resser_atom);
620     }
621     }
622    
623 duarte 235 // converting the TreeMaps to Vector3d arrays (input format for calculate_rmsd)
624 duarte 237 Vector3d[] conformation1 = new Vector3d[common.size()];
625     Vector3d[] conformation2 = new Vector3d[common.size()];
626     int i = 0;
627     for (String resser_atom:common){
628     conformation1[i] = new Vector3d(thiscoords.get(resser_atom).x,thiscoords.get(resser_atom).y,thiscoords.get(resser_atom).z);
629     conformation2[i] = new Vector3d(othercoords.get(resser_atom).x,othercoords.get(resser_atom).y,othercoords.get(resser_atom).z);
630 duarte 235 i++;
631     }
632 duarte 237
633     // this as well as calculating the rmsd, changes conformation1 and conformation2 to be optimally superposed
634 duarte 235 double rmsd = calculate_rmsd(conformation1, conformation2);
635    
636     // // printing out individual distances (conformation1 and conformation2 are now optimally superposed)
637     // for (i=0;i<conformation1.length;i++){
638     // Point3d point1 = new Point3d(conformation1[i].x,conformation1[i].y,conformation1[i].z);
639     // Point3d point2 = new Point3d(conformation2[i].x,conformation2[i].y,conformation2[i].z);
640     // System.out.println(point1.distance(point2));
641     // }
642    
643     return rmsd;
644    
645     }
646    
647     /**
648     * Calculates the RMSD between two conformations.
649     * conformation1: Vector3d array (matrix of dimensions [N,3])
650     * conformation2: Vector3d array (matrix of dimensions [N,3])
651     *
652     * Both conformation1 and conformation2 are modified to be optimally superposed
653     *
654     * Implementation taken (python) from http://bosco.infogami.com/Root_Mean_Square_Deviation,
655     * then ported to java using Jama matrix package
656     * (page has moved to: http://boscoh.com/protein/rmsd-root-mean-square-deviation)
657     * @param conformation1
658     * @param conformation2
659     * @return
660 duarte 237 * @throws ConformationsNotSameSizeError
661 duarte 235 */
662 duarte 237 private double calculate_rmsd(Vector3d[] conformation1, Vector3d[] conformation2) throws ConformationsNotSameSizeError{
663 duarte 235 if (conformation1.length!=conformation2.length) {
664 duarte 237 //System.err.println("Conformations not the same size");
665     throw new ConformationsNotSameSizeError(
666     "Given conformations have different size: conformation1: "+conformation1.length+", conformation2: "+conformation2.length);
667 duarte 235 }
668     int n_vec = conformation1.length;
669    
670     // 1st we bring both conformations to the same centre by subtracting their respectives centres
671     Vector3d center1 = new Vector3d();
672     Vector3d center2 = new Vector3d();
673     for (int i=0;i<n_vec;i++){ // summing all vectors in each conformation
674     center1.add(conformation1[i]);
675     center2.add(conformation2[i]);
676     }
677     // dividing by n_vec (average)
678     center1.scale((double)1/n_vec);
679     center2.scale((double)1/n_vec);
680     // translating our conformations to the same coordinate system by subtracting centers
681     for (Vector3d vec:conformation1){
682     vec.sub(center1);
683     }
684     for (Vector3d vec:conformation2){
685     vec.sub(center2);
686     }
687    
688     //E0: initial sum of squared lengths of both conformations
689     double sum1 = 0.0;
690     double sum2 = 0.0;
691     for (int i=0;i<n_vec;i++){
692     sum1 += conformation1[i].lengthSquared();
693     sum2 += conformation2[i].lengthSquared();
694     }
695     double E0 = sum1 + sum2;
696    
697     // singular value decomposition
698     Matrix vecs1 = vector3dAr2matrix(conformation1);
699     Matrix vecs2 = vector3dAr2matrix(conformation2);
700    
701     Matrix correlation_matrix = vecs2.transpose().times(vecs1); //gives a 3x3 matrix
702    
703     SingularValueDecomposition svd = correlation_matrix.svd();
704     Matrix U = svd.getU();
705     Matrix V_trans = svd.getV().transpose();
706     double[] singularValues = svd.getSingularValues();
707    
708     boolean is_reflection = false;
709     if (U.det()*V_trans.det()<0.0){
710     is_reflection = true;
711     }
712     if (is_reflection){
713     // reflect along smallest principal axis:
714     // we change sign of last coordinate (smallest singular value)
715 duarte 236 singularValues[singularValues.length-1]=(-1)*singularValues[singularValues.length-1];
716 duarte 235 }
717    
718     // getting sum of singular values
719     double sumSV = 0.0;
720     for (int i=0;i<singularValues.length;i++){
721     sumSV += singularValues[i];
722     }
723    
724     // rmsd square: Kabsch formula
725     double rmsd_sq = (E0 - 2.0*sumSV)/((double) n_vec);
726     rmsd_sq = Math.max(rmsd_sq, 0.0);
727    
728     // finally we modify conformation2 to be aligned to conformation1
729     if (is_reflection) { // first we check if we are in is_reflection case: we need to change sign to last row of U
730     for (int j=0;j<U.getColumnDimension();j++){
731     // we change sign to last row of U
732     int lastRow = U.getRowDimension()-1;
733     U.set(lastRow, j, (-1)*U.get(lastRow,j));
734     }
735     }
736     Matrix optimal_rotation = U.times(V_trans);
737     Matrix conf2 = vecs2.times(optimal_rotation);
738     for (int i=0;i<n_vec;i++){
739     conformation2[i].x = conf2.get(i,0);
740     conformation2[i].y = conf2.get(i,1);
741     conformation2[i].z = conf2.get(i,2);
742     }
743    
744     return Math.sqrt(rmsd_sq);
745     }
746    
747     /** Gets a Jama.Matrix object from a Vector3d[] (deep copies) */
748     private Matrix vector3dAr2matrix(Vector3d[] vecArray) {
749     double[][] array = new double[vecArray.length][3];
750     for (int i=0;i<vecArray.length;i++){
751     vecArray[i].get(array[i]);
752     }
753     return new Matrix(array);
754     }
755    
756 duarte 123 }