1 |
duarte |
123 |
package proteinstructure; |
2 |
|
|
|
3 |
|
|
import java.io.FileOutputStream; |
4 |
|
|
import java.io.PrintStream; |
5 |
|
|
import java.io.IOException; |
6 |
|
|
import java.util.HashMap; |
7 |
duarte |
216 |
import java.util.Locale; |
8 |
stehr |
250 |
import java.util.Map; |
9 |
duarte |
123 |
import java.util.TreeMap; |
10 |
|
|
import java.util.ArrayList; |
11 |
|
|
|
12 |
duarte |
226 |
import javax.vecmath.Point3d; |
13 |
|
|
import javax.vecmath.Point3i; |
14 |
duarte |
235 |
import javax.vecmath.Vector3d; |
15 |
duarte |
226 |
|
16 |
duarte |
235 |
import Jama.Matrix; |
17 |
|
|
import Jama.SingularValueDecomposition; |
18 |
|
|
|
19 |
duarte |
207 |
/** |
20 |
|
|
* A single chain pdb protein structure |
21 |
|
|
* |
22 |
|
|
* @author Jose Duarte |
23 |
|
|
* Class: Pdb |
24 |
|
|
* Package: proteinstructure |
25 |
|
|
*/ |
26 |
|
|
public abstract class Pdb { |
27 |
duarte |
123 |
|
28 |
duarte |
208 |
protected final static int DEFAULT_MODEL=1; // default model serial (NMR structures) |
29 |
|
|
public final static String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence |
30 |
duarte |
153 |
|
31 |
duarte |
208 |
protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials |
32 |
|
|
protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type |
33 |
duarte |
226 |
protected HashMap<Integer,Point3d> atomser2coord; // atom serials to 3D coordinates |
34 |
duarte |
208 |
protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials |
35 |
duarte |
237 |
protected HashMap<Integer,String> atomser2atom; // atom serials to atom names |
36 |
duarte |
208 |
protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials |
37 |
|
|
protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings) |
38 |
duarte |
190 |
|
39 |
duarte |
222 |
protected HashMap<Integer,String> resser2secstruct; // residue serials to secondary structure |
40 |
|
|
protected TreeMap<String,Interval> secstruct2resinterval;// secondary structure element to residue serial intervals |
41 |
duarte |
219 |
|
42 |
duarte |
208 |
protected HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms(); // contains atom names for each aminoacid |
43 |
duarte |
135 |
|
44 |
duarte |
208 |
protected String sequence; // full sequence as it appears in SEQRES field |
45 |
|
|
protected String pdbCode; |
46 |
|
|
// given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file) |
47 |
|
|
protected String pdbChainCode; |
48 |
|
|
// Our internal chain identifier: |
49 |
duarte |
135 |
// - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd) |
50 |
duarte |
208 |
// - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A" |
51 |
duarte |
207 |
protected String chainCode; |
52 |
duarte |
208 |
protected int model; // the model serial for NMR structures |
53 |
duarte |
123 |
|
54 |
duarte |
208 |
protected String db; // the db from which we have taken the data (if applies) |
55 |
duarte |
153 |
|
56 |
duarte |
208 |
|
57 |
|
|
|
58 |
duarte |
133 |
|
59 |
duarte |
153 |
/** |
60 |
|
|
* Dumps coordinate data into a file in pdb format (ATOM lines only) |
61 |
duarte |
206 |
* The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects |
62 |
duarte |
153 |
* @param outfile |
63 |
|
|
* @throws IOException |
64 |
|
|
*/ |
65 |
duarte |
123 |
public void dump2pdbfile(String outfile) throws IOException { |
66 |
duarte |
130 |
TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>(); |
67 |
duarte |
123 |
PrintStream Out = new PrintStream(new FileOutputStream(outfile)); |
68 |
duarte |
206 |
Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'"); |
69 |
duarte |
123 |
for (String resser_atom:resser_atom2atomserial.keySet()){ |
70 |
|
|
int atomserial = resser_atom2atomserial.get(resser_atom); |
71 |
|
|
int res_serial = Integer.parseInt(resser_atom.split("_")[0]); |
72 |
|
|
String atom = resser_atom.split("_")[1]; |
73 |
|
|
String res_type = resser2restype.get(res_serial); |
74 |
duarte |
226 |
Point3d coords = atomser2coord.get(atomserial); |
75 |
|
|
Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords.x, coords.y, coords.z}; |
76 |
duarte |
130 |
lines.put(atomserial, fields); |
77 |
duarte |
123 |
} |
78 |
duarte |
130 |
for (int atomserial:lines.keySet()){ |
79 |
duarte |
216 |
// Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales) |
80 |
|
|
Out.printf(Locale.US,"ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial)); |
81 |
duarte |
130 |
} |
82 |
duarte |
123 |
Out.println("END"); |
83 |
|
|
Out.close(); |
84 |
|
|
} |
85 |
|
|
|
86 |
duarte |
237 |
/** |
87 |
|
|
* Dump the full sequence of this Pdb object in fasta file format |
88 |
|
|
* @param seqfile |
89 |
|
|
* @throws IOException |
90 |
|
|
*/ |
91 |
duarte |
123 |
public void dumpseq(String seqfile) throws IOException { |
92 |
|
|
PrintStream Out = new PrintStream(new FileOutputStream(seqfile)); |
93 |
duarte |
206 |
Out.println(">"+pdbCode+"_"+pdbChainCode); |
94 |
duarte |
123 |
Out.println(sequence); |
95 |
|
|
Out.close(); |
96 |
|
|
} |
97 |
|
|
|
98 |
|
|
public int get_length(){ |
99 |
|
|
return resser2restype.size(); |
100 |
|
|
} |
101 |
|
|
|
102 |
duarte |
237 |
/** |
103 |
|
|
* Gets a TreeMap with atom serials as keys and their coordinates as values for the given contact type |
104 |
|
|
* The contact type can't be a cross contact type, it doesn't make sense here |
105 |
|
|
* @param ct |
106 |
|
|
* @return |
107 |
|
|
*/ |
108 |
duarte |
235 |
private TreeMap<Integer,Point3d> get_coords_for_ct(String ct) { |
109 |
duarte |
226 |
TreeMap<Integer,Point3d> coords = new TreeMap<Integer,Point3d>(); |
110 |
duarte |
123 |
HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct); |
111 |
|
|
for (int resser:resser2restype.keySet()){ |
112 |
|
|
String[] atoms = restype2atoms.get(resser2restype.get(resser)); |
113 |
|
|
for (String atom:atoms){ |
114 |
|
|
if (resser_atom2atomserial.containsKey(resser+"_"+atom)){ |
115 |
|
|
int atomser = resser_atom2atomserial.get(resser+"_"+atom); |
116 |
duarte |
226 |
Point3d coord = atomser2coord.get(atomser); |
117 |
duarte |
123 |
coords.put(atomser, coord); |
118 |
|
|
} |
119 |
|
|
else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){ |
120 |
|
|
int atomser = resser_atom2atomserial.get(resser+"_"+"OXT"); |
121 |
duarte |
226 |
Point3d coord = atomser2coord.get(atomser); |
122 |
duarte |
123 |
coords.put(atomser, coord); |
123 |
|
|
} |
124 |
|
|
else { |
125 |
|
|
System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser."); |
126 |
|
|
} |
127 |
|
|
} |
128 |
|
|
} |
129 |
|
|
return coords; |
130 |
|
|
} |
131 |
duarte |
229 |
|
132 |
|
|
/** |
133 |
duarte |
237 |
* Gets a TreeMap with residue serials+"_"+atomname as keys and their coordinates as values for the given contact type |
134 |
|
|
* The contact type can't be a cross contact type, it doesn't make sense here |
135 |
|
|
* Used in rmsd method |
136 |
|
|
* @param ct |
137 |
|
|
* @return |
138 |
|
|
*/ |
139 |
|
|
private TreeMap<String,Point3d> get_coords_for_ct_4rmsd(String ct) { |
140 |
|
|
TreeMap<String,Point3d> coords = new TreeMap<String,Point3d>(); |
141 |
|
|
HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct); |
142 |
|
|
for (int resser:resser2restype.keySet()){ |
143 |
|
|
String[] atoms = restype2atoms.get(resser2restype.get(resser)); |
144 |
|
|
for (String atom:atoms){ |
145 |
|
|
if (resser_atom2atomserial.containsKey(resser+"_"+atom)){ |
146 |
|
|
int atomser = resser_atom2atomserial.get(resser+"_"+atom); |
147 |
|
|
Point3d coord = atomser2coord.get(atomser); |
148 |
|
|
coords.put(resser+"_"+atom, coord); |
149 |
|
|
} |
150 |
|
|
else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){ |
151 |
|
|
int atomser = resser_atom2atomserial.get(resser+"_"+"OXT"); |
152 |
|
|
Point3d coord = atomser2coord.get(atomser); |
153 |
|
|
coords.put(resser+"_"+atom, coord); |
154 |
|
|
} |
155 |
|
|
} |
156 |
|
|
} |
157 |
|
|
return coords; |
158 |
|
|
} |
159 |
|
|
|
160 |
|
|
/** |
161 |
duarte |
229 |
* Returns the distance matrix as a HashMap with Contacts (residue serial pairs) as keys |
162 |
|
|
* It doesn't make sense to call this method for multi atom contact |
163 |
|
|
* types (for each residue serial pair there's more than 1 distance) |
164 |
|
|
* Thus before calling this we should check AA.isValidSingleAtomCT(ct) |
165 |
|
|
* @param ct the contact type |
166 |
|
|
* @return |
167 |
|
|
*/ |
168 |
duarte |
234 |
public HashMap<Edge, Double> calculate_dist_matrix(String ct){ |
169 |
|
|
HashMap<Edge,Double> distMatrixAtoms = new HashMap<Edge,Double>(); |
170 |
duarte |
123 |
if (!ct.contains("/")){ |
171 |
duarte |
226 |
TreeMap<Integer,Point3d> coords = get_coords_for_ct(ct); |
172 |
duarte |
123 |
for (int i_atomser:coords.keySet()){ |
173 |
|
|
for (int j_atomser:coords.keySet()){ |
174 |
|
|
if (j_atomser>i_atomser) { |
175 |
duarte |
234 |
Edge pair = new Edge(i_atomser,j_atomser); |
176 |
duarte |
229 |
distMatrixAtoms.put(pair, coords.get(i_atomser).distance(coords.get(j_atomser))); |
177 |
duarte |
123 |
} |
178 |
|
|
} |
179 |
|
|
} |
180 |
|
|
} else { |
181 |
|
|
String i_ct = ct.split("/")[0]; |
182 |
|
|
String j_ct = ct.split("/")[1]; |
183 |
duarte |
226 |
TreeMap<Integer,Point3d> i_coords = get_coords_for_ct(i_ct); |
184 |
|
|
TreeMap<Integer,Point3d> j_coords = get_coords_for_ct(j_ct); |
185 |
duarte |
123 |
for (int i_atomser:i_coords.keySet()){ |
186 |
|
|
for (int j_atomser:j_coords.keySet()){ |
187 |
|
|
if (j_atomser!=i_atomser){ |
188 |
duarte |
234 |
Edge pair = new Edge(i_atomser,j_atomser); |
189 |
duarte |
229 |
distMatrixAtoms.put(pair, i_coords.get(i_atomser).distance(j_coords.get(j_atomser))); |
190 |
duarte |
123 |
} |
191 |
|
|
} |
192 |
|
|
} |
193 |
|
|
} |
194 |
duarte |
229 |
|
195 |
duarte |
234 |
HashMap<Edge,Double> distMatrixRes = new HashMap<Edge, Double>(); |
196 |
|
|
for (Edge cont: distMatrixAtoms.keySet()){ |
197 |
duarte |
229 |
int i_resser = get_resser_from_atomser(cont.i); |
198 |
|
|
int j_resser = get_resser_from_atomser(cont.j); |
199 |
duarte |
234 |
distMatrixRes.put(new Edge(i_resser,j_resser), distMatrixAtoms.get(cont)); |
200 |
duarte |
229 |
} |
201 |
|
|
|
202 |
|
|
return distMatrixRes; |
203 |
duarte |
123 |
} |
204 |
|
|
|
205 |
duarte |
129 |
/** |
206 |
duarte |
226 |
* Get the graph for given contact type and cutoff for this Pdb object. |
207 |
|
|
* Returns a Graph object with the contacts |
208 |
|
|
* We do geometric hashing for fast contact computation (without needing to calculate full distance matrix) |
209 |
|
|
* @param ct |
210 |
|
|
* @param cutoff |
211 |
|
|
* @return |
212 |
|
|
*/ |
213 |
duarte |
228 |
public Graph get_graph(String ct, double cutoff){ |
214 |
|
|
TreeMap<Integer,Point3d> i_coords = null; |
215 |
stehr |
250 |
TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymetric edge types |
216 |
duarte |
228 |
boolean directed = false; |
217 |
|
|
if (!ct.contains("/")){ |
218 |
|
|
i_coords = get_coords_for_ct(ct); |
219 |
|
|
directed = false; |
220 |
|
|
} else { |
221 |
|
|
String i_ct = ct.split("/")[0]; |
222 |
|
|
String j_ct = ct.split("/")[1]; |
223 |
|
|
i_coords = get_coords_for_ct(i_ct); |
224 |
|
|
j_coords = get_coords_for_ct(j_ct); |
225 |
|
|
directed = true; |
226 |
|
|
} |
227 |
|
|
int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials |
228 |
|
|
int[] j_atomserials = null; |
229 |
duarte |
226 |
|
230 |
|
|
int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A) |
231 |
|
|
|
232 |
|
|
int boxSize = (int) Math.floor(cutoff*SCALE); |
233 |
|
|
|
234 |
|
|
HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>(); |
235 |
|
|
int i=0; |
236 |
duarte |
228 |
for (int i_atomser:i_coords.keySet()){ |
237 |
duarte |
226 |
//coordinates for atom serial atomser, we will use i as its identifier below |
238 |
duarte |
228 |
Point3d coord = i_coords.get(i_atomser); |
239 |
duarte |
226 |
int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize)); |
240 |
|
|
int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize)); |
241 |
|
|
int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize)); |
242 |
|
|
Point3i floor = new Point3i(floorX,floorY,floorZ); |
243 |
|
|
if (boxes.containsKey(floor)){ |
244 |
|
|
// we put the coords for atom i in its corresponding box (identified by floor) |
245 |
duarte |
228 |
boxes.get(floor).put_i_Point(i, coord); |
246 |
|
|
if (!directed){ |
247 |
|
|
boxes.get(floor).put_j_Point(i, coord); |
248 |
|
|
} |
249 |
duarte |
226 |
} else { |
250 |
|
|
Box box = new Box(floor); |
251 |
duarte |
228 |
box.put_i_Point(i, coord); |
252 |
|
|
if (!directed){ |
253 |
|
|
box.put_j_Point(i, coord); |
254 |
|
|
} |
255 |
duarte |
226 |
boxes.put(floor,box); |
256 |
|
|
} |
257 |
duarte |
228 |
i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered |
258 |
duarte |
226 |
i++; |
259 |
|
|
} |
260 |
duarte |
228 |
int j=0; |
261 |
|
|
if (directed) { |
262 |
|
|
j_atomserials = new int[j_coords.size()]; |
263 |
|
|
for (int j_atomser:j_coords.keySet()){ |
264 |
|
|
//coordinates for atom serial atomser, we will use j as its identifier below |
265 |
|
|
Point3d coord = j_coords.get(j_atomser); |
266 |
|
|
int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize)); |
267 |
|
|
int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize)); |
268 |
|
|
int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize)); |
269 |
|
|
Point3i floor = new Point3i(floorX,floorY,floorZ); |
270 |
|
|
if (boxes.containsKey(floor)){ |
271 |
|
|
// we put the coords for atom j in its corresponding box (identified by floor) |
272 |
|
|
boxes.get(floor).put_j_Point(j, coord); |
273 |
|
|
} else { |
274 |
|
|
Box box = new Box(floor); |
275 |
|
|
box.put_j_Point(j, coord); |
276 |
|
|
boxes.put(floor,box); |
277 |
|
|
} |
278 |
|
|
j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered |
279 |
|
|
j++; |
280 |
|
|
} |
281 |
|
|
} else { |
282 |
|
|
j_atomserials = i_atomserials; |
283 |
|
|
} |
284 |
|
|
|
285 |
duarte |
226 |
|
286 |
duarte |
228 |
double[][]distMatrix = new double[i_atomserials.length][j_atomserials.length]; |
287 |
duarte |
226 |
|
288 |
|
|
for (Point3i floor:boxes.keySet()){ // for each box |
289 |
|
|
// distances of points within this box |
290 |
duarte |
228 |
boxes.get(floor).getDistancesWithinBox(distMatrix,directed); |
291 |
duarte |
226 |
|
292 |
duarte |
228 |
//TODO should iterate only through half of the neighbours here |
293 |
duarte |
226 |
// distances of points from this box to all neighbouring boxes: 26 iterations (26 neighbouring boxes) |
294 |
|
|
for (int x=floor.x-boxSize;x<=floor.x+boxSize;x+=boxSize){ |
295 |
|
|
for (int y=floor.y-boxSize;y<=floor.y+boxSize;y+=boxSize){ |
296 |
|
|
for (int z=floor.z-boxSize;z<=floor.z+boxSize;z+=boxSize){ |
297 |
|
|
if (!((x==floor.x)&&(y==floor.y)&&(z==floor.z))) { // skip this box |
298 |
|
|
Point3i neighbor = new Point3i(x,y,z); |
299 |
|
|
if (boxes.containsKey(neighbor)){ |
300 |
duarte |
228 |
boxes.get(floor).getDistancesToNeighborBox(boxes.get(neighbor),distMatrix,directed); |
301 |
duarte |
226 |
} |
302 |
|
|
} |
303 |
|
|
} |
304 |
|
|
} |
305 |
|
|
} |
306 |
|
|
} |
307 |
|
|
|
308 |
duarte |
228 |
// getting the contacts (in residue serials) from the atom serials (partial) distance matrix |
309 |
duarte |
234 |
EdgeSet contacts = new EdgeSet(); |
310 |
duarte |
226 |
for (i=0;i<distMatrix.length;i++){ |
311 |
duarte |
228 |
for (j=0;j<distMatrix[i].length;j++){ |
312 |
|
|
// the condition distMatrix[i][j]!=0.0 takes care of skipping several things: |
313 |
|
|
// - diagonal of the matrix in case of undirected |
314 |
|
|
// - lower half of matrix in case of undirected |
315 |
duarte |
235 |
// - cells for which we didn't calculate a distance because the 2 points were not in same or neighbouring boxes (i.e. too far apart) |
316 |
duarte |
228 |
if (distMatrix[i][j]!=0.0 && distMatrix[i][j]<=cutoff){ |
317 |
|
|
int i_resser = atomser2resser.get(i_atomserials[i]); |
318 |
|
|
int j_resser = atomser2resser.get(j_atomserials[j]); |
319 |
duarte |
234 |
Edge resser_pair = new Edge(i_resser,j_resser); |
320 |
duarte |
228 |
// for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates |
321 |
duarte |
235 |
if (i_resser!=j_resser){ // duplicates are automatically taken care by the EdgeSet class which is a TreeSet and doesn't allow duplicates |
322 |
duarte |
226 |
contacts.add(resser_pair); |
323 |
|
|
} |
324 |
|
|
} |
325 |
duarte |
228 |
|
326 |
duarte |
226 |
} |
327 |
|
|
} |
328 |
duarte |
228 |
|
329 |
|
|
// creating and returning the graph object |
330 |
duarte |
226 |
TreeMap<Integer,String> nodes = new TreeMap<Integer,String>(); |
331 |
|
|
for (int resser:resser2restype.keySet()){ |
332 |
|
|
nodes.put(resser,resser2restype.get(resser)); |
333 |
|
|
} |
334 |
|
|
Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode); |
335 |
|
|
|
336 |
|
|
return graph; |
337 |
|
|
} |
338 |
|
|
|
339 |
stehr |
250 |
public void calcGridDensity(String ct, double cutoff, Map<Integer, Integer> densityCount) { |
340 |
|
|
TreeMap<Integer,Point3d> i_coords = null; |
341 |
|
|
TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymetric edge types |
342 |
|
|
boolean directed = false; |
343 |
|
|
if (!ct.contains("/")){ |
344 |
|
|
i_coords = get_coords_for_ct(ct); |
345 |
|
|
directed = false; |
346 |
|
|
} else { |
347 |
|
|
String i_ct = ct.split("/")[0]; |
348 |
|
|
String j_ct = ct.split("/")[1]; |
349 |
|
|
i_coords = get_coords_for_ct(i_ct); |
350 |
|
|
j_coords = get_coords_for_ct(j_ct); |
351 |
|
|
directed = true; |
352 |
|
|
} |
353 |
|
|
int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials |
354 |
|
|
int[] j_atomserials = null; |
355 |
|
|
|
356 |
|
|
int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A) |
357 |
|
|
|
358 |
|
|
int boxSize = (int) Math.floor(cutoff*SCALE); |
359 |
|
|
|
360 |
|
|
HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>(); |
361 |
|
|
int i=0; |
362 |
|
|
for (int i_atomser:i_coords.keySet()){ |
363 |
|
|
//coordinates for atom serial atomser, we will use i as its identifier below |
364 |
|
|
Point3d coord = i_coords.get(i_atomser); |
365 |
|
|
int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize)); |
366 |
|
|
int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize)); |
367 |
|
|
int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize)); |
368 |
|
|
Point3i floor = new Point3i(floorX,floorY,floorZ); |
369 |
|
|
if (boxes.containsKey(floor)){ |
370 |
|
|
// we put the coords for atom i in its corresponding box (identified by floor) |
371 |
|
|
boxes.get(floor).put_i_Point(i, coord); |
372 |
|
|
if (!directed){ |
373 |
|
|
boxes.get(floor).put_j_Point(i, coord); |
374 |
|
|
} |
375 |
|
|
} else { |
376 |
|
|
Box box = new Box(floor); |
377 |
|
|
box.put_i_Point(i, coord); |
378 |
|
|
if (!directed){ |
379 |
|
|
box.put_j_Point(i, coord); |
380 |
|
|
} |
381 |
|
|
boxes.put(floor,box); |
382 |
|
|
} |
383 |
|
|
i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered |
384 |
|
|
i++; |
385 |
|
|
} |
386 |
|
|
int j=0; |
387 |
|
|
if (directed) { |
388 |
|
|
j_atomserials = new int[j_coords.size()]; |
389 |
|
|
for (int j_atomser:j_coords.keySet()){ |
390 |
|
|
//coordinates for atom serial atomser, we will use j as its identifier below |
391 |
|
|
Point3d coord = j_coords.get(j_atomser); |
392 |
|
|
int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize)); |
393 |
|
|
int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize)); |
394 |
|
|
int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize)); |
395 |
|
|
Point3i floor = new Point3i(floorX,floorY,floorZ); |
396 |
|
|
if (boxes.containsKey(floor)){ |
397 |
|
|
// we put the coords for atom j in its corresponding box (identified by floor) |
398 |
|
|
boxes.get(floor).put_j_Point(j, coord); |
399 |
|
|
} else { |
400 |
|
|
Box box = new Box(floor); |
401 |
|
|
box.put_j_Point(j, coord); |
402 |
|
|
boxes.put(floor,box); |
403 |
|
|
} |
404 |
|
|
j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered |
405 |
|
|
j++; |
406 |
|
|
} |
407 |
|
|
} else { |
408 |
|
|
j_atomserials = i_atomserials; |
409 |
|
|
} |
410 |
|
|
|
411 |
|
|
for(Point3i floor:boxes.keySet()) { |
412 |
|
|
int size = boxes.get(floor).size(); |
413 |
|
|
if(densityCount.containsKey(size)) { |
414 |
|
|
int old = densityCount.get(size); |
415 |
|
|
densityCount.put(size, ++old); |
416 |
|
|
} else { |
417 |
|
|
densityCount.put(size, 1); |
418 |
|
|
} |
419 |
|
|
} |
420 |
|
|
} |
421 |
|
|
|
422 |
|
|
|
423 |
duarte |
190 |
public int get_resser_from_pdbresser (String pdbresser){ |
424 |
|
|
return pdbresser2resser.get(pdbresser); |
425 |
|
|
} |
426 |
|
|
|
427 |
|
|
public String get_pdbresser_from_resser (int resser){ |
428 |
|
|
return resser2pdbresser.get(resser); |
429 |
|
|
} |
430 |
|
|
|
431 |
duarte |
200 |
public int get_resser_from_atomser(int atomser){ |
432 |
|
|
return atomser2resser.get(atomser); |
433 |
|
|
} |
434 |
duarte |
206 |
|
435 |
|
|
public String getChainCode(){ |
436 |
|
|
return this.chainCode; |
437 |
|
|
} |
438 |
|
|
|
439 |
|
|
public String getPdbChainCode(){ |
440 |
|
|
return this.pdbChainCode; |
441 |
|
|
} |
442 |
duarte |
219 |
|
443 |
|
|
public String getSecStructure(int resser){ |
444 |
|
|
return this.resser2secstruct.get(resser); |
445 |
|
|
} |
446 |
duarte |
222 |
|
447 |
|
|
public TreeMap<String,Interval> getAllSecStructElements(){ |
448 |
|
|
return secstruct2resinterval; |
449 |
|
|
} |
450 |
duarte |
235 |
|
451 |
|
|
/** |
452 |
|
|
* Calculates rmsd (on atoms given by ct) of this Pdb object to otherPdb object |
453 |
duarte |
237 |
* Both objects must represent structures with same sequence (save unobserved residues or missing atoms) |
454 |
|
|
* |
455 |
duarte |
235 |
* @param otherPdb |
456 |
duarte |
237 |
* @param ct the contact type (crossed contact types don't make sense here) |
457 |
duarte |
235 |
* @return |
458 |
duarte |
237 |
* @throws ConformationsNotSameSizeError |
459 |
duarte |
235 |
*/ |
460 |
duarte |
237 |
public double rmsd(Pdb otherPdb, String ct) throws ConformationsNotSameSizeError { |
461 |
|
|
TreeMap<String,Point3d> thiscoords = this.get_coords_for_ct_4rmsd(ct); |
462 |
|
|
TreeMap<String,Point3d> othercoords = otherPdb.get_coords_for_ct_4rmsd(ct); |
463 |
duarte |
235 |
|
464 |
duarte |
237 |
// there might be unobserved residues or some missing atoms for a residue |
465 |
|
|
// here we get the ones that are in common |
466 |
|
|
ArrayList<String> common = new ArrayList<String>(); |
467 |
|
|
for (String resser_atom:thiscoords.keySet()){ |
468 |
|
|
if (othercoords.containsKey(resser_atom)){ |
469 |
|
|
common.add(resser_atom); |
470 |
|
|
} |
471 |
|
|
} |
472 |
|
|
|
473 |
duarte |
235 |
// converting the TreeMaps to Vector3d arrays (input format for calculate_rmsd) |
474 |
duarte |
237 |
Vector3d[] conformation1 = new Vector3d[common.size()]; |
475 |
|
|
Vector3d[] conformation2 = new Vector3d[common.size()]; |
476 |
|
|
int i = 0; |
477 |
|
|
for (String resser_atom:common){ |
478 |
|
|
conformation1[i] = new Vector3d(thiscoords.get(resser_atom).x,thiscoords.get(resser_atom).y,thiscoords.get(resser_atom).z); |
479 |
|
|
conformation2[i] = new Vector3d(othercoords.get(resser_atom).x,othercoords.get(resser_atom).y,othercoords.get(resser_atom).z); |
480 |
duarte |
235 |
i++; |
481 |
|
|
} |
482 |
duarte |
237 |
|
483 |
|
|
// this as well as calculating the rmsd, changes conformation1 and conformation2 to be optimally superposed |
484 |
duarte |
235 |
double rmsd = calculate_rmsd(conformation1, conformation2); |
485 |
|
|
|
486 |
|
|
// // printing out individual distances (conformation1 and conformation2 are now optimally superposed) |
487 |
|
|
// for (i=0;i<conformation1.length;i++){ |
488 |
|
|
// Point3d point1 = new Point3d(conformation1[i].x,conformation1[i].y,conformation1[i].z); |
489 |
|
|
// Point3d point2 = new Point3d(conformation2[i].x,conformation2[i].y,conformation2[i].z); |
490 |
|
|
// System.out.println(point1.distance(point2)); |
491 |
|
|
// } |
492 |
|
|
|
493 |
|
|
return rmsd; |
494 |
|
|
|
495 |
|
|
} |
496 |
|
|
|
497 |
|
|
/** |
498 |
|
|
* Calculates the RMSD between two conformations. |
499 |
|
|
* conformation1: Vector3d array (matrix of dimensions [N,3]) |
500 |
|
|
* conformation2: Vector3d array (matrix of dimensions [N,3]) |
501 |
|
|
* |
502 |
|
|
* Both conformation1 and conformation2 are modified to be optimally superposed |
503 |
|
|
* |
504 |
|
|
* Implementation taken (python) from http://bosco.infogami.com/Root_Mean_Square_Deviation, |
505 |
|
|
* then ported to java using Jama matrix package |
506 |
|
|
* (page has moved to: http://boscoh.com/protein/rmsd-root-mean-square-deviation) |
507 |
|
|
* @param conformation1 |
508 |
|
|
* @param conformation2 |
509 |
|
|
* @return |
510 |
duarte |
237 |
* @throws ConformationsNotSameSizeError |
511 |
duarte |
235 |
*/ |
512 |
duarte |
237 |
private double calculate_rmsd(Vector3d[] conformation1, Vector3d[] conformation2) throws ConformationsNotSameSizeError{ |
513 |
duarte |
235 |
if (conformation1.length!=conformation2.length) { |
514 |
duarte |
237 |
//System.err.println("Conformations not the same size"); |
515 |
|
|
throw new ConformationsNotSameSizeError( |
516 |
|
|
"Given conformations have different size: conformation1: "+conformation1.length+", conformation2: "+conformation2.length); |
517 |
duarte |
235 |
} |
518 |
|
|
int n_vec = conformation1.length; |
519 |
|
|
|
520 |
|
|
// 1st we bring both conformations to the same centre by subtracting their respectives centres |
521 |
|
|
Vector3d center1 = new Vector3d(); |
522 |
|
|
Vector3d center2 = new Vector3d(); |
523 |
|
|
for (int i=0;i<n_vec;i++){ // summing all vectors in each conformation |
524 |
|
|
center1.add(conformation1[i]); |
525 |
|
|
center2.add(conformation2[i]); |
526 |
|
|
} |
527 |
|
|
// dividing by n_vec (average) |
528 |
|
|
center1.scale((double)1/n_vec); |
529 |
|
|
center2.scale((double)1/n_vec); |
530 |
|
|
// translating our conformations to the same coordinate system by subtracting centers |
531 |
|
|
for (Vector3d vec:conformation1){ |
532 |
|
|
vec.sub(center1); |
533 |
|
|
} |
534 |
|
|
for (Vector3d vec:conformation2){ |
535 |
|
|
vec.sub(center2); |
536 |
|
|
} |
537 |
|
|
|
538 |
|
|
//E0: initial sum of squared lengths of both conformations |
539 |
|
|
double sum1 = 0.0; |
540 |
|
|
double sum2 = 0.0; |
541 |
|
|
for (int i=0;i<n_vec;i++){ |
542 |
|
|
sum1 += conformation1[i].lengthSquared(); |
543 |
|
|
sum2 += conformation2[i].lengthSquared(); |
544 |
|
|
} |
545 |
|
|
double E0 = sum1 + sum2; |
546 |
|
|
|
547 |
|
|
// singular value decomposition |
548 |
|
|
Matrix vecs1 = vector3dAr2matrix(conformation1); |
549 |
|
|
Matrix vecs2 = vector3dAr2matrix(conformation2); |
550 |
|
|
|
551 |
|
|
Matrix correlation_matrix = vecs2.transpose().times(vecs1); //gives a 3x3 matrix |
552 |
|
|
|
553 |
|
|
SingularValueDecomposition svd = correlation_matrix.svd(); |
554 |
|
|
Matrix U = svd.getU(); |
555 |
|
|
Matrix V_trans = svd.getV().transpose(); |
556 |
|
|
double[] singularValues = svd.getSingularValues(); |
557 |
|
|
|
558 |
|
|
boolean is_reflection = false; |
559 |
|
|
if (U.det()*V_trans.det()<0.0){ |
560 |
|
|
is_reflection = true; |
561 |
|
|
} |
562 |
|
|
if (is_reflection){ |
563 |
|
|
// reflect along smallest principal axis: |
564 |
|
|
// we change sign of last coordinate (smallest singular value) |
565 |
duarte |
236 |
singularValues[singularValues.length-1]=(-1)*singularValues[singularValues.length-1]; |
566 |
duarte |
235 |
} |
567 |
|
|
|
568 |
|
|
// getting sum of singular values |
569 |
|
|
double sumSV = 0.0; |
570 |
|
|
for (int i=0;i<singularValues.length;i++){ |
571 |
|
|
sumSV += singularValues[i]; |
572 |
|
|
} |
573 |
|
|
|
574 |
|
|
// rmsd square: Kabsch formula |
575 |
|
|
double rmsd_sq = (E0 - 2.0*sumSV)/((double) n_vec); |
576 |
|
|
rmsd_sq = Math.max(rmsd_sq, 0.0); |
577 |
|
|
|
578 |
|
|
// finally we modify conformation2 to be aligned to conformation1 |
579 |
|
|
if (is_reflection) { // first we check if we are in is_reflection case: we need to change sign to last row of U |
580 |
|
|
for (int j=0;j<U.getColumnDimension();j++){ |
581 |
|
|
// we change sign to last row of U |
582 |
|
|
int lastRow = U.getRowDimension()-1; |
583 |
|
|
U.set(lastRow, j, (-1)*U.get(lastRow,j)); |
584 |
|
|
} |
585 |
|
|
} |
586 |
|
|
Matrix optimal_rotation = U.times(V_trans); |
587 |
|
|
Matrix conf2 = vecs2.times(optimal_rotation); |
588 |
|
|
for (int i=0;i<n_vec;i++){ |
589 |
|
|
conformation2[i].x = conf2.get(i,0); |
590 |
|
|
conformation2[i].y = conf2.get(i,1); |
591 |
|
|
conformation2[i].z = conf2.get(i,2); |
592 |
|
|
} |
593 |
|
|
|
594 |
|
|
return Math.sqrt(rmsd_sq); |
595 |
|
|
} |
596 |
|
|
|
597 |
|
|
/** Gets a Jama.Matrix object from a Vector3d[] (deep copies) */ |
598 |
|
|
private Matrix vector3dAr2matrix(Vector3d[] vecArray) { |
599 |
|
|
double[][] array = new double[vecArray.length][3]; |
600 |
|
|
for (int i=0;i<vecArray.length;i++){ |
601 |
|
|
vecArray[i].get(array[i]); |
602 |
|
|
} |
603 |
|
|
return new Matrix(array); |
604 |
|
|
} |
605 |
|
|
|
606 |
duarte |
123 |
} |