1 |
duarte |
123 |
package proteinstructure; |
2 |
|
|
|
3 |
stehr |
257 |
import java.io.*; |
4 |
duarte |
123 |
import java.util.HashMap; |
5 |
duarte |
216 |
import java.util.Locale; |
6 |
stehr |
250 |
import java.util.Map; |
7 |
duarte |
123 |
import java.util.TreeMap; |
8 |
|
|
import java.util.ArrayList; |
9 |
|
|
|
10 |
duarte |
226 |
import javax.vecmath.Point3d; |
11 |
|
|
import javax.vecmath.Point3i; |
12 |
duarte |
235 |
import javax.vecmath.Vector3d; |
13 |
duarte |
226 |
|
14 |
duarte |
235 |
import Jama.Matrix; |
15 |
|
|
import Jama.SingularValueDecomposition; |
16 |
|
|
|
17 |
duarte |
207 |
/** |
18 |
|
|
* A single chain pdb protein structure |
19 |
|
|
* |
20 |
|
|
* @author Jose Duarte |
21 |
|
|
* Class: Pdb |
22 |
|
|
* Package: proteinstructure |
23 |
|
|
*/ |
24 |
|
|
public abstract class Pdb { |
25 |
duarte |
123 |
|
26 |
duarte |
208 |
protected final static int DEFAULT_MODEL=1; // default model serial (NMR structures) |
27 |
|
|
public final static String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence |
28 |
duarte |
153 |
|
29 |
duarte |
208 |
protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials |
30 |
|
|
protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type |
31 |
duarte |
226 |
protected HashMap<Integer,Point3d> atomser2coord; // atom serials to 3D coordinates |
32 |
duarte |
208 |
protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials |
33 |
duarte |
237 |
protected HashMap<Integer,String> atomser2atom; // atom serials to atom names |
34 |
duarte |
208 |
protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials |
35 |
|
|
protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings) |
36 |
duarte |
190 |
|
37 |
stehr |
274 |
protected SecondaryStructure secondaryStructure; // the secondary structure annotation for this pdb object (should never be null) |
38 |
duarte |
219 |
|
39 |
duarte |
208 |
protected HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms(); // contains atom names for each aminoacid |
40 |
duarte |
135 |
|
41 |
duarte |
208 |
protected String sequence; // full sequence as it appears in SEQRES field |
42 |
|
|
protected String pdbCode; |
43 |
|
|
// given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file) |
44 |
|
|
protected String pdbChainCode; |
45 |
|
|
// Our internal chain identifier: |
46 |
duarte |
135 |
// - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd) |
47 |
duarte |
208 |
// - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A" |
48 |
duarte |
207 |
protected String chainCode; |
49 |
duarte |
208 |
protected int model; // the model serial for NMR structures |
50 |
duarte |
123 |
|
51 |
duarte |
278 |
// in case we read from pdb file, 2 possibilities: |
52 |
|
|
// 1) SEQRES field is present: fullLength coincides with sequence length |
53 |
|
|
// 2) SEQRES not present: fullLength is maximum observed residue (so if residue numbering is correct that only misses possible non-observed residues at end of chain) |
54 |
|
|
protected int fullLength; // length of full sequence as it appears in SEQRES field |
55 |
|
|
protected int obsLength; // length without unobserved, non standard aas |
56 |
|
|
|
57 |
duarte |
208 |
protected String db; // the db from which we have taken the data (if applies) |
58 |
duarte |
153 |
|
59 |
stehr |
257 |
/** Run DSSP externally and (re)assign the secondary structure annotation from the output */ |
60 |
stehr |
259 |
public void runDssp(String dsspExecutable, String dsspParameters) throws IOException { |
61 |
stehr |
257 |
String startLine = " # RESIDUE AA STRUCTURE BP1 BP2 ACC"; |
62 |
|
|
String line; |
63 |
stehr |
280 |
int lineCount = 0; |
64 |
|
|
char ssType; |
65 |
|
|
TreeMap<Integer, Character> ssTypes; |
66 |
duarte |
260 |
int resNum; |
67 |
stehr |
259 |
String resNumStr; |
68 |
stehr |
257 |
File test = new File(dsspExecutable); |
69 |
|
|
if(!test.canRead()) throw new IOException("DSSP Executable is not readable"); |
70 |
stehr |
259 |
Process myDssp = Runtime.getRuntime().exec(dsspExecutable + " " + dsspParameters); |
71 |
stehr |
257 |
PrintWriter dsspInput = new PrintWriter(myDssp.getOutputStream()); |
72 |
|
|
BufferedReader dsspOutput = new BufferedReader(new InputStreamReader(myDssp.getInputStream())); |
73 |
|
|
BufferedReader dsspError = new BufferedReader(new InputStreamReader(myDssp.getErrorStream())); |
74 |
|
|
writeAtomLines(dsspInput); // pipe atom lines to dssp |
75 |
|
|
dsspInput.close(); |
76 |
stehr |
280 |
ssTypes = new TreeMap<Integer,Character>(); |
77 |
stehr |
257 |
while((line = dsspOutput.readLine()) != null) { |
78 |
stehr |
259 |
lineCount++; |
79 |
stehr |
257 |
if(line.startsWith(startLine)) { |
80 |
stehr |
259 |
//System.out.println("Dssp Output: "); |
81 |
stehr |
257 |
break; |
82 |
|
|
} |
83 |
|
|
} |
84 |
|
|
while((line = dsspOutput.readLine()) != null) { |
85 |
stehr |
259 |
lineCount++; |
86 |
|
|
resNumStr = line.substring(5,10).trim(); |
87 |
|
|
ssType = line.charAt(16); |
88 |
|
|
if(!resNumStr.equals("")) { // this should only happen if dssp inserts a line indicating a chain break |
89 |
|
|
try { |
90 |
|
|
resNum = Integer.valueOf(resNumStr); |
91 |
stehr |
280 |
ssTypes.put(resNum, ssType); |
92 |
stehr |
259 |
} catch (NumberFormatException e) { |
93 |
|
|
System.err.println("Error while parsing DSSP output. Expected residue number, found '" + resNumStr + "' in line " + lineCount); |
94 |
|
|
} |
95 |
|
|
} |
96 |
stehr |
257 |
} |
97 |
stehr |
259 |
//for(char c:ssTypes) {System.out.print(c);}; System.out.println("."); |
98 |
stehr |
257 |
dsspOutput.close(); |
99 |
|
|
dsspError.close(); |
100 |
stehr |
259 |
|
101 |
stehr |
280 |
if(ssTypes.size() == 0) { |
102 |
stehr |
259 |
throw new IOException("No DSSP output found."); |
103 |
|
|
} |
104 |
|
|
|
105 |
stehr |
280 |
if(ssTypes.size() != get_length()) { // compare with number of observed residues |
106 |
|
|
System.err.println("Error: DSSP output size (" + ssTypes.size() + ") does not match number of observed residues in structure (" + get_length() + ")."); |
107 |
stehr |
259 |
} |
108 |
|
|
|
109 |
|
|
// assign secondary structure |
110 |
stehr |
274 |
this.secondaryStructure = new SecondaryStructure(); // forget the old annotation |
111 |
stehr |
280 |
char lastType = SecStrucElement.getFourStateTypeFromDsspType(ssTypes.get(ssTypes.firstKey())); |
112 |
|
|
int lastResSer = ssTypes.firstKey(); |
113 |
stehr |
259 |
char thisType; |
114 |
stehr |
274 |
int start = 0; |
115 |
stehr |
259 |
int elementCount = 0; |
116 |
stehr |
274 |
SecStrucElement ssElem; |
117 |
stehr |
259 |
String ssId; |
118 |
stehr |
280 |
for(int resSer:ssTypes.keySet()) { |
119 |
|
|
thisType = SecStrucElement.getFourStateTypeFromDsspType(ssTypes.get(resSer)); |
120 |
|
|
if(thisType != lastType || resSer > lastResSer+1) { |
121 |
stehr |
259 |
// finish previous element, start new one |
122 |
stehr |
274 |
if(lastType != SecStrucElement.OTHER) { |
123 |
stehr |
259 |
elementCount++; |
124 |
|
|
ssId = new Character(lastType).toString() + new Integer(elementCount).toString(); |
125 |
stehr |
280 |
ssElem = new SecStrucElement(lastType, start,lastResSer,ssId); |
126 |
stehr |
274 |
secondaryStructure.add(ssElem); |
127 |
stehr |
259 |
} |
128 |
stehr |
280 |
start = resSer; |
129 |
stehr |
259 |
lastType = thisType; |
130 |
|
|
} |
131 |
stehr |
280 |
lastResSer = resSer; |
132 |
stehr |
259 |
} |
133 |
|
|
// finish last element |
134 |
stehr |
280 |
if(lastType != SecStrucElement.OTHER) { |
135 |
stehr |
259 |
elementCount++; |
136 |
|
|
ssId = new Character(lastType).toString() + new Integer(elementCount).toString(); |
137 |
stehr |
280 |
ssElem = new SecStrucElement(lastType, start, ssTypes.lastKey(), ssId); |
138 |
stehr |
274 |
secondaryStructure.add(ssElem); |
139 |
stehr |
259 |
} |
140 |
stehr |
274 |
secondaryStructure.setComment("DSSP"); |
141 |
stehr |
257 |
} |
142 |
stehr |
259 |
|
143 |
|
|
|
144 |
stehr |
257 |
/** Writes atom lines for this structure to the given output stream */ |
145 |
|
|
private void writeAtomLines(PrintWriter Out) { |
146 |
duarte |
130 |
TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>(); |
147 |
duarte |
206 |
Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'"); |
148 |
duarte |
123 |
for (String resser_atom:resser_atom2atomserial.keySet()){ |
149 |
|
|
int atomserial = resser_atom2atomserial.get(resser_atom); |
150 |
|
|
int res_serial = Integer.parseInt(resser_atom.split("_")[0]); |
151 |
|
|
String atom = resser_atom.split("_")[1]; |
152 |
|
|
String res_type = resser2restype.get(res_serial); |
153 |
duarte |
226 |
Point3d coords = atomser2coord.get(atomserial); |
154 |
stehr |
257 |
String atomType = atom.substring(0,1); |
155 |
|
|
Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords.x, coords.y, coords.z, atomType}; |
156 |
duarte |
130 |
lines.put(atomserial, fields); |
157 |
duarte |
123 |
} |
158 |
duarte |
130 |
for (int atomserial:lines.keySet()){ |
159 |
duarte |
216 |
// Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales) |
160 |
stehr |
257 |
Out.printf(Locale.US,"ATOM %5d %-3s %3s %1s%4d %8.3f%8.3f%8.3f 1.00 0.00 %s\n",lines.get(atomserial)); |
161 |
duarte |
130 |
} |
162 |
duarte |
123 |
Out.println("END"); |
163 |
stehr |
257 |
Out.close(); |
164 |
duarte |
123 |
} |
165 |
stehr |
257 |
|
166 |
|
|
/** |
167 |
|
|
* Dumps coordinate data into a file in pdb format (ATOM lines only) |
168 |
|
|
* The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects |
169 |
|
|
* @param outfile |
170 |
|
|
* @throws IOException |
171 |
|
|
*/ |
172 |
|
|
public void dump2pdbfile(String outfile) throws IOException { |
173 |
|
|
PrintWriter Out = new PrintWriter(new FileOutputStream(outfile)); |
174 |
|
|
writeAtomLines(Out); |
175 |
|
|
} |
176 |
duarte |
123 |
|
177 |
duarte |
237 |
/** |
178 |
|
|
* Dump the full sequence of this Pdb object in fasta file format |
179 |
|
|
* @param seqfile |
180 |
|
|
* @throws IOException |
181 |
|
|
*/ |
182 |
duarte |
123 |
public void dumpseq(String seqfile) throws IOException { |
183 |
|
|
PrintStream Out = new PrintStream(new FileOutputStream(seqfile)); |
184 |
duarte |
206 |
Out.println(">"+pdbCode+"_"+pdbChainCode); |
185 |
duarte |
123 |
Out.println(sequence); |
186 |
|
|
Out.close(); |
187 |
|
|
} |
188 |
|
|
|
189 |
duarte |
256 |
/** |
190 |
stehr |
259 |
* Returns the number of observed standard residues. |
191 |
|
|
* TODO: Refactor method name |
192 |
duarte |
256 |
*/ |
193 |
duarte |
123 |
public int get_length(){ |
194 |
duarte |
278 |
return obsLength; |
195 |
duarte |
123 |
} |
196 |
|
|
|
197 |
stehr |
259 |
/** Returns the number of residues in the sequence of this protein. */ |
198 |
|
|
public int getFullLength() { |
199 |
duarte |
278 |
return fullLength; |
200 |
stehr |
259 |
} |
201 |
|
|
|
202 |
duarte |
237 |
/** |
203 |
duarte |
256 |
* Returns number of (non-Hydrogen) atoms in the protein |
204 |
|
|
* @return |
205 |
|
|
*/ |
206 |
|
|
public int getNumAtoms() { |
207 |
|
|
return atomser2atom.size(); |
208 |
|
|
} |
209 |
|
|
|
210 |
|
|
/** |
211 |
duarte |
237 |
* Gets a TreeMap with atom serials as keys and their coordinates as values for the given contact type |
212 |
|
|
* The contact type can't be a cross contact type, it doesn't make sense here |
213 |
|
|
* @param ct |
214 |
|
|
* @return |
215 |
|
|
*/ |
216 |
duarte |
235 |
private TreeMap<Integer,Point3d> get_coords_for_ct(String ct) { |
217 |
duarte |
226 |
TreeMap<Integer,Point3d> coords = new TreeMap<Integer,Point3d>(); |
218 |
duarte |
123 |
HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct); |
219 |
|
|
for (int resser:resser2restype.keySet()){ |
220 |
|
|
String[] atoms = restype2atoms.get(resser2restype.get(resser)); |
221 |
|
|
for (String atom:atoms){ |
222 |
|
|
if (resser_atom2atomserial.containsKey(resser+"_"+atom)){ |
223 |
|
|
int atomser = resser_atom2atomserial.get(resser+"_"+atom); |
224 |
duarte |
226 |
Point3d coord = atomser2coord.get(atomser); |
225 |
duarte |
123 |
coords.put(atomser, coord); |
226 |
|
|
} |
227 |
|
|
else { |
228 |
duarte |
282 |
//System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser."); |
229 |
duarte |
123 |
} |
230 |
|
|
} |
231 |
duarte |
275 |
// in ct="ALL" we still miss the OXT, we need to add it now if it is there (it will be there when this resser is the last residue) |
232 |
|
|
if (ct.equals("ALL") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){ |
233 |
|
|
int atomser = resser_atom2atomserial.get(resser+"_"+"OXT"); |
234 |
|
|
Point3d coord = atomser2coord.get(atomser); |
235 |
|
|
coords.put(atomser, coord); |
236 |
|
|
} |
237 |
duarte |
123 |
} |
238 |
|
|
return coords; |
239 |
|
|
} |
240 |
duarte |
229 |
|
241 |
|
|
/** |
242 |
duarte |
237 |
* Gets a TreeMap with residue serials+"_"+atomname as keys and their coordinates as values for the given contact type |
243 |
|
|
* The contact type can't be a cross contact type, it doesn't make sense here |
244 |
|
|
* Used in rmsd method |
245 |
|
|
* @param ct |
246 |
|
|
* @return |
247 |
|
|
*/ |
248 |
|
|
private TreeMap<String,Point3d> get_coords_for_ct_4rmsd(String ct) { |
249 |
|
|
TreeMap<String,Point3d> coords = new TreeMap<String,Point3d>(); |
250 |
|
|
HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct); |
251 |
|
|
for (int resser:resser2restype.keySet()){ |
252 |
|
|
String[] atoms = restype2atoms.get(resser2restype.get(resser)); |
253 |
|
|
for (String atom:atoms){ |
254 |
|
|
if (resser_atom2atomserial.containsKey(resser+"_"+atom)){ |
255 |
|
|
int atomser = resser_atom2atomserial.get(resser+"_"+atom); |
256 |
|
|
Point3d coord = atomser2coord.get(atomser); |
257 |
|
|
coords.put(resser+"_"+atom, coord); |
258 |
|
|
} |
259 |
|
|
} |
260 |
duarte |
275 |
// in ct="ALL" we still miss the OXT, we need to add it now if it is there (it will be there when this resser is the last residue) |
261 |
|
|
if (ct.equals("ALL") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){ |
262 |
|
|
int atomser = resser_atom2atomserial.get(resser+"_"+"OXT"); |
263 |
|
|
Point3d coord = atomser2coord.get(atomser); |
264 |
|
|
coords.put(resser+"_"+"OXT", coord); |
265 |
|
|
} |
266 |
duarte |
237 |
} |
267 |
|
|
return coords; |
268 |
|
|
} |
269 |
|
|
|
270 |
|
|
/** |
271 |
duarte |
229 |
* Returns the distance matrix as a HashMap with Contacts (residue serial pairs) as keys |
272 |
|
|
* It doesn't make sense to call this method for multi atom contact |
273 |
|
|
* types (for each residue serial pair there's more than 1 distance) |
274 |
|
|
* Thus before calling this we should check AA.isValidSingleAtomCT(ct) |
275 |
|
|
* @param ct the contact type |
276 |
|
|
* @return |
277 |
|
|
*/ |
278 |
duarte |
234 |
public HashMap<Edge, Double> calculate_dist_matrix(String ct){ |
279 |
|
|
HashMap<Edge,Double> distMatrixAtoms = new HashMap<Edge,Double>(); |
280 |
duarte |
123 |
if (!ct.contains("/")){ |
281 |
duarte |
226 |
TreeMap<Integer,Point3d> coords = get_coords_for_ct(ct); |
282 |
duarte |
123 |
for (int i_atomser:coords.keySet()){ |
283 |
|
|
for (int j_atomser:coords.keySet()){ |
284 |
|
|
if (j_atomser>i_atomser) { |
285 |
duarte |
234 |
Edge pair = new Edge(i_atomser,j_atomser); |
286 |
duarte |
229 |
distMatrixAtoms.put(pair, coords.get(i_atomser).distance(coords.get(j_atomser))); |
287 |
duarte |
123 |
} |
288 |
|
|
} |
289 |
|
|
} |
290 |
|
|
} else { |
291 |
|
|
String i_ct = ct.split("/")[0]; |
292 |
|
|
String j_ct = ct.split("/")[1]; |
293 |
duarte |
226 |
TreeMap<Integer,Point3d> i_coords = get_coords_for_ct(i_ct); |
294 |
|
|
TreeMap<Integer,Point3d> j_coords = get_coords_for_ct(j_ct); |
295 |
duarte |
123 |
for (int i_atomser:i_coords.keySet()){ |
296 |
|
|
for (int j_atomser:j_coords.keySet()){ |
297 |
|
|
if (j_atomser!=i_atomser){ |
298 |
duarte |
234 |
Edge pair = new Edge(i_atomser,j_atomser); |
299 |
duarte |
229 |
distMatrixAtoms.put(pair, i_coords.get(i_atomser).distance(j_coords.get(j_atomser))); |
300 |
duarte |
123 |
} |
301 |
|
|
} |
302 |
|
|
} |
303 |
|
|
} |
304 |
duarte |
229 |
|
305 |
duarte |
234 |
HashMap<Edge,Double> distMatrixRes = new HashMap<Edge, Double>(); |
306 |
|
|
for (Edge cont: distMatrixAtoms.keySet()){ |
307 |
duarte |
229 |
int i_resser = get_resser_from_atomser(cont.i); |
308 |
|
|
int j_resser = get_resser_from_atomser(cont.j); |
309 |
duarte |
234 |
distMatrixRes.put(new Edge(i_resser,j_resser), distMatrixAtoms.get(cont)); |
310 |
duarte |
229 |
} |
311 |
|
|
|
312 |
|
|
return distMatrixRes; |
313 |
duarte |
123 |
} |
314 |
|
|
|
315 |
duarte |
129 |
/** |
316 |
duarte |
226 |
* Get the graph for given contact type and cutoff for this Pdb object. |
317 |
|
|
* Returns a Graph object with the contacts |
318 |
|
|
* We do geometric hashing for fast contact computation (without needing to calculate full distance matrix) |
319 |
|
|
* @param ct |
320 |
|
|
* @param cutoff |
321 |
|
|
* @return |
322 |
|
|
*/ |
323 |
duarte |
228 |
public Graph get_graph(String ct, double cutoff){ |
324 |
|
|
TreeMap<Integer,Point3d> i_coords = null; |
325 |
stehr |
250 |
TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymetric edge types |
326 |
duarte |
228 |
boolean directed = false; |
327 |
|
|
if (!ct.contains("/")){ |
328 |
|
|
i_coords = get_coords_for_ct(ct); |
329 |
|
|
directed = false; |
330 |
|
|
} else { |
331 |
|
|
String i_ct = ct.split("/")[0]; |
332 |
|
|
String j_ct = ct.split("/")[1]; |
333 |
|
|
i_coords = get_coords_for_ct(i_ct); |
334 |
|
|
j_coords = get_coords_for_ct(j_ct); |
335 |
|
|
directed = true; |
336 |
|
|
} |
337 |
|
|
int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials |
338 |
|
|
int[] j_atomserials = null; |
339 |
duarte |
226 |
|
340 |
|
|
int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A) |
341 |
|
|
|
342 |
|
|
int boxSize = (int) Math.floor(cutoff*SCALE); |
343 |
|
|
|
344 |
|
|
HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>(); |
345 |
|
|
int i=0; |
346 |
duarte |
228 |
for (int i_atomser:i_coords.keySet()){ |
347 |
duarte |
226 |
//coordinates for atom serial atomser, we will use i as its identifier below |
348 |
duarte |
228 |
Point3d coord = i_coords.get(i_atomser); |
349 |
duarte |
226 |
int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize)); |
350 |
|
|
int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize)); |
351 |
|
|
int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize)); |
352 |
|
|
Point3i floor = new Point3i(floorX,floorY,floorZ); |
353 |
|
|
if (boxes.containsKey(floor)){ |
354 |
|
|
// we put the coords for atom i in its corresponding box (identified by floor) |
355 |
duarte |
228 |
boxes.get(floor).put_i_Point(i, coord); |
356 |
|
|
if (!directed){ |
357 |
|
|
boxes.get(floor).put_j_Point(i, coord); |
358 |
|
|
} |
359 |
duarte |
226 |
} else { |
360 |
|
|
Box box = new Box(floor); |
361 |
duarte |
228 |
box.put_i_Point(i, coord); |
362 |
|
|
if (!directed){ |
363 |
|
|
box.put_j_Point(i, coord); |
364 |
|
|
} |
365 |
duarte |
226 |
boxes.put(floor,box); |
366 |
|
|
} |
367 |
duarte |
228 |
i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered |
368 |
duarte |
226 |
i++; |
369 |
|
|
} |
370 |
duarte |
228 |
int j=0; |
371 |
|
|
if (directed) { |
372 |
|
|
j_atomserials = new int[j_coords.size()]; |
373 |
|
|
for (int j_atomser:j_coords.keySet()){ |
374 |
|
|
//coordinates for atom serial atomser, we will use j as its identifier below |
375 |
|
|
Point3d coord = j_coords.get(j_atomser); |
376 |
|
|
int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize)); |
377 |
|
|
int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize)); |
378 |
|
|
int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize)); |
379 |
|
|
Point3i floor = new Point3i(floorX,floorY,floorZ); |
380 |
|
|
if (boxes.containsKey(floor)){ |
381 |
|
|
// we put the coords for atom j in its corresponding box (identified by floor) |
382 |
|
|
boxes.get(floor).put_j_Point(j, coord); |
383 |
|
|
} else { |
384 |
|
|
Box box = new Box(floor); |
385 |
|
|
box.put_j_Point(j, coord); |
386 |
|
|
boxes.put(floor,box); |
387 |
|
|
} |
388 |
|
|
j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered |
389 |
|
|
j++; |
390 |
|
|
} |
391 |
|
|
} else { |
392 |
|
|
j_atomserials = i_atomserials; |
393 |
|
|
} |
394 |
|
|
|
395 |
duarte |
226 |
|
396 |
duarte |
282 |
float[][]distMatrix = new float[i_atomserials.length][j_atomserials.length]; |
397 |
duarte |
226 |
|
398 |
|
|
for (Point3i floor:boxes.keySet()){ // for each box |
399 |
|
|
// distances of points within this box |
400 |
duarte |
228 |
boxes.get(floor).getDistancesWithinBox(distMatrix,directed); |
401 |
duarte |
226 |
|
402 |
duarte |
228 |
//TODO should iterate only through half of the neighbours here |
403 |
duarte |
226 |
// distances of points from this box to all neighbouring boxes: 26 iterations (26 neighbouring boxes) |
404 |
|
|
for (int x=floor.x-boxSize;x<=floor.x+boxSize;x+=boxSize){ |
405 |
|
|
for (int y=floor.y-boxSize;y<=floor.y+boxSize;y+=boxSize){ |
406 |
|
|
for (int z=floor.z-boxSize;z<=floor.z+boxSize;z+=boxSize){ |
407 |
|
|
if (!((x==floor.x)&&(y==floor.y)&&(z==floor.z))) { // skip this box |
408 |
|
|
Point3i neighbor = new Point3i(x,y,z); |
409 |
|
|
if (boxes.containsKey(neighbor)){ |
410 |
duarte |
228 |
boxes.get(floor).getDistancesToNeighborBox(boxes.get(neighbor),distMatrix,directed); |
411 |
duarte |
226 |
} |
412 |
|
|
} |
413 |
|
|
} |
414 |
|
|
} |
415 |
|
|
} |
416 |
|
|
} |
417 |
|
|
|
418 |
duarte |
228 |
// getting the contacts (in residue serials) from the atom serials (partial) distance matrix |
419 |
duarte |
234 |
EdgeSet contacts = new EdgeSet(); |
420 |
duarte |
226 |
for (i=0;i<distMatrix.length;i++){ |
421 |
duarte |
228 |
for (j=0;j<distMatrix[i].length;j++){ |
422 |
|
|
// the condition distMatrix[i][j]!=0.0 takes care of skipping several things: |
423 |
|
|
// - diagonal of the matrix in case of undirected |
424 |
|
|
// - lower half of matrix in case of undirected |
425 |
duarte |
235 |
// - cells for which we didn't calculate a distance because the 2 points were not in same or neighbouring boxes (i.e. too far apart) |
426 |
duarte |
282 |
if (distMatrix[i][j]!=0.0f && distMatrix[i][j]<=cutoff){ |
427 |
duarte |
228 |
int i_resser = atomser2resser.get(i_atomserials[i]); |
428 |
|
|
int j_resser = atomser2resser.get(j_atomserials[j]); |
429 |
duarte |
234 |
Edge resser_pair = new Edge(i_resser,j_resser); |
430 |
duarte |
228 |
// for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates |
431 |
duarte |
235 |
if (i_resser!=j_resser){ // duplicates are automatically taken care by the EdgeSet class which is a TreeSet and doesn't allow duplicates |
432 |
duarte |
226 |
contacts.add(resser_pair); |
433 |
|
|
} |
434 |
|
|
} |
435 |
duarte |
228 |
|
436 |
duarte |
226 |
} |
437 |
|
|
} |
438 |
duarte |
228 |
|
439 |
|
|
// creating and returning the graph object |
440 |
duarte |
226 |
TreeMap<Integer,String> nodes = new TreeMap<Integer,String>(); |
441 |
|
|
for (int resser:resser2restype.keySet()){ |
442 |
|
|
nodes.put(resser,resser2restype.get(resser)); |
443 |
|
|
} |
444 |
stehr |
274 |
Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode,model, secondaryStructure); |
445 |
duarte |
226 |
return graph; |
446 |
|
|
} |
447 |
|
|
|
448 |
stehr |
250 |
public void calcGridDensity(String ct, double cutoff, Map<Integer, Integer> densityCount) { |
449 |
|
|
TreeMap<Integer,Point3d> i_coords = null; |
450 |
|
|
TreeMap<Integer,Point3d> j_coords = null; // only relevant for asymetric edge types |
451 |
|
|
boolean directed = false; |
452 |
|
|
if (!ct.contains("/")){ |
453 |
|
|
i_coords = get_coords_for_ct(ct); |
454 |
|
|
directed = false; |
455 |
|
|
} else { |
456 |
|
|
String i_ct = ct.split("/")[0]; |
457 |
|
|
String j_ct = ct.split("/")[1]; |
458 |
|
|
i_coords = get_coords_for_ct(i_ct); |
459 |
|
|
j_coords = get_coords_for_ct(j_ct); |
460 |
|
|
directed = true; |
461 |
|
|
} |
462 |
|
|
int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials |
463 |
|
|
int[] j_atomserials = null; |
464 |
|
|
|
465 |
|
|
int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A) |
466 |
|
|
|
467 |
|
|
int boxSize = (int) Math.floor(cutoff*SCALE); |
468 |
|
|
|
469 |
|
|
HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>(); |
470 |
|
|
int i=0; |
471 |
|
|
for (int i_atomser:i_coords.keySet()){ |
472 |
|
|
//coordinates for atom serial atomser, we will use i as its identifier below |
473 |
|
|
Point3d coord = i_coords.get(i_atomser); |
474 |
|
|
int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize)); |
475 |
|
|
int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize)); |
476 |
|
|
int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize)); |
477 |
|
|
Point3i floor = new Point3i(floorX,floorY,floorZ); |
478 |
|
|
if (boxes.containsKey(floor)){ |
479 |
|
|
// we put the coords for atom i in its corresponding box (identified by floor) |
480 |
|
|
boxes.get(floor).put_i_Point(i, coord); |
481 |
|
|
if (!directed){ |
482 |
|
|
boxes.get(floor).put_j_Point(i, coord); |
483 |
|
|
} |
484 |
|
|
} else { |
485 |
|
|
Box box = new Box(floor); |
486 |
|
|
box.put_i_Point(i, coord); |
487 |
|
|
if (!directed){ |
488 |
|
|
box.put_j_Point(i, coord); |
489 |
|
|
} |
490 |
|
|
boxes.put(floor,box); |
491 |
|
|
} |
492 |
|
|
i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered |
493 |
|
|
i++; |
494 |
|
|
} |
495 |
|
|
int j=0; |
496 |
|
|
if (directed) { |
497 |
|
|
j_atomserials = new int[j_coords.size()]; |
498 |
|
|
for (int j_atomser:j_coords.keySet()){ |
499 |
|
|
//coordinates for atom serial atomser, we will use j as its identifier below |
500 |
|
|
Point3d coord = j_coords.get(j_atomser); |
501 |
|
|
int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize)); |
502 |
|
|
int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize)); |
503 |
|
|
int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize)); |
504 |
|
|
Point3i floor = new Point3i(floorX,floorY,floorZ); |
505 |
|
|
if (boxes.containsKey(floor)){ |
506 |
|
|
// we put the coords for atom j in its corresponding box (identified by floor) |
507 |
|
|
boxes.get(floor).put_j_Point(j, coord); |
508 |
|
|
} else { |
509 |
|
|
Box box = new Box(floor); |
510 |
|
|
box.put_j_Point(j, coord); |
511 |
|
|
boxes.put(floor,box); |
512 |
|
|
} |
513 |
|
|
j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered |
514 |
|
|
j++; |
515 |
|
|
} |
516 |
|
|
} else { |
517 |
|
|
j_atomserials = i_atomserials; |
518 |
|
|
} |
519 |
|
|
|
520 |
|
|
for(Point3i floor:boxes.keySet()) { |
521 |
|
|
int size = boxes.get(floor).size(); |
522 |
|
|
if(densityCount.containsKey(size)) { |
523 |
|
|
int old = densityCount.get(size); |
524 |
|
|
densityCount.put(size, ++old); |
525 |
|
|
} else { |
526 |
|
|
densityCount.put(size, 1); |
527 |
|
|
} |
528 |
|
|
} |
529 |
|
|
} |
530 |
|
|
|
531 |
duarte |
190 |
public int get_resser_from_pdbresser (String pdbresser){ |
532 |
|
|
return pdbresser2resser.get(pdbresser); |
533 |
|
|
} |
534 |
|
|
|
535 |
|
|
public String get_pdbresser_from_resser (int resser){ |
536 |
|
|
return resser2pdbresser.get(resser); |
537 |
|
|
} |
538 |
|
|
|
539 |
duarte |
200 |
public int get_resser_from_atomser(int atomser){ |
540 |
|
|
return atomser2resser.get(atomser); |
541 |
|
|
} |
542 |
duarte |
206 |
|
543 |
duarte |
275 |
public String getPdbCode() { |
544 |
|
|
return this.pdbCode; |
545 |
|
|
} |
546 |
|
|
|
547 |
duarte |
206 |
public String getChainCode(){ |
548 |
|
|
return this.chainCode; |
549 |
|
|
} |
550 |
|
|
|
551 |
|
|
public String getPdbChainCode(){ |
552 |
|
|
return this.pdbChainCode; |
553 |
|
|
} |
554 |
stehr |
274 |
|
555 |
|
|
// secondary structure related methods |
556 |
duarte |
219 |
|
557 |
stehr |
274 |
/** |
558 |
|
|
* Returns true if secondary structure information is available, false otherwise. |
559 |
stehr |
259 |
*/ |
560 |
stehr |
257 |
public boolean hasSecondaryStructure() { |
561 |
stehr |
274 |
return !this.secondaryStructure.isEmpty(); |
562 |
stehr |
257 |
} |
563 |
|
|
|
564 |
stehr |
274 |
/** |
565 |
|
|
* Returns the secondary structure annotation object of this graph. |
566 |
|
|
*/ |
567 |
|
|
public SecondaryStructure getSecondaryStructure() { |
568 |
|
|
return this.secondaryStructure; |
569 |
stehr |
259 |
} |
570 |
|
|
|
571 |
stehr |
274 |
// end of secondary structure related methods |
572 |
|
|
|
573 |
duarte |
235 |
/** |
574 |
|
|
* Calculates rmsd (on atoms given by ct) of this Pdb object to otherPdb object |
575 |
duarte |
237 |
* Both objects must represent structures with same sequence (save unobserved residues or missing atoms) |
576 |
|
|
* |
577 |
duarte |
235 |
* @param otherPdb |
578 |
duarte |
237 |
* @param ct the contact type (crossed contact types don't make sense here) |
579 |
duarte |
235 |
* @return |
580 |
duarte |
237 |
* @throws ConformationsNotSameSizeError |
581 |
duarte |
235 |
*/ |
582 |
duarte |
237 |
public double rmsd(Pdb otherPdb, String ct) throws ConformationsNotSameSizeError { |
583 |
|
|
TreeMap<String,Point3d> thiscoords = this.get_coords_for_ct_4rmsd(ct); |
584 |
|
|
TreeMap<String,Point3d> othercoords = otherPdb.get_coords_for_ct_4rmsd(ct); |
585 |
duarte |
235 |
|
586 |
duarte |
237 |
// there might be unobserved residues or some missing atoms for a residue |
587 |
|
|
// here we get the ones that are in common |
588 |
|
|
ArrayList<String> common = new ArrayList<String>(); |
589 |
|
|
for (String resser_atom:thiscoords.keySet()){ |
590 |
|
|
if (othercoords.containsKey(resser_atom)){ |
591 |
|
|
common.add(resser_atom); |
592 |
|
|
} |
593 |
|
|
} |
594 |
|
|
|
595 |
duarte |
235 |
// converting the TreeMaps to Vector3d arrays (input format for calculate_rmsd) |
596 |
duarte |
237 |
Vector3d[] conformation1 = new Vector3d[common.size()]; |
597 |
|
|
Vector3d[] conformation2 = new Vector3d[common.size()]; |
598 |
|
|
int i = 0; |
599 |
|
|
for (String resser_atom:common){ |
600 |
|
|
conformation1[i] = new Vector3d(thiscoords.get(resser_atom).x,thiscoords.get(resser_atom).y,thiscoords.get(resser_atom).z); |
601 |
|
|
conformation2[i] = new Vector3d(othercoords.get(resser_atom).x,othercoords.get(resser_atom).y,othercoords.get(resser_atom).z); |
602 |
duarte |
235 |
i++; |
603 |
|
|
} |
604 |
duarte |
237 |
|
605 |
|
|
// this as well as calculating the rmsd, changes conformation1 and conformation2 to be optimally superposed |
606 |
duarte |
235 |
double rmsd = calculate_rmsd(conformation1, conformation2); |
607 |
|
|
|
608 |
|
|
// // printing out individual distances (conformation1 and conformation2 are now optimally superposed) |
609 |
|
|
// for (i=0;i<conformation1.length;i++){ |
610 |
|
|
// Point3d point1 = new Point3d(conformation1[i].x,conformation1[i].y,conformation1[i].z); |
611 |
|
|
// Point3d point2 = new Point3d(conformation2[i].x,conformation2[i].y,conformation2[i].z); |
612 |
|
|
// System.out.println(point1.distance(point2)); |
613 |
|
|
// } |
614 |
|
|
|
615 |
|
|
return rmsd; |
616 |
|
|
|
617 |
|
|
} |
618 |
|
|
|
619 |
|
|
/** |
620 |
|
|
* Calculates the RMSD between two conformations. |
621 |
|
|
* conformation1: Vector3d array (matrix of dimensions [N,3]) |
622 |
|
|
* conformation2: Vector3d array (matrix of dimensions [N,3]) |
623 |
|
|
* |
624 |
|
|
* Both conformation1 and conformation2 are modified to be optimally superposed |
625 |
|
|
* |
626 |
|
|
* Implementation taken (python) from http://bosco.infogami.com/Root_Mean_Square_Deviation, |
627 |
|
|
* then ported to java using Jama matrix package |
628 |
|
|
* (page has moved to: http://boscoh.com/protein/rmsd-root-mean-square-deviation) |
629 |
|
|
* @param conformation1 |
630 |
|
|
* @param conformation2 |
631 |
|
|
* @return |
632 |
duarte |
237 |
* @throws ConformationsNotSameSizeError |
633 |
duarte |
235 |
*/ |
634 |
duarte |
237 |
private double calculate_rmsd(Vector3d[] conformation1, Vector3d[] conformation2) throws ConformationsNotSameSizeError{ |
635 |
duarte |
235 |
if (conformation1.length!=conformation2.length) { |
636 |
duarte |
237 |
//System.err.println("Conformations not the same size"); |
637 |
|
|
throw new ConformationsNotSameSizeError( |
638 |
|
|
"Given conformations have different size: conformation1: "+conformation1.length+", conformation2: "+conformation2.length); |
639 |
duarte |
235 |
} |
640 |
|
|
int n_vec = conformation1.length; |
641 |
|
|
|
642 |
|
|
// 1st we bring both conformations to the same centre by subtracting their respectives centres |
643 |
|
|
Vector3d center1 = new Vector3d(); |
644 |
|
|
Vector3d center2 = new Vector3d(); |
645 |
|
|
for (int i=0;i<n_vec;i++){ // summing all vectors in each conformation |
646 |
|
|
center1.add(conformation1[i]); |
647 |
|
|
center2.add(conformation2[i]); |
648 |
|
|
} |
649 |
|
|
// dividing by n_vec (average) |
650 |
|
|
center1.scale((double)1/n_vec); |
651 |
|
|
center2.scale((double)1/n_vec); |
652 |
|
|
// translating our conformations to the same coordinate system by subtracting centers |
653 |
|
|
for (Vector3d vec:conformation1){ |
654 |
|
|
vec.sub(center1); |
655 |
|
|
} |
656 |
|
|
for (Vector3d vec:conformation2){ |
657 |
|
|
vec.sub(center2); |
658 |
|
|
} |
659 |
|
|
|
660 |
|
|
//E0: initial sum of squared lengths of both conformations |
661 |
|
|
double sum1 = 0.0; |
662 |
|
|
double sum2 = 0.0; |
663 |
|
|
for (int i=0;i<n_vec;i++){ |
664 |
|
|
sum1 += conformation1[i].lengthSquared(); |
665 |
|
|
sum2 += conformation2[i].lengthSquared(); |
666 |
|
|
} |
667 |
|
|
double E0 = sum1 + sum2; |
668 |
|
|
|
669 |
|
|
// singular value decomposition |
670 |
|
|
Matrix vecs1 = vector3dAr2matrix(conformation1); |
671 |
|
|
Matrix vecs2 = vector3dAr2matrix(conformation2); |
672 |
|
|
|
673 |
|
|
Matrix correlation_matrix = vecs2.transpose().times(vecs1); //gives a 3x3 matrix |
674 |
|
|
|
675 |
|
|
SingularValueDecomposition svd = correlation_matrix.svd(); |
676 |
|
|
Matrix U = svd.getU(); |
677 |
|
|
Matrix V_trans = svd.getV().transpose(); |
678 |
|
|
double[] singularValues = svd.getSingularValues(); |
679 |
|
|
|
680 |
|
|
boolean is_reflection = false; |
681 |
|
|
if (U.det()*V_trans.det()<0.0){ |
682 |
|
|
is_reflection = true; |
683 |
|
|
} |
684 |
|
|
if (is_reflection){ |
685 |
|
|
// reflect along smallest principal axis: |
686 |
|
|
// we change sign of last coordinate (smallest singular value) |
687 |
duarte |
236 |
singularValues[singularValues.length-1]=(-1)*singularValues[singularValues.length-1]; |
688 |
duarte |
235 |
} |
689 |
|
|
|
690 |
|
|
// getting sum of singular values |
691 |
|
|
double sumSV = 0.0; |
692 |
|
|
for (int i=0;i<singularValues.length;i++){ |
693 |
|
|
sumSV += singularValues[i]; |
694 |
|
|
} |
695 |
|
|
|
696 |
|
|
// rmsd square: Kabsch formula |
697 |
|
|
double rmsd_sq = (E0 - 2.0*sumSV)/((double) n_vec); |
698 |
|
|
rmsd_sq = Math.max(rmsd_sq, 0.0); |
699 |
|
|
|
700 |
|
|
// finally we modify conformation2 to be aligned to conformation1 |
701 |
|
|
if (is_reflection) { // first we check if we are in is_reflection case: we need to change sign to last row of U |
702 |
|
|
for (int j=0;j<U.getColumnDimension();j++){ |
703 |
|
|
// we change sign to last row of U |
704 |
|
|
int lastRow = U.getRowDimension()-1; |
705 |
|
|
U.set(lastRow, j, (-1)*U.get(lastRow,j)); |
706 |
|
|
} |
707 |
|
|
} |
708 |
|
|
Matrix optimal_rotation = U.times(V_trans); |
709 |
|
|
Matrix conf2 = vecs2.times(optimal_rotation); |
710 |
|
|
for (int i=0;i<n_vec;i++){ |
711 |
|
|
conformation2[i].x = conf2.get(i,0); |
712 |
|
|
conformation2[i].y = conf2.get(i,1); |
713 |
|
|
conformation2[i].z = conf2.get(i,2); |
714 |
|
|
} |
715 |
|
|
|
716 |
|
|
return Math.sqrt(rmsd_sq); |
717 |
|
|
} |
718 |
|
|
|
719 |
|
|
/** Gets a Jama.Matrix object from a Vector3d[] (deep copies) */ |
720 |
|
|
private Matrix vector3dAr2matrix(Vector3d[] vecArray) { |
721 |
|
|
double[][] array = new double[vecArray.length][3]; |
722 |
|
|
for (int i=0;i<vecArray.length;i++){ |
723 |
|
|
vecArray[i].get(array[i]); |
724 |
|
|
} |
725 |
|
|
return new Matrix(array); |
726 |
|
|
} |
727 |
|
|
|
728 |
duarte |
123 |
} |