1 |
duarte |
796 |
package embed; |
2 |
|
|
|
3 |
duarte |
797 |
import java.util.ArrayList; |
4 |
|
|
import java.util.Collections; |
5 |
|
|
import java.util.HashMap; |
6 |
duarte |
796 |
import java.util.Random; |
7 |
|
|
import java.util.TreeMap; |
8 |
|
|
|
9 |
|
|
import javax.vecmath.Vector3d; |
10 |
|
|
|
11 |
|
|
import org.apache.commons.collections15.Transformer; |
12 |
|
|
|
13 |
|
|
import Jama.Matrix; |
14 |
|
|
|
15 |
|
|
import edu.uci.ics.jung.algorithms.shortestpath.DijkstraDistance; |
16 |
duarte |
797 |
import edu.uci.ics.jung.algorithms.shortestpath.DijkstraShortestPath; |
17 |
duarte |
796 |
import edu.uci.ics.jung.graph.SparseGraph; |
18 |
|
|
import edu.uci.ics.jung.graph.util.EdgeType; |
19 |
|
|
import edu.uci.ics.jung.graph.util.Pair; |
20 |
|
|
import proteinstructure.AAinfo; |
21 |
|
|
import proteinstructure.Pdb; |
22 |
|
|
import proteinstructure.PdbasePdb; |
23 |
|
|
import proteinstructure.RIGEdge; |
24 |
|
|
import proteinstructure.RIGNode; |
25 |
|
|
import proteinstructure.RIGraph; |
26 |
|
|
|
27 |
|
|
/** |
28 |
|
|
* Implementation of the bounds smoothing part of the EMBED algorithm of Crippen and Havel |
29 |
|
|
* Given a sparse set of distance ranges between a set of atoms it finds distance bounds for |
30 |
|
|
* all pairs of atoms, using the triangle inequality. |
31 |
|
|
* |
32 |
|
|
* Taken from "Distance Geometry: Theory, Algorithms, and Chemical Applications" (section 3.1) by T.F. Havel, |
33 |
|
|
* in Encyclopedia of Computational Chemistry (Wiley, New York, 1998). |
34 |
duarte |
804 |
* See also: |
35 |
|
|
* "Distance Geometry and Molecular Conformation" (Chapter 5) by G.M. Crippen and T.F. Havel (Wiley) |
36 |
|
|
* "Sampling and efficiency of metric matrix distance geometry: A novel partial |
37 |
|
|
* metrization algorithm", Kuszewski J, Nilges M, Bruenger AT, 1992, Journal of Biomolecular NMR |
38 |
duarte |
796 |
* |
39 |
|
|
* @author duarte |
40 |
|
|
* |
41 |
|
|
*/ |
42 |
|
|
public class BoundsSmoother { |
43 |
|
|
|
44 |
duarte |
800 |
protected static final double BB_CA_DIST = 3.8; |
45 |
duarte |
799 |
private static final double HARD_SPHERES_BOUND = AAinfo.DIST_MIN_CA ; |
46 |
duarte |
800 |
|
47 |
|
|
private static final boolean DEBUG = false; |
48 |
duarte |
802 |
private static final long DEBUG_SEED = 123456; |
49 |
|
|
|
50 |
duarte |
804 |
private static final int NUM_ROOTS_PARTIAL_METRIZATION = 4; // we choose 4 as in Kuszewski et al. |
51 |
duarte |
796 |
|
52 |
|
|
/*----------------- helper classes and transformers -----------------*/ |
53 |
|
|
|
54 |
duarte |
797 |
Transformer<SimpleEdge, Number> WeightTransformer = new Transformer<SimpleEdge, Number>() { |
55 |
|
|
public Number transform(SimpleEdge input) { |
56 |
duarte |
796 |
return input.weight; |
57 |
|
|
} |
58 |
|
|
}; |
59 |
|
|
|
60 |
|
|
private class SimpleEdge { |
61 |
|
|
public double weight; |
62 |
|
|
public SimpleEdge(double weight) { |
63 |
|
|
this.weight = weight; |
64 |
|
|
} |
65 |
|
|
} |
66 |
duarte |
797 |
|
67 |
|
|
private class BoundsDigraphNode { |
68 |
|
|
public static final boolean LEFT = true; |
69 |
|
|
public static final boolean RIGHT = false; |
70 |
|
|
private boolean side; // true: left, false: right |
71 |
|
|
private int resSerial; |
72 |
|
|
public BoundsDigraphNode(int resSerial, boolean side) { |
73 |
|
|
this.resSerial = resSerial; |
74 |
|
|
this.side = side; |
75 |
|
|
} |
76 |
|
|
public boolean isRight() { |
77 |
|
|
return !side; |
78 |
|
|
} |
79 |
|
|
public boolean isLeft() { |
80 |
|
|
return side; |
81 |
|
|
} |
82 |
|
|
public int getSerial() { |
83 |
|
|
return resSerial; |
84 |
|
|
} |
85 |
|
|
public boolean equals(Object other) { |
86 |
|
|
if (! (other instanceof BoundsDigraphNode)) return false; |
87 |
|
|
BoundsDigraphNode otherNode = (BoundsDigraphNode)other; |
88 |
|
|
if (otherNode.resSerial==this.resSerial && otherNode.side == this.side) { |
89 |
|
|
return true; |
90 |
|
|
} else { |
91 |
|
|
return false; |
92 |
|
|
} |
93 |
|
|
} |
94 |
|
|
public String toString() { |
95 |
|
|
return resSerial+(side?"L":"R"); |
96 |
|
|
} |
97 |
|
|
} |
98 |
duarte |
796 |
|
99 |
|
|
/*---------------------- member variables ----------------------------*/ |
100 |
|
|
|
101 |
|
|
private RIGraph rig; |
102 |
duarte |
802 |
private TreeMap<Integer,Integer> idx2resser; |
103 |
duarte |
796 |
private int conformationSize; |
104 |
duarte |
797 |
private HashMap<Boolean, HashMap<Integer,BoundsDigraphNode>> nodesBoundsDigraph; // map of serial/side to nodes in the bounds digraph |
105 |
|
|
private double lmax; // maximum of the lower bounds: offset value for the boundsDigraph (not to have negative weights so that we can use Dijkstra's algo) |
106 |
duarte |
796 |
|
107 |
duarte |
804 |
private Bound[][] bounds; // the bounds (half-)matrix with lower/upper bounds for all pairs (or with nulls for pairs without assigned bounds yet) |
108 |
|
|
|
109 |
duarte |
802 |
private Random rand; // the random generator for sampleBounds and metrize |
110 |
|
|
|
111 |
duarte |
796 |
/*------------------------ constructors ------------------------------*/ |
112 |
|
|
|
113 |
|
|
/** |
114 |
|
|
* Constructs a new BoundsSmoother object given a RIGraph. |
115 |
|
|
* The RIGraph is converted into a set of distance ranges using the cutoff |
116 |
|
|
* as upper limit and hard-spheres as lower limit. |
117 |
|
|
* @param graph |
118 |
|
|
*/ |
119 |
|
|
public BoundsSmoother(RIGraph graph) { |
120 |
|
|
this.rig = graph; |
121 |
|
|
this.conformationSize = this.rig.getObsLength(); |
122 |
duarte |
802 |
if (DEBUG) { |
123 |
|
|
rand = new Random(DEBUG_SEED); |
124 |
|
|
} else { |
125 |
|
|
rand = new Random(); |
126 |
|
|
} |
127 |
duarte |
796 |
} |
128 |
|
|
|
129 |
|
|
/*----------------------- public methods ----------------------------*/ |
130 |
|
|
|
131 |
|
|
/** |
132 |
|
|
* Computes bounds for all pairs, based on the set of sparse distance ranges |
133 |
duarte |
804 |
* The returned array is a new array not the reference to the internal bounds array. |
134 |
duarte |
796 |
* @return a 2-dimensional array with the bounds for all pairs of residues, the |
135 |
|
|
* indices of the array can be mapped to residue serials through {@link #getResserFromIdx(int)} |
136 |
|
|
* and are guaranteed to be in the same order as the residue serials. |
137 |
|
|
*/ |
138 |
|
|
public Bound[][] getBoundsAllPairs() { |
139 |
duarte |
804 |
convertRIGraphToBoundsMatrix(this.rig); // this initializes the bounds array |
140 |
|
|
computeTriangleInequality(); |
141 |
|
|
return copyBounds(bounds); // we return a copy of the internal bounds array so that we can keep modifying it without side-effects to the returned reference |
142 |
duarte |
796 |
} |
143 |
duarte |
798 |
|
144 |
duarte |
796 |
/** |
145 |
duarte |
804 |
* Gets a random sample from the internal bounds array of all pairs distance ranges |
146 |
duarte |
802 |
* @return a simmetric metric matrix (both sides filled) |
147 |
duarte |
804 |
* @throws NullPointerException if the internal bounds array doesn't contain bounds for all pairs |
148 |
duarte |
796 |
*/ |
149 |
duarte |
804 |
public Matrix sampleBounds() { |
150 |
duarte |
796 |
double[][] matrix = new double[conformationSize][conformationSize]; |
151 |
|
|
for (int i=0;i<conformationSize;i++) { |
152 |
|
|
for (int j=0;j<conformationSize;j++) { |
153 |
|
|
if (j>i) { |
154 |
|
|
matrix[i][j]= bounds[i][j].lower+rand.nextDouble()*(bounds[i][j].upper-bounds[i][j].lower); |
155 |
|
|
} else if (j<i) { |
156 |
|
|
matrix[i][j]=matrix[j][i]; |
157 |
|
|
} |
158 |
|
|
} |
159 |
|
|
} |
160 |
|
|
return new Matrix(matrix); |
161 |
|
|
} |
162 |
duarte |
798 |
|
163 |
duarte |
803 |
/** |
164 |
duarte |
804 |
* Performs partial metrization for the internal bounds array. |
165 |
|
|
* The internal bounds array is updated with the new bounds after metrization. |
166 |
|
|
* The idea is that metrization doesn't need to be done for all atoms but only |
167 |
|
|
* for a handful of them (called roots). This results in a much faster algorithm having |
168 |
|
|
* almost the same sampling properties as full metrization. |
169 |
|
|
* See "Sampling and efficiency of metric matrix distance geometry: A novel partial |
170 |
|
|
* metrization algorithm", Kuszewski J, Nilges M, Bruenger AT, 1992, Journal of Biomolecular NMR |
171 |
duarte |
803 |
* @return |
172 |
|
|
*/ |
173 |
duarte |
804 |
public Matrix metrize() { |
174 |
duarte |
803 |
|
175 |
duarte |
802 |
ArrayList<Integer> roots = new ArrayList<Integer>(); |
176 |
|
|
for (int count=1;count<=NUM_ROOTS_PARTIAL_METRIZATION;count++) { |
177 |
|
|
int root = rand.nextInt(conformationSize); |
178 |
duarte |
804 |
if (roots.contains(root)) { |
179 |
|
|
root = rand.nextInt(conformationSize); |
180 |
|
|
} |
181 |
|
|
if (roots.contains(root)) { |
182 |
|
|
System.err.println("Warning: repeated root atom while doing metrization: "+root); |
183 |
|
|
} |
184 |
duarte |
803 |
if (DEBUG) System.out.println("Picked root: "+root); |
185 |
duarte |
802 |
roots.add(root); |
186 |
duarte |
804 |
sampleBoundForRoot(root); // this alters directly the input bounds array |
187 |
|
|
computeTriangleInequality(); |
188 |
duarte |
802 |
} |
189 |
|
|
|
190 |
duarte |
803 |
// finally pick a value at random for all the other bounds |
191 |
duarte |
804 |
return sampleBounds(); |
192 |
duarte |
802 |
} |
193 |
|
|
|
194 |
duarte |
796 |
/** |
195 |
|
|
* Maps from residue serials to indices of the matrices returned by {@link #getBoundsAllPairs()} and |
196 |
|
|
* {@link #sampleBounds(Bound[][])} |
197 |
|
|
* @param idx |
198 |
|
|
* @return |
199 |
|
|
*/ |
200 |
|
|
public int getResserFromIdx(int idx) { |
201 |
duarte |
802 |
return idx2resser.get(idx); |
202 |
duarte |
796 |
} |
203 |
|
|
|
204 |
|
|
/*----------------------- private methods ---------------------------*/ |
205 |
|
|
|
206 |
duarte |
804 |
/** |
207 |
|
|
* For given root atom samples a value from the distance ranges of the root |
208 |
|
|
* to all of its neighbours (updating the internal bounds array with the new sampled bounds) |
209 |
|
|
* @param root |
210 |
|
|
*/ |
211 |
|
|
private void sampleBoundForRoot(int root) { |
212 |
duarte |
803 |
for (int neighb=0;neighb<conformationSize;neighb++) { |
213 |
|
|
if (neighb==root) continue; // avoid the diagonal (which contains a null Bound) |
214 |
duarte |
802 |
int i = root; |
215 |
duarte |
803 |
int j = neighb; |
216 |
|
|
if (neighb<root) { |
217 |
|
|
i = neighb; |
218 |
duarte |
802 |
j = root; |
219 |
|
|
} |
220 |
|
|
double sampledValue = bounds[i][j].lower+rand.nextDouble()*(bounds[i][j].upper-bounds[i][j].lower); |
221 |
duarte |
803 |
bounds[i][j].lower = sampledValue; |
222 |
|
|
bounds[i][j].upper = sampledValue; |
223 |
|
|
if (DEBUG) System.out.print(bounds[i][j]); |
224 |
duarte |
802 |
} |
225 |
duarte |
803 |
if (DEBUG) System.out.println("\n"); |
226 |
duarte |
802 |
} |
227 |
|
|
|
228 |
duarte |
796 |
/** |
229 |
duarte |
804 |
* Computes bounds for all pairs through triangle inequalities from the bounds member variable |
230 |
|
|
* containing a set of lower/upper bounds (sparse or full) |
231 |
|
|
* The bounds array is modified to contain the new bounds. |
232 |
|
|
* This is guaranteed to run in o(nmlog(n)), thus if the input is sparse then it's pretty fast: o(n2log(n)) but if |
233 |
|
|
* the input is the full set of bounds then we have o(n3log(n)). |
234 |
duarte |
798 |
*/ |
235 |
duarte |
804 |
private void computeTriangleInequality() { |
236 |
duarte |
803 |
double MARGIN = 0.0001; // for comparing doubles we need some tolerance value |
237 |
duarte |
804 |
|
238 |
|
|
double[][] lowerBounds = getLowerBoundsAllPairs(); |
239 |
|
|
double[][] upperBounds = getUpperBoundsAllPairs(); |
240 |
duarte |
802 |
for (int i=0;i<conformationSize;i++) { |
241 |
|
|
for (int j=i+1;j<conformationSize;j++) { |
242 |
duarte |
798 |
double upperBound = upperBounds[i][j]; |
243 |
|
|
double lowerBound = lowerBounds[i][j]; |
244 |
duarte |
804 |
|
245 |
|
|
if (bounds[i][j]!=null) { |
246 |
|
|
if (lowerBound>upperBound+MARGIN) { |
247 |
|
|
//System.err.println("old: "+sparseBounds[i][j]+" new: "+new Bound(lowerBound,upperBound)); |
248 |
|
|
|
249 |
|
|
// During metrization sometimes a new upper bound is found that is below the new lower bound |
250 |
|
|
// (actually in these cases the new lower bound coincides with the old one i.e. nothing new was |
251 |
|
|
// found through triangle inequality for the lower bound). |
252 |
|
|
// For some reason it doesn't happen the other way around: a new lower bound found that is |
253 |
|
|
// above the new (coinciding with old) upper bound. I suppose this is because the triangle inequality |
254 |
|
|
// "is a lot more effective at reducing the upper bounds than increasing the lower bounds" (quoting Havel) |
255 |
|
|
// To correct this we set both lower and upper to the newly found upper, i.e. we assume that |
256 |
|
|
// the new upper bound is better because is in accordance to the triangle inequality |
257 |
|
|
lowerBound=upperBound; |
258 |
|
|
|
259 |
|
|
//if (upperBound<sparseBounds[i][j].upper-MARGIN) |
260 |
|
|
// System.err.printf("new upper bound (%4.1f) for pair %3d %3d is smaller than old upper bound (%4.1f)\n",upperBound,i,j,sparseBounds[i][j].upper); |
261 |
|
|
//if (lowerBound>sparseBounds[i][j].lower+MARGIN) |
262 |
|
|
// System.err.printf("new lower bound (%4.1f) for pair %3d %3d is bigger than old lower bound (%4.1f)\n",lowerBound,i,j,sparseBounds[i][j].lower); |
263 |
|
|
} |
264 |
|
|
|
265 |
|
|
bounds[i][j].lower = lowerBound; |
266 |
|
|
bounds[i][j].upper = upperBound; |
267 |
|
|
} else { |
268 |
|
|
bounds[i][j] = new Bound(lowerBound,upperBound); |
269 |
duarte |
803 |
} |
270 |
duarte |
802 |
// sanity check: lower bounds can't be bigger than upper bounds! |
271 |
duarte |
803 |
if (lowerBound>upperBound+MARGIN) { |
272 |
duarte |
802 |
System.err.printf("Warning: lower bound (%4.1f) for pair "+i+" "+j+" is bigger than upper bound (%4.1f)\n",lowerBound,upperBound); |
273 |
duarte |
798 |
} |
274 |
|
|
} |
275 |
|
|
} |
276 |
|
|
} |
277 |
|
|
|
278 |
|
|
/** |
279 |
duarte |
804 |
* Computes upper bounds for all pairs from a set of upper bounds (sparse or full) based |
280 |
duarte |
796 |
* on the triangle inequality. |
281 |
|
|
* The computation is actually just an all pairs shortest path using Dijkstra's |
282 |
|
|
* shortest path algorithm (as the set of distance coming from contact maps is |
283 |
|
|
* very sparse this algorithm should be more efficient than Floyd's: Dijkstra's is |
284 |
|
|
* o(nm logn) and Floyd's is o(n3)) |
285 |
|
|
* @return a 2-dimensional array with the upper bounds for all pairs of residues, the |
286 |
|
|
* indices of the array can be mapped to residue serials through {@link #getResserFromIdx(int)} |
287 |
|
|
* and are guaranteed to be in the same order as the residue serials. |
288 |
|
|
*/ |
289 |
duarte |
804 |
private double[][] getUpperBoundsAllPairs() { |
290 |
duarte |
796 |
double[][] upperBoundsMatrix = new double[conformationSize][conformationSize]; |
291 |
duarte |
804 |
SparseGraph<Integer,SimpleEdge> upperBoundGraph = convertBoundsMatrixToUpperBoundGraph(); |
292 |
duarte |
796 |
DijkstraDistance<Integer, SimpleEdge> dd = new DijkstraDistance<Integer, SimpleEdge>(upperBoundGraph,WeightTransformer); |
293 |
duarte |
802 |
|
294 |
|
|
for (int i=0;i<conformationSize;i++) { |
295 |
|
|
for (int j=i+1;j<conformationSize;j++) { |
296 |
|
|
upperBoundsMatrix[i][j] = dd.getDistance(i, j).doubleValue(); |
297 |
duarte |
796 |
} |
298 |
|
|
} |
299 |
|
|
return upperBoundsMatrix; |
300 |
|
|
} |
301 |
|
|
|
302 |
|
|
/** |
303 |
duarte |
804 |
* Computes lower bounds for all pairs given a set of upper/lower bounds (sparse or full) |
304 |
|
|
* based on the triangle inequality. |
305 |
duarte |
796 |
* |
306 |
duarte |
797 |
* NOTE that because we use Dijkstra's algorithm for the computation of the shortest paths, we can't use negative |
307 |
|
|
* weights (see http://en.wikipedia.org/wiki/Dijkstra%27s_algorithm). Because of this the boundsDigraph result |
308 |
duarte |
804 |
* of calling {@link #convertBoundsMatrixToBoundsDigraph()}} have values offset with the maximum lower bound (lmax). |
309 |
duarte |
797 |
* Thus after computing the shortest paths we have to revert back that offset by counting the number of hops the shortest path has. |
310 |
|
|
* |
311 |
duarte |
796 |
* @return a 2-dimensional array with the lower bounds for all pairs of residues, the |
312 |
|
|
* indices of the array can be mapped to residue serials through {@link #getResserFromIdx(int)} |
313 |
|
|
* and are guaranteed to be in the same order as the residue serials. |
314 |
duarte |
797 |
* |
315 |
duarte |
804 |
* @see {@link #convertBoundsMatrixToBoundsDigraph()} and {@link #getUpperBoundsAllPairs()} |
316 |
duarte |
796 |
*/ |
317 |
duarte |
804 |
private double[][] getLowerBoundsAllPairs() { |
318 |
duarte |
797 |
double[][] lowerBoundsMatrix = new double[conformationSize][conformationSize]; |
319 |
|
|
// this is the bounds digraph as described by Crippen and Havel |
320 |
duarte |
804 |
SparseGraph<BoundsDigraphNode,SimpleEdge> boundsDigraph = convertBoundsMatrixToBoundsDigraph(); |
321 |
duarte |
797 |
DijkstraShortestPath<BoundsDigraphNode, SimpleEdge> dd = new DijkstraShortestPath<BoundsDigraphNode, SimpleEdge>(boundsDigraph,WeightTransformer); |
322 |
duarte |
802 |
|
323 |
|
|
for (int i=0;i<conformationSize;i++) { |
324 |
|
|
for (int j=i+1;j<conformationSize;j++) { |
325 |
|
|
int hops = dd.getPath(nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i), nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j)).size(); |
326 |
|
|
double lower = Math.abs( |
327 |
duarte |
797 |
(dd.getDistance(nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i), |
328 |
duarte |
802 |
nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j) |
329 |
|
|
).doubleValue()) |
330 |
duarte |
797 |
- (hops*lmax)); // the lower limit for the triangle inequality is: Math.abs(shortestpath-(hops*lmax)) |
331 |
duarte |
802 |
lowerBoundsMatrix[i][j] = Math.max(lower, HARD_SPHERES_BOUND); // we only set the new lower bound to the one found if is above the HARD_SPHERES_BOUND |
332 |
duarte |
796 |
} |
333 |
|
|
} |
334 |
duarte |
797 |
return lowerBoundsMatrix; |
335 |
duarte |
796 |
|
336 |
|
|
} |
337 |
|
|
|
338 |
|
|
/** |
339 |
duarte |
802 |
* Converts the bounds matrix to a graph with only the upper bounds: nodes indices of bounds |
340 |
|
|
* matrix, edges upper bounds (in SimpleEdge objects containing the upper bound value |
341 |
duarte |
796 |
* in their weight field). |
342 |
duarte |
802 |
* @param bounds |
343 |
duarte |
796 |
* @return |
344 |
|
|
*/ |
345 |
duarte |
804 |
private SparseGraph<Integer,SimpleEdge> convertBoundsMatrixToUpperBoundGraph() { |
346 |
duarte |
796 |
SparseGraph<Integer,SimpleEdge> upperBoundGraph = new SparseGraph<Integer, SimpleEdge>(); |
347 |
duarte |
802 |
for (int i=0;i<conformationSize;i++) { |
348 |
|
|
for (int j=0;j<conformationSize;j++) { |
349 |
|
|
if (bounds[i][j]!=null) { |
350 |
|
|
upperBoundGraph.addEdge(new SimpleEdge(bounds[i][j].upper), i, j, EdgeType.UNDIRECTED); |
351 |
|
|
} |
352 |
|
|
} |
353 |
duarte |
796 |
} |
354 |
|
|
return upperBoundGraph; |
355 |
|
|
} |
356 |
duarte |
797 |
|
357 |
duarte |
796 |
/** |
358 |
duarte |
797 |
* Constructs a bounds digraph (as described by Crippen and Havel) to compute the triangle inequality limits |
359 |
|
|
* for the lower bounds. |
360 |
|
|
* The graph is composed by 2 subgraphs (we call them left and right) each of them containing the set of all atoms. |
361 |
|
|
* Within the subgraphs there is an undirected edge between atoms i,j with weight the upper bounds for i,j |
362 |
|
|
* Between the subgraphs there is a directed edge from left to right between atoms i(left) to j(right) |
363 |
|
|
* with weight the negative of the lower bound i,j |
364 |
|
|
* NOTE that because we use Dijkstra's algorithm for the computation of the shortest paths, we can't use negative |
365 |
|
|
* weights (see http://en.wikipedia.org/wiki/Dijkstra%27s_algorithm). Thus we offset the values here to the maximum lower bound. |
366 |
|
|
* After computing the shortest paths we have to revert back that offset by counting the number of hops the shortest path has. |
367 |
|
|
* @return |
368 |
|
|
*/ |
369 |
duarte |
804 |
private SparseGraph<BoundsDigraphNode,SimpleEdge> convertBoundsMatrixToBoundsDigraph() { |
370 |
duarte |
797 |
// to do the offset thing (see docs above) we need to know first of all the max lower bound |
371 |
|
|
ArrayList<Double> lowerBounds = new ArrayList<Double>(); |
372 |
duarte |
802 |
for (int i=0;i<conformationSize;i++) { |
373 |
|
|
for (int j=0;j<conformationSize;j++) { |
374 |
|
|
if (bounds[i][j]!=null) { |
375 |
|
|
lowerBounds.add(bounds[i][j].lower); |
376 |
|
|
} |
377 |
|
|
} |
378 |
duarte |
797 |
} |
379 |
|
|
lmax = Collections.max(lowerBounds); // this is the offset value |
380 |
|
|
|
381 |
|
|
SparseGraph<BoundsDigraphNode,SimpleEdge> boundsDigraph = new SparseGraph<BoundsDigraphNode, SimpleEdge>(); |
382 |
|
|
// we have to store all nodes in a HashMap, so we can retrieve them by residue serial and side after |
383 |
|
|
nodesBoundsDigraph = new HashMap<Boolean, HashMap<Integer,BoundsDigraphNode>>(); |
384 |
|
|
nodesBoundsDigraph.put(BoundsDigraphNode.LEFT , new HashMap<Integer, BoundsDigraphNode>()); |
385 |
|
|
nodesBoundsDigraph.put(BoundsDigraphNode.RIGHT, new HashMap<Integer, BoundsDigraphNode>()); |
386 |
|
|
// first we create the nodes and store them into the HashMap |
387 |
duarte |
802 |
for (int i=0;i<conformationSize;i++) { |
388 |
duarte |
797 |
BoundsDigraphNode leftNode = new BoundsDigraphNode(i, BoundsDigraphNode.LEFT); |
389 |
|
|
BoundsDigraphNode rightNode = new BoundsDigraphNode(i, BoundsDigraphNode.RIGHT); |
390 |
|
|
boundsDigraph.addVertex(leftNode); |
391 |
|
|
boundsDigraph.addVertex(rightNode); |
392 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).put(i,leftNode); |
393 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).put(i,rightNode); |
394 |
|
|
} |
395 |
|
|
|
396 |
duarte |
802 |
for (int i=0;i<conformationSize;i++) { |
397 |
|
|
for (int j=0;j<conformationSize;j++) { |
398 |
|
|
if (bounds[i][j]!=null) { |
399 |
|
|
// first we add the upper bounds as undirected edges to the 2 subgraphs (left and right) |
400 |
|
|
boundsDigraph.addEdge(new SimpleEdge(lmax+bounds[i][j].upper), |
401 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i), |
402 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(j), |
403 |
|
|
EdgeType.UNDIRECTED); |
404 |
|
|
boundsDigraph.addEdge(new SimpleEdge(lmax+bounds[i][j].upper), |
405 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(i), |
406 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j), |
407 |
|
|
EdgeType.UNDIRECTED); |
408 |
|
|
// then we add the negative of the lower bounds as directed edges connecting nodes of subgraph left to subgraph right |
409 |
|
|
boundsDigraph.addEdge(new SimpleEdge(lmax-bounds[i][j].lower), |
410 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i), |
411 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j), |
412 |
|
|
EdgeType.DIRECTED); |
413 |
|
|
} |
414 |
|
|
} |
415 |
duarte |
797 |
} |
416 |
|
|
return boundsDigraph; |
417 |
|
|
} |
418 |
|
|
|
419 |
|
|
/** |
420 |
duarte |
802 |
* Convert the given RIGraph to a bounds matrix. The indices of the matrix can be mapped back to residue |
421 |
|
|
* serials through {@link #getResserFromIdx(int)} |
422 |
duarte |
796 |
* Will only admit single atom contact type RIGraphs |
423 |
|
|
* @param graph |
424 |
|
|
* @return |
425 |
|
|
* @throws IllegalArgumentException if contact type of given RIGraph is not a single atom contact type |
426 |
|
|
*/ |
427 |
duarte |
802 |
private Bound[][] convertRIGraphToBoundsMatrix(RIGraph graph) { |
428 |
duarte |
796 |
// code cloned from ConstraintsMaker.createDistanceConstraints with some modifications |
429 |
duarte |
804 |
bounds = new Bound[conformationSize][conformationSize]; |
430 |
duarte |
802 |
TreeMap<Integer,Integer> resser2idx = new TreeMap<Integer, Integer>(); |
431 |
|
|
this.idx2resser = new TreeMap<Integer, Integer>(); |
432 |
|
|
int idx = 0; |
433 |
|
|
for (int resser:graph.getSerials()) { |
434 |
|
|
resser2idx.put(resser,idx); |
435 |
|
|
idx2resser.put(idx,resser); |
436 |
|
|
idx++; |
437 |
|
|
} |
438 |
duarte |
796 |
double cutoff = graph.getCutoff(); |
439 |
|
|
String ct = graph.getContactType(); |
440 |
|
|
String i_ct = ct; |
441 |
|
|
String j_ct = ct; |
442 |
|
|
if (ct.contains("/")){ |
443 |
|
|
i_ct = ct.split("/")[0]; |
444 |
|
|
j_ct = ct.split("/")[1]; |
445 |
|
|
} |
446 |
|
|
|
447 |
|
|
if (!AAinfo.isValidSingleAtomContactType(i_ct) || !AAinfo.isValidSingleAtomContactType(j_ct)){ |
448 |
|
|
throw new IllegalArgumentException("Contact type "+i_ct+" or "+j_ct+" is not valid for reconstruction"); |
449 |
|
|
} |
450 |
|
|
|
451 |
|
|
for (RIGEdge cont:graph.getEdges()){ |
452 |
|
|
Pair<RIGNode> pair = graph.getEndpoints(cont); |
453 |
|
|
String i_res = pair.getFirst().getResidueType(); |
454 |
|
|
String j_res = pair.getSecond().getResidueType(); |
455 |
|
|
|
456 |
|
|
// as dist_min we take the average of the two dist mins, if i_ct and j_ct are the same then this will be the same as dist_min for ct |
457 |
|
|
double dist_min = (AAinfo.getLowerBoundDistance(i_ct,i_res,j_res)+AAinfo.getLowerBoundDistance(j_ct,i_res,j_res))/2; |
458 |
|
|
// for single atom contact types getUpperBoundDistance and getLowerBoundDistance will return 0 thus for those cases dist_max = cutoff |
459 |
|
|
double dist_max = AAinfo.getUpperBoundDistance(i_ct, i_res, j_res)/2+AAinfo.getUpperBoundDistance(i_ct, i_res, j_res)/2+cutoff; |
460 |
|
|
|
461 |
duarte |
799 |
if (pair.getSecond().getResidueSerial()>pair.getFirst().getResidueSerial()+1) { //we don't add the first diagonal, we add it later as contiguous CA constraints |
462 |
duarte |
802 |
bounds[resser2idx.get(pair.getFirst().getResidueSerial())][resser2idx.get(pair.getSecond().getResidueSerial())] = new Bound(dist_min, dist_max); |
463 |
duarte |
799 |
} |
464 |
duarte |
796 |
} |
465 |
duarte |
799 |
// adding contiguous CA distance backbone constraints |
466 |
duarte |
802 |
for (int i=0;i<conformationSize-1;i++) { |
467 |
|
|
bounds[i][i+1]=new Bound(BB_CA_DIST,BB_CA_DIST); |
468 |
|
|
} |
469 |
|
|
return bounds; |
470 |
|
|
} |
471 |
duarte |
804 |
|
472 |
|
|
private void printBounds() { |
473 |
|
|
printBounds(this.bounds); |
474 |
|
|
} |
475 |
duarte |
802 |
|
476 |
duarte |
804 |
/*------------------------ statics ------------------------------*/ |
477 |
|
|
|
478 |
duarte |
802 |
/** |
479 |
|
|
* Deep copies given array of bounds |
480 |
|
|
* @param bounds |
481 |
|
|
* @return |
482 |
|
|
*/ |
483 |
|
|
private static Bound[][] copyBounds(Bound[][] bounds) { |
484 |
|
|
Bound[][] newBounds = new Bound[bounds.length][bounds.length]; |
485 |
|
|
for (int i=0;i<bounds.length;i++) { |
486 |
|
|
for (int j=0;j<bounds[i].length;j++) { |
487 |
|
|
if (bounds[i][j]!=null) { |
488 |
|
|
newBounds[i][j] = new Bound(bounds[i][j].lower,bounds[i][j].upper); |
489 |
|
|
} |
490 |
duarte |
799 |
} |
491 |
|
|
} |
492 |
duarte |
802 |
return newBounds; |
493 |
duarte |
796 |
} |
494 |
duarte |
804 |
|
495 |
duarte |
802 |
private static void printBounds(Bound[][] bounds) { |
496 |
|
|
for (int i=0;i<bounds.length;i++) { |
497 |
|
|
for (int j=0;j<bounds[i].length;j++) { |
498 |
|
|
if (bounds[i][j]==null) { |
499 |
|
|
System.out.printf("%11s","null"); |
500 |
|
|
} else { |
501 |
|
|
System.out.print(bounds[i][j]); |
502 |
|
|
} |
503 |
|
|
} |
504 |
|
|
System.out.println(); |
505 |
|
|
} |
506 |
|
|
System.out.println(); |
507 |
|
|
} |
508 |
|
|
|
509 |
|
|
private static void printViolations (Matrix matrixEmbedded, Bound[][] bounds) { |
510 |
|
|
int count = 0; |
511 |
|
|
for (int i=0;i<matrixEmbedded.getRowDimension();i++) { |
512 |
|
|
for (int j=i+1;j<matrixEmbedded.getColumnDimension();j++) { |
513 |
|
|
if ((matrixEmbedded.get(i,j)<bounds[i][j].lower) || (matrixEmbedded.get(i,j)>bounds[i][j].upper)) { |
514 |
|
|
System.out.printf("%3d %3d %4.1f %s\n",i,j,matrixEmbedded.get(i,j),bounds[i][j].toString()); |
515 |
|
|
count++; |
516 |
|
|
} |
517 |
|
|
} |
518 |
|
|
} |
519 |
|
|
int cells = (matrixEmbedded.getRowDimension()*(matrixEmbedded.getRowDimension()-1))/2; |
520 |
|
|
System.out.println("Number of violations: "+count+" out of "+cells+" cells in half matrix"); |
521 |
|
|
} |
522 |
|
|
|
523 |
|
|
private static int getNumberViolations(Matrix matrixEmbedded, Bound[][] bounds) { |
524 |
|
|
int count = 0; |
525 |
|
|
for (int i=0;i<matrixEmbedded.getRowDimension();i++) { |
526 |
|
|
for (int j=i+1;j<matrixEmbedded.getColumnDimension();j++) { |
527 |
|
|
if ((matrixEmbedded.get(i,j)<bounds[i][j].lower) || (matrixEmbedded.get(i,j)>bounds[i][j].upper)) { |
528 |
|
|
count++; |
529 |
|
|
} |
530 |
|
|
} |
531 |
|
|
} |
532 |
|
|
return count; |
533 |
|
|
} |
534 |
|
|
|
535 |
|
|
private static void printMatrix(Matrix matrix) { |
536 |
|
|
for (int i=0;i<matrix.getRowDimension();i++) { |
537 |
|
|
for (int j=0;j<matrix.getColumnDimension();j++) { |
538 |
|
|
System.out.printf("%4.1f ",matrix.get(i, j)); |
539 |
|
|
} |
540 |
|
|
System.out.println(); |
541 |
|
|
} |
542 |
|
|
} |
543 |
|
|
|
544 |
duarte |
796 |
/*-------------------------- main -------------------------------*/ |
545 |
|
|
|
546 |
|
|
/** |
547 |
|
|
* To test the class |
548 |
|
|
*/ |
549 |
|
|
public static void main (String[] args) throws Exception { |
550 |
duarte |
802 |
boolean debug = false; |
551 |
|
|
boolean writeFiles = false; |
552 |
duarte |
804 |
int numModels = 10; |
553 |
duarte |
802 |
Embedder.ScalingMethod scalingMethod = Embedder.ScalingMethod.AVRG_INTER_CA_DIST; |
554 |
duarte |
804 |
boolean metrize = true; // if true metrization performed, if false random sampling |
555 |
duarte |
802 |
|
556 |
duarte |
798 |
String pdbCode = "1bxy"; |
557 |
|
|
String pdbChainCode = "A"; |
558 |
duarte |
802 |
String ct = "Ca"; |
559 |
|
|
double cutoff = 8.0; |
560 |
duarte |
798 |
|
561 |
|
|
Pdb pdb = new PdbasePdb(pdbCode); |
562 |
|
|
pdb.load(pdbChainCode); |
563 |
|
|
Pdb pdbEmbedded = new PdbasePdb(pdbCode); |
564 |
|
|
pdbEmbedded.load(pdbChainCode); |
565 |
|
|
|
566 |
duarte |
802 |
RIGraph graph = pdb.get_graph(ct, cutoff); |
567 |
duarte |
796 |
BoundsSmoother bs = new BoundsSmoother(graph); |
568 |
duarte |
804 |
Bound[][] initialBoundsAllPairs = bs.getBoundsAllPairs(); |
569 |
duarte |
803 |
|
570 |
duarte |
798 |
if (debug) { |
571 |
duarte |
804 |
// all pairs bounds after triangle inequality |
572 |
|
|
printBounds(initialBoundsAllPairs); |
573 |
duarte |
796 |
} |
574 |
duarte |
802 |
|
575 |
duarte |
803 |
|
576 |
duarte |
802 |
System.out.printf("%6s\t%6s\t%6s", "rmsd","rmsdm","viols"); |
577 |
duarte |
796 |
System.out.println(); |
578 |
duarte |
798 |
for (int model=0;model<numModels;model++) { |
579 |
duarte |
803 |
|
580 |
duarte |
804 |
Matrix matrix = null; |
581 |
|
|
if (!metrize) { |
582 |
|
|
matrix = bs.sampleBounds(); |
583 |
|
|
} else { |
584 |
|
|
matrix = bs.metrize(); |
585 |
|
|
} |
586 |
|
|
|
587 |
|
|
if (debug) { |
588 |
duarte |
803 |
// bounds after metrization |
589 |
duarte |
804 |
bs.printBounds(); |
590 |
|
|
} |
591 |
duarte |
803 |
|
592 |
duarte |
798 |
if (debug) { |
593 |
duarte |
802 |
printMatrix(matrix); |
594 |
duarte |
796 |
} |
595 |
duarte |
798 |
|
596 |
duarte |
802 |
Embedder embedder = new Embedder(matrix); |
597 |
|
|
Vector3d[] embedding = embedder.embed(scalingMethod); |
598 |
duarte |
798 |
pdbEmbedded.setAtomsCoords(embedding, "CA"); |
599 |
|
|
|
600 |
|
|
double rmsd = pdb.rmsd(pdbEmbedded, "Ca"); |
601 |
|
|
pdb.mirror(); |
602 |
|
|
double rmsdm = pdb.rmsd(pdbEmbedded, "Ca"); |
603 |
duarte |
800 |
|
604 |
duarte |
799 |
if (rmsd>rmsdm ) { |
605 |
|
|
pdbEmbedded.mirror(); |
606 |
|
|
} |
607 |
duarte |
800 |
|
608 |
duarte |
802 |
if (writeFiles) |
609 |
duarte |
804 |
pdbEmbedded.dump2pdbfile("/project/StruPPi/jose/embed/embed_"+pdbCode+pdbChainCode+"_"+model+".pdb"); |
610 |
duarte |
802 |
|
611 |
|
|
Matrix matrixEmbedded = pdbEmbedded.calculateDistMatrix("Ca"); |
612 |
duarte |
799 |
|
613 |
duarte |
804 |
System.out.printf("%6.3f\t%6.3f\t%6d",rmsd,rmsdm,getNumberViolations(matrixEmbedded, initialBoundsAllPairs)); |
614 |
duarte |
796 |
System.out.println(); |
615 |
duarte |
799 |
|
616 |
duarte |
800 |
if (debug) { |
617 |
duarte |
804 |
printViolations(matrixEmbedded, initialBoundsAllPairs); |
618 |
duarte |
799 |
} |
619 |
duarte |
796 |
} |
620 |
|
|
} |
621 |
|
|
|
622 |
|
|
} |