1 |
duarte |
796 |
package embed; |
2 |
|
|
|
3 |
duarte |
797 |
import java.util.ArrayList; |
4 |
|
|
import java.util.Collections; |
5 |
|
|
import java.util.HashMap; |
6 |
duarte |
796 |
import java.util.Random; |
7 |
|
|
|
8 |
|
|
import org.apache.commons.collections15.Transformer; |
9 |
|
|
|
10 |
|
|
import Jama.Matrix; |
11 |
|
|
|
12 |
|
|
import edu.uci.ics.jung.algorithms.shortestpath.DijkstraDistance; |
13 |
duarte |
797 |
import edu.uci.ics.jung.algorithms.shortestpath.DijkstraShortestPath; |
14 |
duarte |
796 |
import edu.uci.ics.jung.graph.SparseGraph; |
15 |
|
|
import edu.uci.ics.jung.graph.util.EdgeType; |
16 |
|
|
import proteinstructure.AAinfo; |
17 |
|
|
|
18 |
|
|
/** |
19 |
|
|
* Implementation of the bounds smoothing part of the EMBED algorithm of Crippen and Havel |
20 |
|
|
* Given a sparse set of distance ranges between a set of atoms it finds distance bounds for |
21 |
|
|
* all pairs of atoms, using the triangle inequality. |
22 |
|
|
* |
23 |
|
|
* Taken from "Distance Geometry: Theory, Algorithms, and Chemical Applications" (section 3.1) by T.F. Havel, |
24 |
|
|
* in Encyclopedia of Computational Chemistry (Wiley, New York, 1998). |
25 |
duarte |
804 |
* See also: |
26 |
|
|
* "Distance Geometry and Molecular Conformation" (Chapter 5) by G.M. Crippen and T.F. Havel (Wiley) |
27 |
|
|
* "Sampling and efficiency of metric matrix distance geometry: A novel partial |
28 |
|
|
* metrization algorithm", Kuszewski J, Nilges M, Bruenger AT, 1992, Journal of Biomolecular NMR |
29 |
duarte |
796 |
* |
30 |
|
|
* @author duarte |
31 |
|
|
* |
32 |
|
|
*/ |
33 |
|
|
public class BoundsSmoother { |
34 |
|
|
|
35 |
duarte |
799 |
private static final double HARD_SPHERES_BOUND = AAinfo.DIST_MIN_CA ; |
36 |
duarte |
800 |
|
37 |
|
|
private static final boolean DEBUG = false; |
38 |
duarte |
802 |
private static final long DEBUG_SEED = 123456; |
39 |
|
|
|
40 |
duarte |
804 |
private static final int NUM_ROOTS_PARTIAL_METRIZATION = 4; // we choose 4 as in Kuszewski et al. |
41 |
duarte |
796 |
|
42 |
|
|
/*----------------- helper classes and transformers -----------------*/ |
43 |
|
|
|
44 |
duarte |
797 |
Transformer<SimpleEdge, Number> WeightTransformer = new Transformer<SimpleEdge, Number>() { |
45 |
|
|
public Number transform(SimpleEdge input) { |
46 |
duarte |
796 |
return input.weight; |
47 |
|
|
} |
48 |
|
|
}; |
49 |
|
|
|
50 |
|
|
private class SimpleEdge { |
51 |
|
|
public double weight; |
52 |
|
|
public SimpleEdge(double weight) { |
53 |
|
|
this.weight = weight; |
54 |
|
|
} |
55 |
|
|
} |
56 |
duarte |
797 |
|
57 |
|
|
private class BoundsDigraphNode { |
58 |
|
|
public static final boolean LEFT = true; |
59 |
|
|
public static final boolean RIGHT = false; |
60 |
|
|
private boolean side; // true: left, false: right |
61 |
|
|
private int resSerial; |
62 |
|
|
public BoundsDigraphNode(int resSerial, boolean side) { |
63 |
|
|
this.resSerial = resSerial; |
64 |
|
|
this.side = side; |
65 |
|
|
} |
66 |
|
|
public boolean isRight() { |
67 |
|
|
return !side; |
68 |
|
|
} |
69 |
|
|
public boolean isLeft() { |
70 |
|
|
return side; |
71 |
|
|
} |
72 |
|
|
public int getSerial() { |
73 |
|
|
return resSerial; |
74 |
|
|
} |
75 |
|
|
public boolean equals(Object other) { |
76 |
|
|
if (! (other instanceof BoundsDigraphNode)) return false; |
77 |
|
|
BoundsDigraphNode otherNode = (BoundsDigraphNode)other; |
78 |
|
|
if (otherNode.resSerial==this.resSerial && otherNode.side == this.side) { |
79 |
|
|
return true; |
80 |
|
|
} else { |
81 |
|
|
return false; |
82 |
|
|
} |
83 |
|
|
} |
84 |
|
|
public String toString() { |
85 |
|
|
return resSerial+(side?"L":"R"); |
86 |
|
|
} |
87 |
|
|
} |
88 |
duarte |
796 |
|
89 |
|
|
/*---------------------- member variables ----------------------------*/ |
90 |
|
|
|
91 |
|
|
private int conformationSize; |
92 |
duarte |
797 |
private HashMap<Boolean, HashMap<Integer,BoundsDigraphNode>> nodesBoundsDigraph; // map of serial/side to nodes in the bounds digraph |
93 |
|
|
private double lmax; // maximum of the lower bounds: offset value for the boundsDigraph (not to have negative weights so that we can use Dijkstra's algo) |
94 |
duarte |
796 |
|
95 |
duarte |
804 |
private Bound[][] bounds; // the bounds (half-)matrix with lower/upper bounds for all pairs (or with nulls for pairs without assigned bounds yet) |
96 |
|
|
|
97 |
duarte |
802 |
private Random rand; // the random generator for sampleBounds and metrize |
98 |
|
|
|
99 |
duarte |
796 |
/*------------------------ constructors ------------------------------*/ |
100 |
|
|
|
101 |
|
|
/** |
102 |
|
|
* Constructs a new BoundsSmoother object given a RIGraph. |
103 |
|
|
* The RIGraph is converted into a set of distance ranges using the cutoff |
104 |
|
|
* as upper limit and hard-spheres as lower limit. |
105 |
|
|
* @param graph |
106 |
|
|
*/ |
107 |
duarte |
821 |
public BoundsSmoother(Bound[][] bounds) { |
108 |
|
|
this.bounds = bounds; |
109 |
|
|
this.conformationSize = bounds.length; |
110 |
duarte |
796 |
} |
111 |
|
|
|
112 |
|
|
/*----------------------- public methods ----------------------------*/ |
113 |
|
|
|
114 |
|
|
/** |
115 |
|
|
* Computes bounds for all pairs, based on the set of sparse distance ranges |
116 |
duarte |
804 |
* The returned array is a new array not the reference to the internal bounds array. |
117 |
duarte |
796 |
* @return a 2-dimensional array with the bounds for all pairs of residues, the |
118 |
|
|
* indices of the array can be mapped to residue serials through {@link #getResserFromIdx(int)} |
119 |
|
|
* and are guaranteed to be in the same order as the residue serials. |
120 |
|
|
*/ |
121 |
|
|
public Bound[][] getBoundsAllPairs() { |
122 |
duarte |
804 |
computeTriangleInequality(); |
123 |
|
|
return copyBounds(bounds); // we return a copy of the internal bounds array so that we can keep modifying it without side-effects to the returned reference |
124 |
duarte |
796 |
} |
125 |
duarte |
798 |
|
126 |
duarte |
796 |
/** |
127 |
duarte |
821 |
* Gets a random sample from the internal bounds array of all pairs of distance ranges |
128 |
|
|
* @return a symmetric metric matrix (both sides filled) |
129 |
|
|
* @throws NullPointerException if the internal bounds array doesn't contain bounds for all pairs |
130 |
duarte |
796 |
*/ |
131 |
duarte |
804 |
public Matrix sampleBounds() { |
132 |
duarte |
821 |
return sampleBounds(true); |
133 |
duarte |
796 |
} |
134 |
duarte |
821 |
|
135 |
duarte |
803 |
/** |
136 |
duarte |
804 |
* Performs partial metrization for the internal bounds array. |
137 |
|
|
* The internal bounds array is updated with the new bounds after metrization. |
138 |
|
|
* The idea is that metrization doesn't need to be done for all atoms but only |
139 |
|
|
* for a handful of them (called roots). This results in a much faster algorithm having |
140 |
|
|
* almost the same sampling properties as full metrization. |
141 |
|
|
* See "Sampling and efficiency of metric matrix distance geometry: A novel partial |
142 |
|
|
* metrization algorithm", Kuszewski J, Nilges M, Bruenger AT, 1992, Journal of Biomolecular NMR |
143 |
duarte |
803 |
* @return |
144 |
|
|
*/ |
145 |
duarte |
804 |
public Matrix metrize() { |
146 |
duarte |
803 |
|
147 |
duarte |
821 |
initSeed(); |
148 |
|
|
|
149 |
duarte |
802 |
ArrayList<Integer> roots = new ArrayList<Integer>(); |
150 |
|
|
for (int count=1;count<=NUM_ROOTS_PARTIAL_METRIZATION;count++) { |
151 |
|
|
int root = rand.nextInt(conformationSize); |
152 |
duarte |
804 |
if (roots.contains(root)) { |
153 |
|
|
root = rand.nextInt(conformationSize); |
154 |
|
|
} |
155 |
|
|
if (roots.contains(root)) { |
156 |
|
|
System.err.println("Warning: repeated root atom while doing metrization: "+root); |
157 |
|
|
} |
158 |
duarte |
803 |
if (DEBUG) System.out.println("Picked root: "+root); |
159 |
duarte |
802 |
roots.add(root); |
160 |
duarte |
804 |
sampleBoundForRoot(root); // this alters directly the input bounds array |
161 |
|
|
computeTriangleInequality(); |
162 |
duarte |
802 |
} |
163 |
|
|
|
164 |
duarte |
803 |
// finally pick a value at random for all the other bounds |
165 |
duarte |
821 |
return sampleBounds(false); |
166 |
duarte |
802 |
} |
167 |
|
|
|
168 |
duarte |
821 |
/*----------------------- private methods ---------------------------*/ |
169 |
|
|
|
170 |
duarte |
796 |
/** |
171 |
duarte |
821 |
* Initialises (or resets the random seed). |
172 |
|
|
* If DEBUG flag is true the random seed will be a fixed value DEBUG_SEED |
173 |
duarte |
796 |
*/ |
174 |
duarte |
821 |
private void initSeed() { |
175 |
|
|
if (DEBUG) { |
176 |
|
|
rand = new Random(DEBUG_SEED); |
177 |
|
|
} else { |
178 |
|
|
rand = new Random(); |
179 |
|
|
} |
180 |
duarte |
796 |
} |
181 |
|
|
|
182 |
duarte |
804 |
/** |
183 |
duarte |
821 |
* Gets a random sample from the internal bounds array of all pairs distance ranges |
184 |
|
|
* @param initSeed if true the random seed is reinitialised, if false the existing one will be used |
185 |
|
|
* @return a symmetric metric matrix (both sides filled) |
186 |
|
|
* @throws NullPointerException if the internal bounds array doesn't contain bounds for all pairs |
187 |
|
|
*/ |
188 |
|
|
private Matrix sampleBounds(boolean initSeed) { |
189 |
|
|
if (initSeed) { |
190 |
|
|
initSeed(); |
191 |
|
|
} |
192 |
|
|
double[][] matrix = new double[conformationSize][conformationSize]; |
193 |
|
|
for (int i=0;i<conformationSize;i++) { |
194 |
|
|
for (int j=0;j<conformationSize;j++) { |
195 |
|
|
if (j>i) { |
196 |
|
|
matrix[i][j]= bounds[i][j].lower+rand.nextDouble()*(bounds[i][j].upper-bounds[i][j].lower); |
197 |
|
|
} else if (j<i) { |
198 |
|
|
matrix[i][j]=matrix[j][i]; |
199 |
|
|
} |
200 |
|
|
} |
201 |
|
|
} |
202 |
|
|
return new Matrix(matrix); |
203 |
|
|
} |
204 |
|
|
|
205 |
|
|
/** |
206 |
duarte |
804 |
* For given root atom samples a value from the distance ranges of the root |
207 |
|
|
* to all of its neighbours (updating the internal bounds array with the new sampled bounds) |
208 |
|
|
* @param root |
209 |
|
|
*/ |
210 |
|
|
private void sampleBoundForRoot(int root) { |
211 |
duarte |
803 |
for (int neighb=0;neighb<conformationSize;neighb++) { |
212 |
|
|
if (neighb==root) continue; // avoid the diagonal (which contains a null Bound) |
213 |
duarte |
802 |
int i = root; |
214 |
duarte |
803 |
int j = neighb; |
215 |
|
|
if (neighb<root) { |
216 |
|
|
i = neighb; |
217 |
duarte |
802 |
j = root; |
218 |
|
|
} |
219 |
|
|
double sampledValue = bounds[i][j].lower+rand.nextDouble()*(bounds[i][j].upper-bounds[i][j].lower); |
220 |
duarte |
803 |
bounds[i][j].lower = sampledValue; |
221 |
|
|
bounds[i][j].upper = sampledValue; |
222 |
|
|
if (DEBUG) System.out.print(bounds[i][j]); |
223 |
duarte |
802 |
} |
224 |
duarte |
803 |
if (DEBUG) System.out.println("\n"); |
225 |
duarte |
802 |
} |
226 |
|
|
|
227 |
duarte |
796 |
/** |
228 |
duarte |
804 |
* Computes bounds for all pairs through triangle inequalities from the bounds member variable |
229 |
|
|
* containing a set of lower/upper bounds (sparse or full) |
230 |
|
|
* The bounds array is modified to contain the new bounds. |
231 |
|
|
* This is guaranteed to run in o(nmlog(n)), thus if the input is sparse then it's pretty fast: o(n2log(n)) but if |
232 |
|
|
* the input is the full set of bounds then we have o(n3log(n)). |
233 |
duarte |
798 |
*/ |
234 |
duarte |
804 |
private void computeTriangleInequality() { |
235 |
duarte |
803 |
double MARGIN = 0.0001; // for comparing doubles we need some tolerance value |
236 |
duarte |
804 |
|
237 |
|
|
double[][] lowerBounds = getLowerBoundsAllPairs(); |
238 |
|
|
double[][] upperBounds = getUpperBoundsAllPairs(); |
239 |
duarte |
802 |
for (int i=0;i<conformationSize;i++) { |
240 |
|
|
for (int j=i+1;j<conformationSize;j++) { |
241 |
duarte |
798 |
double upperBound = upperBounds[i][j]; |
242 |
|
|
double lowerBound = lowerBounds[i][j]; |
243 |
duarte |
804 |
|
244 |
|
|
if (bounds[i][j]!=null) { |
245 |
|
|
if (lowerBound>upperBound+MARGIN) { |
246 |
|
|
//System.err.println("old: "+sparseBounds[i][j]+" new: "+new Bound(lowerBound,upperBound)); |
247 |
|
|
|
248 |
|
|
// During metrization sometimes a new upper bound is found that is below the new lower bound |
249 |
|
|
// (actually in these cases the new lower bound coincides with the old one i.e. nothing new was |
250 |
|
|
// found through triangle inequality for the lower bound). |
251 |
|
|
// For some reason it doesn't happen the other way around: a new lower bound found that is |
252 |
|
|
// above the new (coinciding with old) upper bound. I suppose this is because the triangle inequality |
253 |
|
|
// "is a lot more effective at reducing the upper bounds than increasing the lower bounds" (quoting Havel) |
254 |
|
|
// To correct this we set both lower and upper to the newly found upper, i.e. we assume that |
255 |
|
|
// the new upper bound is better because is in accordance to the triangle inequality |
256 |
|
|
lowerBound=upperBound; |
257 |
|
|
|
258 |
|
|
//if (upperBound<sparseBounds[i][j].upper-MARGIN) |
259 |
|
|
// System.err.printf("new upper bound (%4.1f) for pair %3d %3d is smaller than old upper bound (%4.1f)\n",upperBound,i,j,sparseBounds[i][j].upper); |
260 |
|
|
//if (lowerBound>sparseBounds[i][j].lower+MARGIN) |
261 |
|
|
// System.err.printf("new lower bound (%4.1f) for pair %3d %3d is bigger than old lower bound (%4.1f)\n",lowerBound,i,j,sparseBounds[i][j].lower); |
262 |
|
|
} |
263 |
|
|
|
264 |
|
|
bounds[i][j].lower = lowerBound; |
265 |
|
|
bounds[i][j].upper = upperBound; |
266 |
|
|
} else { |
267 |
|
|
bounds[i][j] = new Bound(lowerBound,upperBound); |
268 |
duarte |
803 |
} |
269 |
duarte |
802 |
// sanity check: lower bounds can't be bigger than upper bounds! |
270 |
duarte |
803 |
if (lowerBound>upperBound+MARGIN) { |
271 |
duarte |
802 |
System.err.printf("Warning: lower bound (%4.1f) for pair "+i+" "+j+" is bigger than upper bound (%4.1f)\n",lowerBound,upperBound); |
272 |
duarte |
798 |
} |
273 |
|
|
} |
274 |
|
|
} |
275 |
|
|
} |
276 |
|
|
|
277 |
|
|
/** |
278 |
duarte |
804 |
* Computes upper bounds for all pairs from a set of upper bounds (sparse or full) based |
279 |
duarte |
796 |
* on the triangle inequality. |
280 |
|
|
* The computation is actually just an all pairs shortest path using Dijkstra's |
281 |
|
|
* shortest path algorithm (as the set of distance coming from contact maps is |
282 |
|
|
* very sparse this algorithm should be more efficient than Floyd's: Dijkstra's is |
283 |
|
|
* o(nm logn) and Floyd's is o(n3)) |
284 |
|
|
* @return a 2-dimensional array with the upper bounds for all pairs of residues, the |
285 |
|
|
* indices of the array can be mapped to residue serials through {@link #getResserFromIdx(int)} |
286 |
|
|
* and are guaranteed to be in the same order as the residue serials. |
287 |
|
|
*/ |
288 |
duarte |
804 |
private double[][] getUpperBoundsAllPairs() { |
289 |
duarte |
796 |
double[][] upperBoundsMatrix = new double[conformationSize][conformationSize]; |
290 |
duarte |
804 |
SparseGraph<Integer,SimpleEdge> upperBoundGraph = convertBoundsMatrixToUpperBoundGraph(); |
291 |
duarte |
796 |
DijkstraDistance<Integer, SimpleEdge> dd = new DijkstraDistance<Integer, SimpleEdge>(upperBoundGraph,WeightTransformer); |
292 |
duarte |
802 |
|
293 |
|
|
for (int i=0;i<conformationSize;i++) { |
294 |
|
|
for (int j=i+1;j<conformationSize;j++) { |
295 |
|
|
upperBoundsMatrix[i][j] = dd.getDistance(i, j).doubleValue(); |
296 |
duarte |
796 |
} |
297 |
|
|
} |
298 |
|
|
return upperBoundsMatrix; |
299 |
|
|
} |
300 |
|
|
|
301 |
|
|
/** |
302 |
duarte |
804 |
* Computes lower bounds for all pairs given a set of upper/lower bounds (sparse or full) |
303 |
|
|
* based on the triangle inequality. |
304 |
duarte |
796 |
* |
305 |
duarte |
797 |
* NOTE that because we use Dijkstra's algorithm for the computation of the shortest paths, we can't use negative |
306 |
|
|
* weights (see http://en.wikipedia.org/wiki/Dijkstra%27s_algorithm). Because of this the boundsDigraph result |
307 |
duarte |
804 |
* of calling {@link #convertBoundsMatrixToBoundsDigraph()}} have values offset with the maximum lower bound (lmax). |
308 |
duarte |
797 |
* Thus after computing the shortest paths we have to revert back that offset by counting the number of hops the shortest path has. |
309 |
|
|
* |
310 |
duarte |
796 |
* @return a 2-dimensional array with the lower bounds for all pairs of residues, the |
311 |
|
|
* indices of the array can be mapped to residue serials through {@link #getResserFromIdx(int)} |
312 |
|
|
* and are guaranteed to be in the same order as the residue serials. |
313 |
duarte |
797 |
* |
314 |
duarte |
804 |
* @see {@link #convertBoundsMatrixToBoundsDigraph()} and {@link #getUpperBoundsAllPairs()} |
315 |
duarte |
796 |
*/ |
316 |
duarte |
804 |
private double[][] getLowerBoundsAllPairs() { |
317 |
duarte |
797 |
double[][] lowerBoundsMatrix = new double[conformationSize][conformationSize]; |
318 |
|
|
// this is the bounds digraph as described by Crippen and Havel |
319 |
duarte |
804 |
SparseGraph<BoundsDigraphNode,SimpleEdge> boundsDigraph = convertBoundsMatrixToBoundsDigraph(); |
320 |
duarte |
797 |
DijkstraShortestPath<BoundsDigraphNode, SimpleEdge> dd = new DijkstraShortestPath<BoundsDigraphNode, SimpleEdge>(boundsDigraph,WeightTransformer); |
321 |
duarte |
802 |
|
322 |
|
|
for (int i=0;i<conformationSize;i++) { |
323 |
|
|
for (int j=i+1;j<conformationSize;j++) { |
324 |
|
|
int hops = dd.getPath(nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i), nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j)).size(); |
325 |
|
|
double lower = Math.abs( |
326 |
duarte |
797 |
(dd.getDistance(nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i), |
327 |
duarte |
802 |
nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j) |
328 |
|
|
).doubleValue()) |
329 |
duarte |
797 |
- (hops*lmax)); // the lower limit for the triangle inequality is: Math.abs(shortestpath-(hops*lmax)) |
330 |
duarte |
802 |
lowerBoundsMatrix[i][j] = Math.max(lower, HARD_SPHERES_BOUND); // we only set the new lower bound to the one found if is above the HARD_SPHERES_BOUND |
331 |
duarte |
796 |
} |
332 |
|
|
} |
333 |
duarte |
797 |
return lowerBoundsMatrix; |
334 |
duarte |
796 |
|
335 |
|
|
} |
336 |
|
|
|
337 |
|
|
/** |
338 |
duarte |
802 |
* Converts the bounds matrix to a graph with only the upper bounds: nodes indices of bounds |
339 |
|
|
* matrix, edges upper bounds (in SimpleEdge objects containing the upper bound value |
340 |
duarte |
796 |
* in their weight field). |
341 |
duarte |
802 |
* @param bounds |
342 |
duarte |
796 |
* @return |
343 |
|
|
*/ |
344 |
duarte |
804 |
private SparseGraph<Integer,SimpleEdge> convertBoundsMatrixToUpperBoundGraph() { |
345 |
duarte |
796 |
SparseGraph<Integer,SimpleEdge> upperBoundGraph = new SparseGraph<Integer, SimpleEdge>(); |
346 |
duarte |
802 |
for (int i=0;i<conformationSize;i++) { |
347 |
|
|
for (int j=0;j<conformationSize;j++) { |
348 |
|
|
if (bounds[i][j]!=null) { |
349 |
|
|
upperBoundGraph.addEdge(new SimpleEdge(bounds[i][j].upper), i, j, EdgeType.UNDIRECTED); |
350 |
|
|
} |
351 |
|
|
} |
352 |
duarte |
796 |
} |
353 |
|
|
return upperBoundGraph; |
354 |
|
|
} |
355 |
duarte |
797 |
|
356 |
duarte |
796 |
/** |
357 |
duarte |
797 |
* Constructs a bounds digraph (as described by Crippen and Havel) to compute the triangle inequality limits |
358 |
|
|
* for the lower bounds. |
359 |
|
|
* The graph is composed by 2 subgraphs (we call them left and right) each of them containing the set of all atoms. |
360 |
|
|
* Within the subgraphs there is an undirected edge between atoms i,j with weight the upper bounds for i,j |
361 |
|
|
* Between the subgraphs there is a directed edge from left to right between atoms i(left) to j(right) |
362 |
|
|
* with weight the negative of the lower bound i,j |
363 |
|
|
* NOTE that because we use Dijkstra's algorithm for the computation of the shortest paths, we can't use negative |
364 |
|
|
* weights (see http://en.wikipedia.org/wiki/Dijkstra%27s_algorithm). Thus we offset the values here to the maximum lower bound. |
365 |
|
|
* After computing the shortest paths we have to revert back that offset by counting the number of hops the shortest path has. |
366 |
|
|
* @return |
367 |
|
|
*/ |
368 |
duarte |
804 |
private SparseGraph<BoundsDigraphNode,SimpleEdge> convertBoundsMatrixToBoundsDigraph() { |
369 |
duarte |
797 |
// to do the offset thing (see docs above) we need to know first of all the max lower bound |
370 |
|
|
ArrayList<Double> lowerBounds = new ArrayList<Double>(); |
371 |
duarte |
802 |
for (int i=0;i<conformationSize;i++) { |
372 |
|
|
for (int j=0;j<conformationSize;j++) { |
373 |
|
|
if (bounds[i][j]!=null) { |
374 |
|
|
lowerBounds.add(bounds[i][j].lower); |
375 |
|
|
} |
376 |
|
|
} |
377 |
duarte |
797 |
} |
378 |
|
|
lmax = Collections.max(lowerBounds); // this is the offset value |
379 |
|
|
|
380 |
|
|
SparseGraph<BoundsDigraphNode,SimpleEdge> boundsDigraph = new SparseGraph<BoundsDigraphNode, SimpleEdge>(); |
381 |
|
|
// we have to store all nodes in a HashMap, so we can retrieve them by residue serial and side after |
382 |
|
|
nodesBoundsDigraph = new HashMap<Boolean, HashMap<Integer,BoundsDigraphNode>>(); |
383 |
|
|
nodesBoundsDigraph.put(BoundsDigraphNode.LEFT , new HashMap<Integer, BoundsDigraphNode>()); |
384 |
|
|
nodesBoundsDigraph.put(BoundsDigraphNode.RIGHT, new HashMap<Integer, BoundsDigraphNode>()); |
385 |
|
|
// first we create the nodes and store them into the HashMap |
386 |
duarte |
802 |
for (int i=0;i<conformationSize;i++) { |
387 |
duarte |
797 |
BoundsDigraphNode leftNode = new BoundsDigraphNode(i, BoundsDigraphNode.LEFT); |
388 |
|
|
BoundsDigraphNode rightNode = new BoundsDigraphNode(i, BoundsDigraphNode.RIGHT); |
389 |
|
|
boundsDigraph.addVertex(leftNode); |
390 |
|
|
boundsDigraph.addVertex(rightNode); |
391 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).put(i,leftNode); |
392 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).put(i,rightNode); |
393 |
|
|
} |
394 |
|
|
|
395 |
duarte |
802 |
for (int i=0;i<conformationSize;i++) { |
396 |
|
|
for (int j=0;j<conformationSize;j++) { |
397 |
|
|
if (bounds[i][j]!=null) { |
398 |
|
|
// first we add the upper bounds as undirected edges to the 2 subgraphs (left and right) |
399 |
|
|
boundsDigraph.addEdge(new SimpleEdge(lmax+bounds[i][j].upper), |
400 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i), |
401 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(j), |
402 |
|
|
EdgeType.UNDIRECTED); |
403 |
|
|
boundsDigraph.addEdge(new SimpleEdge(lmax+bounds[i][j].upper), |
404 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(i), |
405 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j), |
406 |
|
|
EdgeType.UNDIRECTED); |
407 |
|
|
// then we add the negative of the lower bounds as directed edges connecting nodes of subgraph left to subgraph right |
408 |
|
|
boundsDigraph.addEdge(new SimpleEdge(lmax-bounds[i][j].lower), |
409 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i), |
410 |
|
|
nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j), |
411 |
|
|
EdgeType.DIRECTED); |
412 |
|
|
} |
413 |
|
|
} |
414 |
duarte |
797 |
} |
415 |
|
|
return boundsDigraph; |
416 |
|
|
} |
417 |
|
|
|
418 |
duarte |
821 |
protected void printBounds() { |
419 |
duarte |
804 |
printBounds(this.bounds); |
420 |
|
|
} |
421 |
duarte |
802 |
|
422 |
duarte |
804 |
/*------------------------ statics ------------------------------*/ |
423 |
|
|
|
424 |
duarte |
802 |
/** |
425 |
|
|
* Deep copies given array of bounds |
426 |
|
|
* @param bounds |
427 |
|
|
* @return |
428 |
|
|
*/ |
429 |
duarte |
821 |
protected static Bound[][] copyBounds(Bound[][] bounds) { |
430 |
duarte |
802 |
Bound[][] newBounds = new Bound[bounds.length][bounds.length]; |
431 |
|
|
for (int i=0;i<bounds.length;i++) { |
432 |
|
|
for (int j=0;j<bounds[i].length;j++) { |
433 |
|
|
if (bounds[i][j]!=null) { |
434 |
|
|
newBounds[i][j] = new Bound(bounds[i][j].lower,bounds[i][j].upper); |
435 |
|
|
} |
436 |
duarte |
799 |
} |
437 |
|
|
} |
438 |
duarte |
802 |
return newBounds; |
439 |
duarte |
796 |
} |
440 |
duarte |
804 |
|
441 |
duarte |
821 |
protected static void printBounds(Bound[][] bounds) { |
442 |
duarte |
802 |
for (int i=0;i<bounds.length;i++) { |
443 |
|
|
for (int j=0;j<bounds[i].length;j++) { |
444 |
|
|
if (bounds[i][j]==null) { |
445 |
|
|
System.out.printf("%11s","null"); |
446 |
|
|
} else { |
447 |
|
|
System.out.print(bounds[i][j]); |
448 |
|
|
} |
449 |
|
|
} |
450 |
|
|
System.out.println(); |
451 |
|
|
} |
452 |
|
|
System.out.println(); |
453 |
|
|
} |
454 |
|
|
|
455 |
duarte |
796 |
/*-------------------------- main -------------------------------*/ |
456 |
|
|
|
457 |
|
|
/** |
458 |
|
|
* To test the class |
459 |
|
|
*/ |
460 |
duarte |
821 |
// public static void main (String[] args) throws Exception { |
461 |
|
|
// boolean debug = false; |
462 |
|
|
// boolean writeFiles = false; |
463 |
|
|
// int numModels = 10; |
464 |
|
|
// Embedder.ScalingMethod scalingMethod = Embedder.ScalingMethod.AVRG_INTER_CA_DIST; |
465 |
|
|
// boolean metrize = true; // if true metrization performed, if false random sampling |
466 |
|
|
// |
467 |
|
|
// String pdbCode = "1bxy"; |
468 |
|
|
// String pdbChainCode = "A"; |
469 |
|
|
// String ct = "Ca"; |
470 |
|
|
// double cutoff = 8.0; |
471 |
|
|
// |
472 |
|
|
// Pdb pdb = new PdbasePdb(pdbCode); |
473 |
|
|
// pdb.load(pdbChainCode); |
474 |
|
|
// Pdb pdbEmbedded = new PdbasePdb(pdbCode); |
475 |
|
|
// pdbEmbedded.load(pdbChainCode); |
476 |
|
|
// |
477 |
|
|
// RIGraph graph = pdb.get_graph(ct, cutoff); |
478 |
|
|
// BoundsSmoother bs = new BoundsSmoother(graph); |
479 |
|
|
// Bound[][] initialBoundsAllPairs = bs.getBoundsAllPairs(); |
480 |
|
|
// |
481 |
|
|
// if (debug) { |
482 |
|
|
// // all pairs bounds after triangle inequality |
483 |
|
|
// printBounds(initialBoundsAllPairs); |
484 |
|
|
// } |
485 |
|
|
// |
486 |
|
|
// |
487 |
|
|
// System.out.printf("%6s\t%6s\t%6s", "rmsd","rmsdm","viols"); |
488 |
|
|
// System.out.println(); |
489 |
|
|
// for (int model=0;model<numModels;model++) { |
490 |
|
|
// |
491 |
|
|
// Matrix matrix = null; |
492 |
|
|
// if (!metrize) { |
493 |
|
|
// matrix = bs.sampleBounds(); |
494 |
|
|
// } else { |
495 |
|
|
// matrix = bs.metrize(); |
496 |
|
|
// } |
497 |
|
|
// |
498 |
|
|
// if (debug) { |
499 |
|
|
// // bounds after metrization |
500 |
|
|
// bs.printBounds(); |
501 |
|
|
// } |
502 |
|
|
// |
503 |
|
|
// if (debug) { |
504 |
|
|
// printMatrix(matrix); |
505 |
|
|
// } |
506 |
|
|
// |
507 |
|
|
// Embedder embedder = new Embedder(matrix); |
508 |
|
|
// Vector3d[] embedding = embedder.embed(scalingMethod); |
509 |
|
|
// pdbEmbedded.setAtomsCoords(embedding, "CA"); |
510 |
|
|
// |
511 |
|
|
// double rmsd = pdb.rmsd(pdbEmbedded, "Ca"); |
512 |
|
|
// pdb.mirror(); |
513 |
|
|
// double rmsdm = pdb.rmsd(pdbEmbedded, "Ca"); |
514 |
|
|
// |
515 |
|
|
// if (rmsd>rmsdm ) { |
516 |
|
|
// pdbEmbedded.mirror(); |
517 |
|
|
// } |
518 |
|
|
// |
519 |
|
|
// if (writeFiles) |
520 |
|
|
// pdbEmbedded.dump2pdbfile("/project/StruPPi/jose/embed/embed_"+pdbCode+pdbChainCode+"_"+model+".pdb"); |
521 |
|
|
// |
522 |
|
|
// Matrix matrixEmbedded = pdbEmbedded.calculateDistMatrix("Ca"); |
523 |
|
|
// |
524 |
|
|
// System.out.printf("%6.3f\t%6.3f\t%6d",rmsd,rmsdm,getNumberViolations(matrixEmbedded, initialBoundsAllPairs)); |
525 |
|
|
// System.out.println(); |
526 |
|
|
// |
527 |
|
|
// if (debug) { |
528 |
|
|
// printViolations(matrixEmbedded, initialBoundsAllPairs); |
529 |
|
|
// } |
530 |
|
|
// } |
531 |
|
|
// } |
532 |
duarte |
796 |
|
533 |
|
|
} |