ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/embed/BoundsSmoother.java
Revision: 804
Committed: Tue Nov 18 17:49:17 2008 UTC (15 years, 11 months ago) by duarte
File size: 24773 byte(s)
Log Message:
Made Bound[][] bounds a member, thus changing the signature of most methods. Refactored getBoundsAllPairs(bounds) to better name computeTriangleInequality: now it uses as input the bounds member variable and updates it as output.
Line User Rev File contents
1 duarte 796 package embed;
2    
3 duarte 797 import java.util.ArrayList;
4     import java.util.Collections;
5     import java.util.HashMap;
6 duarte 796 import java.util.Random;
7     import java.util.TreeMap;
8    
9     import javax.vecmath.Vector3d;
10    
11     import org.apache.commons.collections15.Transformer;
12    
13     import Jama.Matrix;
14    
15     import edu.uci.ics.jung.algorithms.shortestpath.DijkstraDistance;
16 duarte 797 import edu.uci.ics.jung.algorithms.shortestpath.DijkstraShortestPath;
17 duarte 796 import edu.uci.ics.jung.graph.SparseGraph;
18     import edu.uci.ics.jung.graph.util.EdgeType;
19     import edu.uci.ics.jung.graph.util.Pair;
20     import proteinstructure.AAinfo;
21     import proteinstructure.Pdb;
22     import proteinstructure.PdbasePdb;
23     import proteinstructure.RIGEdge;
24     import proteinstructure.RIGNode;
25     import proteinstructure.RIGraph;
26    
27     /**
28     * Implementation of the bounds smoothing part of the EMBED algorithm of Crippen and Havel
29     * Given a sparse set of distance ranges between a set of atoms it finds distance bounds for
30     * all pairs of atoms, using the triangle inequality.
31     *
32     * Taken from "Distance Geometry: Theory, Algorithms, and Chemical Applications" (section 3.1) by T.F. Havel,
33     * in Encyclopedia of Computational Chemistry (Wiley, New York, 1998).
34 duarte 804 * See also:
35     * "Distance Geometry and Molecular Conformation" (Chapter 5) by G.M. Crippen and T.F. Havel (Wiley)
36     * "Sampling and efficiency of metric matrix distance geometry: A novel partial
37     * metrization algorithm", Kuszewski J, Nilges M, Bruenger AT, 1992, Journal of Biomolecular NMR
38 duarte 796 *
39     * @author duarte
40     *
41     */
42     public class BoundsSmoother {
43    
44 duarte 800 protected static final double BB_CA_DIST = 3.8;
45 duarte 799 private static final double HARD_SPHERES_BOUND = AAinfo.DIST_MIN_CA ;
46 duarte 800
47     private static final boolean DEBUG = false;
48 duarte 802 private static final long DEBUG_SEED = 123456;
49    
50 duarte 804 private static final int NUM_ROOTS_PARTIAL_METRIZATION = 4; // we choose 4 as in Kuszewski et al.
51 duarte 796
52     /*----------------- helper classes and transformers -----------------*/
53    
54 duarte 797 Transformer<SimpleEdge, Number> WeightTransformer = new Transformer<SimpleEdge, Number>() {
55     public Number transform(SimpleEdge input) {
56 duarte 796 return input.weight;
57     }
58     };
59    
60     private class SimpleEdge {
61     public double weight;
62     public SimpleEdge(double weight) {
63     this.weight = weight;
64     }
65     }
66 duarte 797
67     private class BoundsDigraphNode {
68     public static final boolean LEFT = true;
69     public static final boolean RIGHT = false;
70     private boolean side; // true: left, false: right
71     private int resSerial;
72     public BoundsDigraphNode(int resSerial, boolean side) {
73     this.resSerial = resSerial;
74     this.side = side;
75     }
76     public boolean isRight() {
77     return !side;
78     }
79     public boolean isLeft() {
80     return side;
81     }
82     public int getSerial() {
83     return resSerial;
84     }
85     public boolean equals(Object other) {
86     if (! (other instanceof BoundsDigraphNode)) return false;
87     BoundsDigraphNode otherNode = (BoundsDigraphNode)other;
88     if (otherNode.resSerial==this.resSerial && otherNode.side == this.side) {
89     return true;
90     } else {
91     return false;
92     }
93     }
94     public String toString() {
95     return resSerial+(side?"L":"R");
96     }
97     }
98 duarte 796
99     /*---------------------- member variables ----------------------------*/
100    
101     private RIGraph rig;
102 duarte 802 private TreeMap<Integer,Integer> idx2resser;
103 duarte 796 private int conformationSize;
104 duarte 797 private HashMap<Boolean, HashMap<Integer,BoundsDigraphNode>> nodesBoundsDigraph; // map of serial/side to nodes in the bounds digraph
105     private double lmax; // maximum of the lower bounds: offset value for the boundsDigraph (not to have negative weights so that we can use Dijkstra's algo)
106 duarte 796
107 duarte 804 private Bound[][] bounds; // the bounds (half-)matrix with lower/upper bounds for all pairs (or with nulls for pairs without assigned bounds yet)
108    
109 duarte 802 private Random rand; // the random generator for sampleBounds and metrize
110    
111 duarte 796 /*------------------------ constructors ------------------------------*/
112    
113     /**
114     * Constructs a new BoundsSmoother object given a RIGraph.
115     * The RIGraph is converted into a set of distance ranges using the cutoff
116     * as upper limit and hard-spheres as lower limit.
117     * @param graph
118     */
119     public BoundsSmoother(RIGraph graph) {
120     this.rig = graph;
121     this.conformationSize = this.rig.getObsLength();
122 duarte 802 if (DEBUG) {
123     rand = new Random(DEBUG_SEED);
124     } else {
125     rand = new Random();
126     }
127 duarte 796 }
128    
129     /*----------------------- public methods ----------------------------*/
130    
131     /**
132     * Computes bounds for all pairs, based on the set of sparse distance ranges
133 duarte 804 * The returned array is a new array not the reference to the internal bounds array.
134 duarte 796 * @return a 2-dimensional array with the bounds for all pairs of residues, the
135     * indices of the array can be mapped to residue serials through {@link #getResserFromIdx(int)}
136     * and are guaranteed to be in the same order as the residue serials.
137     */
138     public Bound[][] getBoundsAllPairs() {
139 duarte 804 convertRIGraphToBoundsMatrix(this.rig); // this initializes the bounds array
140     computeTriangleInequality();
141     return copyBounds(bounds); // we return a copy of the internal bounds array so that we can keep modifying it without side-effects to the returned reference
142 duarte 796 }
143 duarte 798
144 duarte 796 /**
145 duarte 804 * Gets a random sample from the internal bounds array of all pairs distance ranges
146 duarte 802 * @return a simmetric metric matrix (both sides filled)
147 duarte 804 * @throws NullPointerException if the internal bounds array doesn't contain bounds for all pairs
148 duarte 796 */
149 duarte 804 public Matrix sampleBounds() {
150 duarte 796 double[][] matrix = new double[conformationSize][conformationSize];
151     for (int i=0;i<conformationSize;i++) {
152     for (int j=0;j<conformationSize;j++) {
153     if (j>i) {
154     matrix[i][j]= bounds[i][j].lower+rand.nextDouble()*(bounds[i][j].upper-bounds[i][j].lower);
155     } else if (j<i) {
156     matrix[i][j]=matrix[j][i];
157     }
158     }
159     }
160     return new Matrix(matrix);
161     }
162 duarte 798
163 duarte 803 /**
164 duarte 804 * Performs partial metrization for the internal bounds array.
165     * The internal bounds array is updated with the new bounds after metrization.
166     * The idea is that metrization doesn't need to be done for all atoms but only
167     * for a handful of them (called roots). This results in a much faster algorithm having
168     * almost the same sampling properties as full metrization.
169     * See "Sampling and efficiency of metric matrix distance geometry: A novel partial
170     * metrization algorithm", Kuszewski J, Nilges M, Bruenger AT, 1992, Journal of Biomolecular NMR
171 duarte 803 * @return
172     */
173 duarte 804 public Matrix metrize() {
174 duarte 803
175 duarte 802 ArrayList<Integer> roots = new ArrayList<Integer>();
176     for (int count=1;count<=NUM_ROOTS_PARTIAL_METRIZATION;count++) {
177     int root = rand.nextInt(conformationSize);
178 duarte 804 if (roots.contains(root)) {
179     root = rand.nextInt(conformationSize);
180     }
181     if (roots.contains(root)) {
182     System.err.println("Warning: repeated root atom while doing metrization: "+root);
183     }
184 duarte 803 if (DEBUG) System.out.println("Picked root: "+root);
185 duarte 802 roots.add(root);
186 duarte 804 sampleBoundForRoot(root); // this alters directly the input bounds array
187     computeTriangleInequality();
188 duarte 802 }
189    
190 duarte 803 // finally pick a value at random for all the other bounds
191 duarte 804 return sampleBounds();
192 duarte 802 }
193    
194 duarte 796 /**
195     * Maps from residue serials to indices of the matrices returned by {@link #getBoundsAllPairs()} and
196     * {@link #sampleBounds(Bound[][])}
197     * @param idx
198     * @return
199     */
200     public int getResserFromIdx(int idx) {
201 duarte 802 return idx2resser.get(idx);
202 duarte 796 }
203    
204     /*----------------------- private methods ---------------------------*/
205    
206 duarte 804 /**
207     * For given root atom samples a value from the distance ranges of the root
208     * to all of its neighbours (updating the internal bounds array with the new sampled bounds)
209     * @param root
210     */
211     private void sampleBoundForRoot(int root) {
212 duarte 803 for (int neighb=0;neighb<conformationSize;neighb++) {
213     if (neighb==root) continue; // avoid the diagonal (which contains a null Bound)
214 duarte 802 int i = root;
215 duarte 803 int j = neighb;
216     if (neighb<root) {
217     i = neighb;
218 duarte 802 j = root;
219     }
220     double sampledValue = bounds[i][j].lower+rand.nextDouble()*(bounds[i][j].upper-bounds[i][j].lower);
221 duarte 803 bounds[i][j].lower = sampledValue;
222     bounds[i][j].upper = sampledValue;
223     if (DEBUG) System.out.print(bounds[i][j]);
224 duarte 802 }
225 duarte 803 if (DEBUG) System.out.println("\n");
226 duarte 802 }
227    
228 duarte 796 /**
229 duarte 804 * Computes bounds for all pairs through triangle inequalities from the bounds member variable
230     * containing a set of lower/upper bounds (sparse or full)
231     * The bounds array is modified to contain the new bounds.
232     * This is guaranteed to run in o(nmlog(n)), thus if the input is sparse then it's pretty fast: o(n2log(n)) but if
233     * the input is the full set of bounds then we have o(n3log(n)).
234 duarte 798 */
235 duarte 804 private void computeTriangleInequality() {
236 duarte 803 double MARGIN = 0.0001; // for comparing doubles we need some tolerance value
237 duarte 804
238     double[][] lowerBounds = getLowerBoundsAllPairs();
239     double[][] upperBounds = getUpperBoundsAllPairs();
240 duarte 802 for (int i=0;i<conformationSize;i++) {
241     for (int j=i+1;j<conformationSize;j++) {
242 duarte 798 double upperBound = upperBounds[i][j];
243     double lowerBound = lowerBounds[i][j];
244 duarte 804
245     if (bounds[i][j]!=null) {
246     if (lowerBound>upperBound+MARGIN) {
247     //System.err.println("old: "+sparseBounds[i][j]+" new: "+new Bound(lowerBound,upperBound));
248    
249     // During metrization sometimes a new upper bound is found that is below the new lower bound
250     // (actually in these cases the new lower bound coincides with the old one i.e. nothing new was
251     // found through triangle inequality for the lower bound).
252     // For some reason it doesn't happen the other way around: a new lower bound found that is
253     // above the new (coinciding with old) upper bound. I suppose this is because the triangle inequality
254     // "is a lot more effective at reducing the upper bounds than increasing the lower bounds" (quoting Havel)
255     // To correct this we set both lower and upper to the newly found upper, i.e. we assume that
256     // the new upper bound is better because is in accordance to the triangle inequality
257     lowerBound=upperBound;
258    
259     //if (upperBound<sparseBounds[i][j].upper-MARGIN)
260     // System.err.printf("new upper bound (%4.1f) for pair %3d %3d is smaller than old upper bound (%4.1f)\n",upperBound,i,j,sparseBounds[i][j].upper);
261     //if (lowerBound>sparseBounds[i][j].lower+MARGIN)
262     // System.err.printf("new lower bound (%4.1f) for pair %3d %3d is bigger than old lower bound (%4.1f)\n",lowerBound,i,j,sparseBounds[i][j].lower);
263     }
264    
265     bounds[i][j].lower = lowerBound;
266     bounds[i][j].upper = upperBound;
267     } else {
268     bounds[i][j] = new Bound(lowerBound,upperBound);
269 duarte 803 }
270 duarte 802 // sanity check: lower bounds can't be bigger than upper bounds!
271 duarte 803 if (lowerBound>upperBound+MARGIN) {
272 duarte 802 System.err.printf("Warning: lower bound (%4.1f) for pair "+i+" "+j+" is bigger than upper bound (%4.1f)\n",lowerBound,upperBound);
273 duarte 798 }
274     }
275     }
276     }
277    
278     /**
279 duarte 804 * Computes upper bounds for all pairs from a set of upper bounds (sparse or full) based
280 duarte 796 * on the triangle inequality.
281     * The computation is actually just an all pairs shortest path using Dijkstra's
282     * shortest path algorithm (as the set of distance coming from contact maps is
283     * very sparse this algorithm should be more efficient than Floyd's: Dijkstra's is
284     * o(nm logn) and Floyd's is o(n3))
285     * @return a 2-dimensional array with the upper bounds for all pairs of residues, the
286     * indices of the array can be mapped to residue serials through {@link #getResserFromIdx(int)}
287     * and are guaranteed to be in the same order as the residue serials.
288     */
289 duarte 804 private double[][] getUpperBoundsAllPairs() {
290 duarte 796 double[][] upperBoundsMatrix = new double[conformationSize][conformationSize];
291 duarte 804 SparseGraph<Integer,SimpleEdge> upperBoundGraph = convertBoundsMatrixToUpperBoundGraph();
292 duarte 796 DijkstraDistance<Integer, SimpleEdge> dd = new DijkstraDistance<Integer, SimpleEdge>(upperBoundGraph,WeightTransformer);
293 duarte 802
294     for (int i=0;i<conformationSize;i++) {
295     for (int j=i+1;j<conformationSize;j++) {
296     upperBoundsMatrix[i][j] = dd.getDistance(i, j).doubleValue();
297 duarte 796 }
298     }
299     return upperBoundsMatrix;
300     }
301    
302     /**
303 duarte 804 * Computes lower bounds for all pairs given a set of upper/lower bounds (sparse or full)
304     * based on the triangle inequality.
305 duarte 796 *
306 duarte 797 * NOTE that because we use Dijkstra's algorithm for the computation of the shortest paths, we can't use negative
307     * weights (see http://en.wikipedia.org/wiki/Dijkstra%27s_algorithm). Because of this the boundsDigraph result
308 duarte 804 * of calling {@link #convertBoundsMatrixToBoundsDigraph()}} have values offset with the maximum lower bound (lmax).
309 duarte 797 * Thus after computing the shortest paths we have to revert back that offset by counting the number of hops the shortest path has.
310     *
311 duarte 796 * @return a 2-dimensional array with the lower bounds for all pairs of residues, the
312     * indices of the array can be mapped to residue serials through {@link #getResserFromIdx(int)}
313     * and are guaranteed to be in the same order as the residue serials.
314 duarte 797 *
315 duarte 804 * @see {@link #convertBoundsMatrixToBoundsDigraph()} and {@link #getUpperBoundsAllPairs()}
316 duarte 796 */
317 duarte 804 private double[][] getLowerBoundsAllPairs() {
318 duarte 797 double[][] lowerBoundsMatrix = new double[conformationSize][conformationSize];
319     // this is the bounds digraph as described by Crippen and Havel
320 duarte 804 SparseGraph<BoundsDigraphNode,SimpleEdge> boundsDigraph = convertBoundsMatrixToBoundsDigraph();
321 duarte 797 DijkstraShortestPath<BoundsDigraphNode, SimpleEdge> dd = new DijkstraShortestPath<BoundsDigraphNode, SimpleEdge>(boundsDigraph,WeightTransformer);
322 duarte 802
323     for (int i=0;i<conformationSize;i++) {
324     for (int j=i+1;j<conformationSize;j++) {
325     int hops = dd.getPath(nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i), nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j)).size();
326     double lower = Math.abs(
327 duarte 797 (dd.getDistance(nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i),
328 duarte 802 nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j)
329     ).doubleValue())
330 duarte 797 - (hops*lmax)); // the lower limit for the triangle inequality is: Math.abs(shortestpath-(hops*lmax))
331 duarte 802 lowerBoundsMatrix[i][j] = Math.max(lower, HARD_SPHERES_BOUND); // we only set the new lower bound to the one found if is above the HARD_SPHERES_BOUND
332 duarte 796 }
333     }
334 duarte 797 return lowerBoundsMatrix;
335 duarte 796
336     }
337    
338     /**
339 duarte 802 * Converts the bounds matrix to a graph with only the upper bounds: nodes indices of bounds
340     * matrix, edges upper bounds (in SimpleEdge objects containing the upper bound value
341 duarte 796 * in their weight field).
342 duarte 802 * @param bounds
343 duarte 796 * @return
344     */
345 duarte 804 private SparseGraph<Integer,SimpleEdge> convertBoundsMatrixToUpperBoundGraph() {
346 duarte 796 SparseGraph<Integer,SimpleEdge> upperBoundGraph = new SparseGraph<Integer, SimpleEdge>();
347 duarte 802 for (int i=0;i<conformationSize;i++) {
348     for (int j=0;j<conformationSize;j++) {
349     if (bounds[i][j]!=null) {
350     upperBoundGraph.addEdge(new SimpleEdge(bounds[i][j].upper), i, j, EdgeType.UNDIRECTED);
351     }
352     }
353 duarte 796 }
354     return upperBoundGraph;
355     }
356 duarte 797
357 duarte 796 /**
358 duarte 797 * Constructs a bounds digraph (as described by Crippen and Havel) to compute the triangle inequality limits
359     * for the lower bounds.
360     * The graph is composed by 2 subgraphs (we call them left and right) each of them containing the set of all atoms.
361     * Within the subgraphs there is an undirected edge between atoms i,j with weight the upper bounds for i,j
362     * Between the subgraphs there is a directed edge from left to right between atoms i(left) to j(right)
363     * with weight the negative of the lower bound i,j
364     * NOTE that because we use Dijkstra's algorithm for the computation of the shortest paths, we can't use negative
365     * weights (see http://en.wikipedia.org/wiki/Dijkstra%27s_algorithm). Thus we offset the values here to the maximum lower bound.
366     * After computing the shortest paths we have to revert back that offset by counting the number of hops the shortest path has.
367     * @return
368     */
369 duarte 804 private SparseGraph<BoundsDigraphNode,SimpleEdge> convertBoundsMatrixToBoundsDigraph() {
370 duarte 797 // to do the offset thing (see docs above) we need to know first of all the max lower bound
371     ArrayList<Double> lowerBounds = new ArrayList<Double>();
372 duarte 802 for (int i=0;i<conformationSize;i++) {
373     for (int j=0;j<conformationSize;j++) {
374     if (bounds[i][j]!=null) {
375     lowerBounds.add(bounds[i][j].lower);
376     }
377     }
378 duarte 797 }
379     lmax = Collections.max(lowerBounds); // this is the offset value
380    
381     SparseGraph<BoundsDigraphNode,SimpleEdge> boundsDigraph = new SparseGraph<BoundsDigraphNode, SimpleEdge>();
382     // we have to store all nodes in a HashMap, so we can retrieve them by residue serial and side after
383     nodesBoundsDigraph = new HashMap<Boolean, HashMap<Integer,BoundsDigraphNode>>();
384     nodesBoundsDigraph.put(BoundsDigraphNode.LEFT , new HashMap<Integer, BoundsDigraphNode>());
385     nodesBoundsDigraph.put(BoundsDigraphNode.RIGHT, new HashMap<Integer, BoundsDigraphNode>());
386     // first we create the nodes and store them into the HashMap
387 duarte 802 for (int i=0;i<conformationSize;i++) {
388 duarte 797 BoundsDigraphNode leftNode = new BoundsDigraphNode(i, BoundsDigraphNode.LEFT);
389     BoundsDigraphNode rightNode = new BoundsDigraphNode(i, BoundsDigraphNode.RIGHT);
390     boundsDigraph.addVertex(leftNode);
391     boundsDigraph.addVertex(rightNode);
392     nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).put(i,leftNode);
393     nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).put(i,rightNode);
394     }
395    
396 duarte 802 for (int i=0;i<conformationSize;i++) {
397     for (int j=0;j<conformationSize;j++) {
398     if (bounds[i][j]!=null) {
399     // first we add the upper bounds as undirected edges to the 2 subgraphs (left and right)
400     boundsDigraph.addEdge(new SimpleEdge(lmax+bounds[i][j].upper),
401     nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i),
402     nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(j),
403     EdgeType.UNDIRECTED);
404     boundsDigraph.addEdge(new SimpleEdge(lmax+bounds[i][j].upper),
405     nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(i),
406     nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j),
407     EdgeType.UNDIRECTED);
408     // then we add the negative of the lower bounds as directed edges connecting nodes of subgraph left to subgraph right
409     boundsDigraph.addEdge(new SimpleEdge(lmax-bounds[i][j].lower),
410     nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i),
411     nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j),
412     EdgeType.DIRECTED);
413     }
414     }
415 duarte 797 }
416     return boundsDigraph;
417     }
418    
419     /**
420 duarte 802 * Convert the given RIGraph to a bounds matrix. The indices of the matrix can be mapped back to residue
421     * serials through {@link #getResserFromIdx(int)}
422 duarte 796 * Will only admit single atom contact type RIGraphs
423     * @param graph
424     * @return
425     * @throws IllegalArgumentException if contact type of given RIGraph is not a single atom contact type
426     */
427 duarte 802 private Bound[][] convertRIGraphToBoundsMatrix(RIGraph graph) {
428 duarte 796 // code cloned from ConstraintsMaker.createDistanceConstraints with some modifications
429 duarte 804 bounds = new Bound[conformationSize][conformationSize];
430 duarte 802 TreeMap<Integer,Integer> resser2idx = new TreeMap<Integer, Integer>();
431     this.idx2resser = new TreeMap<Integer, Integer>();
432     int idx = 0;
433     for (int resser:graph.getSerials()) {
434     resser2idx.put(resser,idx);
435     idx2resser.put(idx,resser);
436     idx++;
437     }
438 duarte 796 double cutoff = graph.getCutoff();
439     String ct = graph.getContactType();
440     String i_ct = ct;
441     String j_ct = ct;
442     if (ct.contains("/")){
443     i_ct = ct.split("/")[0];
444     j_ct = ct.split("/")[1];
445     }
446    
447     if (!AAinfo.isValidSingleAtomContactType(i_ct) || !AAinfo.isValidSingleAtomContactType(j_ct)){
448     throw new IllegalArgumentException("Contact type "+i_ct+" or "+j_ct+" is not valid for reconstruction");
449     }
450    
451     for (RIGEdge cont:graph.getEdges()){
452     Pair<RIGNode> pair = graph.getEndpoints(cont);
453     String i_res = pair.getFirst().getResidueType();
454     String j_res = pair.getSecond().getResidueType();
455    
456     // as dist_min we take the average of the two dist mins, if i_ct and j_ct are the same then this will be the same as dist_min for ct
457     double dist_min = (AAinfo.getLowerBoundDistance(i_ct,i_res,j_res)+AAinfo.getLowerBoundDistance(j_ct,i_res,j_res))/2;
458     // for single atom contact types getUpperBoundDistance and getLowerBoundDistance will return 0 thus for those cases dist_max = cutoff
459     double dist_max = AAinfo.getUpperBoundDistance(i_ct, i_res, j_res)/2+AAinfo.getUpperBoundDistance(i_ct, i_res, j_res)/2+cutoff;
460    
461 duarte 799 if (pair.getSecond().getResidueSerial()>pair.getFirst().getResidueSerial()+1) { //we don't add the first diagonal, we add it later as contiguous CA constraints
462 duarte 802 bounds[resser2idx.get(pair.getFirst().getResidueSerial())][resser2idx.get(pair.getSecond().getResidueSerial())] = new Bound(dist_min, dist_max);
463 duarte 799 }
464 duarte 796 }
465 duarte 799 // adding contiguous CA distance backbone constraints
466 duarte 802 for (int i=0;i<conformationSize-1;i++) {
467     bounds[i][i+1]=new Bound(BB_CA_DIST,BB_CA_DIST);
468     }
469     return bounds;
470     }
471 duarte 804
472     private void printBounds() {
473     printBounds(this.bounds);
474     }
475 duarte 802
476 duarte 804 /*------------------------ statics ------------------------------*/
477    
478 duarte 802 /**
479     * Deep copies given array of bounds
480     * @param bounds
481     * @return
482     */
483     private static Bound[][] copyBounds(Bound[][] bounds) {
484     Bound[][] newBounds = new Bound[bounds.length][bounds.length];
485     for (int i=0;i<bounds.length;i++) {
486     for (int j=0;j<bounds[i].length;j++) {
487     if (bounds[i][j]!=null) {
488     newBounds[i][j] = new Bound(bounds[i][j].lower,bounds[i][j].upper);
489     }
490 duarte 799 }
491     }
492 duarte 802 return newBounds;
493 duarte 796 }
494 duarte 804
495 duarte 802 private static void printBounds(Bound[][] bounds) {
496     for (int i=0;i<bounds.length;i++) {
497     for (int j=0;j<bounds[i].length;j++) {
498     if (bounds[i][j]==null) {
499     System.out.printf("%11s","null");
500     } else {
501     System.out.print(bounds[i][j]);
502     }
503     }
504     System.out.println();
505     }
506     System.out.println();
507     }
508    
509     private static void printViolations (Matrix matrixEmbedded, Bound[][] bounds) {
510     int count = 0;
511     for (int i=0;i<matrixEmbedded.getRowDimension();i++) {
512     for (int j=i+1;j<matrixEmbedded.getColumnDimension();j++) {
513     if ((matrixEmbedded.get(i,j)<bounds[i][j].lower) || (matrixEmbedded.get(i,j)>bounds[i][j].upper)) {
514     System.out.printf("%3d %3d %4.1f %s\n",i,j,matrixEmbedded.get(i,j),bounds[i][j].toString());
515     count++;
516     }
517     }
518     }
519     int cells = (matrixEmbedded.getRowDimension()*(matrixEmbedded.getRowDimension()-1))/2;
520     System.out.println("Number of violations: "+count+" out of "+cells+" cells in half matrix");
521     }
522    
523     private static int getNumberViolations(Matrix matrixEmbedded, Bound[][] bounds) {
524     int count = 0;
525     for (int i=0;i<matrixEmbedded.getRowDimension();i++) {
526     for (int j=i+1;j<matrixEmbedded.getColumnDimension();j++) {
527     if ((matrixEmbedded.get(i,j)<bounds[i][j].lower) || (matrixEmbedded.get(i,j)>bounds[i][j].upper)) {
528     count++;
529     }
530     }
531     }
532     return count;
533     }
534    
535     private static void printMatrix(Matrix matrix) {
536     for (int i=0;i<matrix.getRowDimension();i++) {
537     for (int j=0;j<matrix.getColumnDimension();j++) {
538     System.out.printf("%4.1f ",matrix.get(i, j));
539     }
540     System.out.println();
541     }
542     }
543    
544 duarte 796 /*-------------------------- main -------------------------------*/
545    
546     /**
547     * To test the class
548     */
549     public static void main (String[] args) throws Exception {
550 duarte 802 boolean debug = false;
551     boolean writeFiles = false;
552 duarte 804 int numModels = 10;
553 duarte 802 Embedder.ScalingMethod scalingMethod = Embedder.ScalingMethod.AVRG_INTER_CA_DIST;
554 duarte 804 boolean metrize = true; // if true metrization performed, if false random sampling
555 duarte 802
556 duarte 798 String pdbCode = "1bxy";
557     String pdbChainCode = "A";
558 duarte 802 String ct = "Ca";
559     double cutoff = 8.0;
560 duarte 798
561     Pdb pdb = new PdbasePdb(pdbCode);
562     pdb.load(pdbChainCode);
563     Pdb pdbEmbedded = new PdbasePdb(pdbCode);
564     pdbEmbedded.load(pdbChainCode);
565    
566 duarte 802 RIGraph graph = pdb.get_graph(ct, cutoff);
567 duarte 796 BoundsSmoother bs = new BoundsSmoother(graph);
568 duarte 804 Bound[][] initialBoundsAllPairs = bs.getBoundsAllPairs();
569 duarte 803
570 duarte 798 if (debug) {
571 duarte 804 // all pairs bounds after triangle inequality
572     printBounds(initialBoundsAllPairs);
573 duarte 796 }
574 duarte 802
575 duarte 803
576 duarte 802 System.out.printf("%6s\t%6s\t%6s", "rmsd","rmsdm","viols");
577 duarte 796 System.out.println();
578 duarte 798 for (int model=0;model<numModels;model++) {
579 duarte 803
580 duarte 804 Matrix matrix = null;
581     if (!metrize) {
582     matrix = bs.sampleBounds();
583     } else {
584     matrix = bs.metrize();
585     }
586    
587     if (debug) {
588 duarte 803 // bounds after metrization
589 duarte 804 bs.printBounds();
590     }
591 duarte 803
592 duarte 798 if (debug) {
593 duarte 802 printMatrix(matrix);
594 duarte 796 }
595 duarte 798
596 duarte 802 Embedder embedder = new Embedder(matrix);
597     Vector3d[] embedding = embedder.embed(scalingMethod);
598 duarte 798 pdbEmbedded.setAtomsCoords(embedding, "CA");
599    
600     double rmsd = pdb.rmsd(pdbEmbedded, "Ca");
601     pdb.mirror();
602     double rmsdm = pdb.rmsd(pdbEmbedded, "Ca");
603 duarte 800
604 duarte 799 if (rmsd>rmsdm ) {
605     pdbEmbedded.mirror();
606     }
607 duarte 800
608 duarte 802 if (writeFiles)
609 duarte 804 pdbEmbedded.dump2pdbfile("/project/StruPPi/jose/embed/embed_"+pdbCode+pdbChainCode+"_"+model+".pdb");
610 duarte 802
611     Matrix matrixEmbedded = pdbEmbedded.calculateDistMatrix("Ca");
612 duarte 799
613 duarte 804 System.out.printf("%6.3f\t%6.3f\t%6d",rmsd,rmsdm,getNumberViolations(matrixEmbedded, initialBoundsAllPairs));
614 duarte 796 System.out.println();
615 duarte 799
616 duarte 800 if (debug) {
617 duarte 804 printViolations(matrixEmbedded, initialBoundsAllPairs);
618 duarte 799 }
619 duarte 796 }
620     }
621    
622     }