ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/embed/BoundsSmoother.java
Revision: 797
Committed: Wed Nov 12 16:13:39 2008 UTC (15 years, 11 months ago) by duarte
File size: 17191 byte(s)
Log Message:
Lower bounds bounds-smoothing now also implemented (using Dijkstra's shortest path algorithm and offsetting to avoid negative weights).
Line User Rev File contents
1 duarte 796 package embed;
2    
3 duarte 797 import java.util.ArrayList;
4     import java.util.Collections;
5     import java.util.HashMap;
6 duarte 796 import java.util.Random;
7     import java.util.TreeMap;
8    
9     import javax.vecmath.Vector3d;
10    
11     import org.apache.commons.collections15.Transformer;
12    
13     import Jama.Matrix;
14    
15     import edu.uci.ics.jung.algorithms.shortestpath.DijkstraDistance;
16 duarte 797 import edu.uci.ics.jung.algorithms.shortestpath.DijkstraShortestPath;
17 duarte 796 import edu.uci.ics.jung.graph.SparseGraph;
18     import edu.uci.ics.jung.graph.util.EdgeType;
19     import edu.uci.ics.jung.graph.util.Pair;
20     import proteinstructure.AAinfo;
21     import proteinstructure.Pdb;
22     import proteinstructure.PdbasePdb;
23     import proteinstructure.RIGEdge;
24     import proteinstructure.RIGNode;
25     import proteinstructure.RIGraph;
26    
27     /**
28     * Implementation of the bounds smoothing part of the EMBED algorithm of Crippen and Havel
29     * Given a sparse set of distance ranges between a set of atoms it finds distance bounds for
30     * all pairs of atoms, using the triangle inequality.
31     *
32     * Taken from "Distance Geometry: Theory, Algorithms, and Chemical Applications" (section 3.1) by T.F. Havel,
33     * in Encyclopedia of Computational Chemistry (Wiley, New York, 1998).
34     * See also "Distance Geometry and Molecular Conformation" (Chapter 5) by G.M. Crippen and T.F. Havel (Wiley)
35     *
36     * @author duarte
37     *
38     */
39     public class BoundsSmoother {
40    
41    
42     /*----------------- helper classes and transformers -----------------*/
43    
44 duarte 797 Transformer<SimpleEdge, Number> WeightTransformer = new Transformer<SimpleEdge, Number>() {
45     public Number transform(SimpleEdge input) {
46 duarte 796 return input.weight;
47     }
48     };
49    
50     private class SimpleEdge {
51     public double weight;
52     public SimpleEdge(double weight) {
53     this.weight = weight;
54     }
55     }
56 duarte 797
57     private class BoundsDigraphNode {
58     public static final boolean LEFT = true;
59     public static final boolean RIGHT = false;
60     private boolean side; // true: left, false: right
61     private int resSerial;
62     public BoundsDigraphNode(int resSerial, boolean side) {
63     this.resSerial = resSerial;
64     this.side = side;
65     }
66     public boolean isRight() {
67     return !side;
68     }
69     public boolean isLeft() {
70     return side;
71     }
72     public int getSerial() {
73     return resSerial;
74     }
75     public boolean equals(Object other) {
76     if (! (other instanceof BoundsDigraphNode)) return false;
77     BoundsDigraphNode otherNode = (BoundsDigraphNode)other;
78     if (otherNode.resSerial==this.resSerial && otherNode.side == this.side) {
79     return true;
80     } else {
81     return false;
82     }
83     }
84     public String toString() {
85     return resSerial+(side?"L":"R");
86     }
87     }
88 duarte 796
89     private class Bound {
90     public double lower;
91     public double upper;
92     public Bound(double lower, double upper) {
93     this.lower = lower;
94     this.upper = upper;
95     }
96     public String toString() {
97     return String.format("[%4.1f %4.1f]", lower, upper);
98     }
99     }
100    
101     /*---------------------- member variables ----------------------------*/
102    
103     private SparseGraph<Integer,Bound> boundsGraph;
104     private RIGraph rig;
105     private TreeMap<Integer,Integer> matIdx2Resser;
106     private int conformationSize;
107 duarte 797 private HashMap<Boolean, HashMap<Integer,BoundsDigraphNode>> nodesBoundsDigraph; // map of serial/side to nodes in the bounds digraph
108     private double lmax; // maximum of the lower bounds: offset value for the boundsDigraph (not to have negative weights so that we can use Dijkstra's algo)
109 duarte 796
110     /*------------------------ constructors ------------------------------*/
111    
112     /**
113     * Constructs a new BoundsSmoother object given a RIGraph.
114     * The RIGraph is converted into a set of distance ranges using the cutoff
115     * as upper limit and hard-spheres as lower limit.
116     * @param graph
117     */
118     public BoundsSmoother(RIGraph graph) {
119     this.rig = graph;
120     this.boundsGraph = convertRIGraphToDistRangeGraph(graph);
121     this.conformationSize = this.rig.getObsLength();
122     }
123    
124     /*----------------------- public methods ----------------------------*/
125    
126     /**
127     * Computes bounds for all pairs, based on the set of sparse distance ranges
128     * @return a 2-dimensional array with the bounds for all pairs of residues, the
129     * indices of the array can be mapped to residue serials through {@link #getResserFromIdx(int)}
130     * and are guaranteed to be in the same order as the residue serials.
131     */
132     public Bound[][] getBoundsAllPairs() {
133     Bound[][] bounds = new Bound[conformationSize][conformationSize];
134     double[][] lowerBounds = getLowerBoundsAllPairs();
135     double[][] upperBounds = getUpperBoundsAllPairs();
136     for (int i=0;i<lowerBounds.length;i++) {
137     for (int j=0;j<lowerBounds[i].length;j++) {
138 duarte 797 // we fill in the lower half of the matrix which was missing from upperBounds/lowerBounds
139 duarte 796 double upperBound = upperBounds[i][j];
140 duarte 797 double lowerBound = lowerBounds[i][j];
141     if (i>j) {
142     upperBound = upperBounds[j][i];
143     lowerBound = lowerBounds[j][i];
144     }
145     bounds[i][j]=new Bound(lowerBound,upperBound);
146     // sanity check: lower bounds can be bigger than upper bounds!, i<j condition is only to do half of the matrix
147     if (i<j && lowerBound>lowerBound) {
148     System.err.println("Warning: lower bound ("+lowerBound+") for pair "+i+" "+j+" is bigger than upper bound ("+upperBound+")");
149     }
150 duarte 796 }
151     }
152     return bounds;
153     }
154    
155     /**
156     * Gets a random sample from a matrix of all pairs distance ranges
157     * @param bounds the matrix of all pairs distance ranges
158     * @return
159     */
160     public Matrix sampleBounds(Bound[][] bounds) {
161     double[][] matrix = new double[conformationSize][conformationSize];
162     for (int i=0;i<conformationSize;i++) {
163     for (int j=0;j<conformationSize;j++) {
164     if (j>i) {
165     Random rand = new Random();
166     matrix[i][j]= bounds[i][j].lower+rand.nextDouble()*(bounds[i][j].upper-bounds[i][j].lower);
167     } else if (j<i) {
168     matrix[i][j]=matrix[j][i];
169     }
170     }
171     }
172     return new Matrix(matrix);
173     }
174    
175     /**
176     * Maps from residue serials to indices of the matrices returned by {@link #getBoundsAllPairs()} and
177     * {@link #sampleBounds(Bound[][])}
178     * @param idx
179     * @return
180     */
181     public int getResserFromIdx(int idx) {
182     return matIdx2Resser.get(idx);
183     }
184    
185     /*----------------------- private methods ---------------------------*/
186    
187     /**
188     * Computes upper bounds for all pairs from a sparse set of upper bounds based
189     * on the triangle inequality.
190     * The computation is actually just an all pairs shortest path using Dijkstra's
191     * shortest path algorithm (as the set of distance coming from contact maps is
192     * very sparse this algorithm should be more efficient than Floyd's: Dijkstra's is
193     * o(nm logn) and Floyd's is o(n3))
194     * @return a 2-dimensional array with the upper bounds for all pairs of residues, the
195     * indices of the array can be mapped to residue serials through {@link #getResserFromIdx(int)}
196     * and are guaranteed to be in the same order as the residue serials.
197     */
198     private double[][] getUpperBoundsAllPairs() {
199     this.matIdx2Resser = new TreeMap<Integer,Integer>();
200     double[][] upperBoundsMatrix = new double[conformationSize][conformationSize];
201     SparseGraph<Integer,SimpleEdge> upperBoundGraph = convertBoundsGraphToUpperBoundGraph(boundsGraph);
202     DijkstraDistance<Integer, SimpleEdge> dd = new DijkstraDistance<Integer, SimpleEdge>(upperBoundGraph,WeightTransformer);
203     int iMatIdx = 0;
204     for (int i:rig.getSerials()) {
205     int jMatIdx = 0;
206     for (int j:rig.getSerials()) {
207     if (jMatIdx>iMatIdx) {
208     upperBoundsMatrix[iMatIdx][jMatIdx] = dd.getDistance(i, j).doubleValue();
209     }
210     jMatIdx++;
211     }
212     this.matIdx2Resser.put(iMatIdx,i);
213     iMatIdx++;
214     }
215     return upperBoundsMatrix;
216     }
217    
218     /**
219 duarte 797 * Computes lower bounds for all pairs given a sparse set of upper/lower bounds based on the triangle inequality.
220 duarte 796 *
221 duarte 797 * NOTE that because we use Dijkstra's algorithm for the computation of the shortest paths, we can't use negative
222     * weights (see http://en.wikipedia.org/wiki/Dijkstra%27s_algorithm). Because of this the boundsDigraph result
223     * of calling {@link #convertBoundsGraphToBoundsDigraph(SparseGraph)}} have values offset with the maximum lower bound (lmax).
224     * Thus after computing the shortest paths we have to revert back that offset by counting the number of hops the shortest path has.
225     *
226 duarte 796 * @return a 2-dimensional array with the lower bounds for all pairs of residues, the
227     * indices of the array can be mapped to residue serials through {@link #getResserFromIdx(int)}
228     * and are guaranteed to be in the same order as the residue serials.
229 duarte 797 *
230     * @see {@link #convertBoundsGraphToBoundsDigraph(SparseGraph)} and {@link #getUpperBoundsAllPairs()}
231 duarte 796 */
232     private double[][] getLowerBoundsAllPairs() {
233 duarte 797 double[][] lowerBoundsMatrix = new double[conformationSize][conformationSize];
234     // this is the bounds digraph as described by Crippen and Havel
235     SparseGraph<BoundsDigraphNode,SimpleEdge> boundsDigraph = convertBoundsGraphToBoundsDigraph(boundsGraph);
236     DijkstraShortestPath<BoundsDigraphNode, SimpleEdge> dd = new DijkstraShortestPath<BoundsDigraphNode, SimpleEdge>(boundsDigraph,WeightTransformer);
237     int iMatIdx = 0;
238     for (int i:rig.getSerials()) {
239     int jMatIdx = 0;
240     for (int j:rig.getSerials()) {
241     if (jMatIdx>iMatIdx) {
242     int hops = dd.getPath(nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i), nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j)).size();
243     lowerBoundsMatrix[iMatIdx][jMatIdx] = Math.abs(
244     (dd.getDistance(nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(i),
245     nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(j)
246     ).doubleValue())
247     - (hops*lmax)); // the lower limit for the triangle inequality is: Math.abs(shortestpath-(hops*lmax))
248 duarte 796 }
249 duarte 797 jMatIdx++;
250 duarte 796 }
251 duarte 797 iMatIdx++;
252 duarte 796 }
253 duarte 797 return lowerBoundsMatrix;
254 duarte 796
255     }
256    
257     /**
258     * Converts the bounds graph to a graph with only the upper bounds: nodes residue
259     * serials, edges upper bounds (in SimpleEdge objects containing the upper bound value
260     * in their weight field).
261     * @param distanceGraph
262     * @return
263     */
264     private SparseGraph<Integer,SimpleEdge> convertBoundsGraphToUpperBoundGraph(SparseGraph<Integer,Bound> distanceGraph) {
265     SparseGraph<Integer,SimpleEdge> upperBoundGraph = new SparseGraph<Integer, SimpleEdge>();
266     for (Bound bounds:distanceGraph.getEdges()) {
267     Pair<Integer> pair = distanceGraph.getEndpoints(bounds);
268     upperBoundGraph.addEdge(new SimpleEdge(bounds.upper), pair.getFirst(), pair.getSecond(), EdgeType.UNDIRECTED);
269     }
270     return upperBoundGraph;
271     }
272 duarte 797
273 duarte 796 /**
274 duarte 797 * Constructs a bounds digraph (as described by Crippen and Havel) to compute the triangle inequality limits
275     * for the lower bounds.
276     * The graph is composed by 2 subgraphs (we call them left and right) each of them containing the set of all atoms.
277     * Within the subgraphs there is an undirected edge between atoms i,j with weight the upper bounds for i,j
278     * Between the subgraphs there is a directed edge from left to right between atoms i(left) to j(right)
279     * with weight the negative of the lower bound i,j
280     * NOTE that because we use Dijkstra's algorithm for the computation of the shortest paths, we can't use negative
281     * weights (see http://en.wikipedia.org/wiki/Dijkstra%27s_algorithm). Thus we offset the values here to the maximum lower bound.
282     * After computing the shortest paths we have to revert back that offset by counting the number of hops the shortest path has.
283     * @param distanceGraph
284     * @return
285     */
286     private SparseGraph<BoundsDigraphNode,SimpleEdge> convertBoundsGraphToBoundsDigraph(SparseGraph<Integer,Bound> distanceGraph) {
287     // to do the offset thing (see docs above) we need to know first of all the max lower bound
288     ArrayList<Double> lowerBounds = new ArrayList<Double>();
289     for (Bound bounds:distanceGraph.getEdges()) {
290     lowerBounds.add(bounds.lower);
291     }
292     lmax = Collections.max(lowerBounds); // this is the offset value
293    
294     SparseGraph<BoundsDigraphNode,SimpleEdge> boundsDigraph = new SparseGraph<BoundsDigraphNode, SimpleEdge>();
295     // we have to store all nodes in a HashMap, so we can retrieve them by residue serial and side after
296     nodesBoundsDigraph = new HashMap<Boolean, HashMap<Integer,BoundsDigraphNode>>();
297     nodesBoundsDigraph.put(BoundsDigraphNode.LEFT , new HashMap<Integer, BoundsDigraphNode>());
298     nodesBoundsDigraph.put(BoundsDigraphNode.RIGHT, new HashMap<Integer, BoundsDigraphNode>());
299     // first we create the nodes and store them into the HashMap
300     for (int i:distanceGraph.getVertices()) {
301     BoundsDigraphNode leftNode = new BoundsDigraphNode(i, BoundsDigraphNode.LEFT);
302     BoundsDigraphNode rightNode = new BoundsDigraphNode(i, BoundsDigraphNode.RIGHT);
303     boundsDigraph.addVertex(leftNode);
304     boundsDigraph.addVertex(rightNode);
305     nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).put(i,leftNode);
306     nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).put(i,rightNode);
307     }
308    
309     for (Bound bounds:distanceGraph.getEdges()) {
310     Pair<Integer> pair = distanceGraph.getEndpoints(bounds);
311     // first we add the upper bounds as undirected edges to the 2 subgraphs (left and right)
312     boundsDigraph.addEdge(new SimpleEdge(lmax+bounds.upper),
313     nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(pair.getFirst()),
314     nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(pair.getSecond()),
315     EdgeType.UNDIRECTED);
316     boundsDigraph.addEdge(new SimpleEdge(lmax+bounds.upper),
317     nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(pair.getFirst()),
318     nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(pair.getSecond()),
319     EdgeType.UNDIRECTED);
320     // then we add the negative of the lower bounds as directed edges connecting nodes of subgraph left to subgraph right
321     boundsDigraph.addEdge(new SimpleEdge(lmax-bounds.lower),
322     nodesBoundsDigraph.get(BoundsDigraphNode.LEFT).get(pair.getFirst()),
323     nodesBoundsDigraph.get(BoundsDigraphNode.RIGHT).get(pair.getSecond()),
324     EdgeType.DIRECTED);
325     }
326     return boundsDigraph;
327     }
328    
329     /**
330 duarte 796 * Convert the given RIGraph to a bounds graph: residue serials as nodes and distance bounds as edges.
331     * Will only admit single atom contact type RIGraphs
332     * @param graph
333     * @return
334     * @throws IllegalArgumentException if contact type of given RIGraph is not a single atom contact type
335     */
336     private SparseGraph<Integer,Bound> convertRIGraphToDistRangeGraph(RIGraph graph) {
337     // code cloned from ConstraintsMaker.createDistanceConstraints with some modifications
338    
339     SparseGraph<Integer, Bound> distanceGraph = new SparseGraph<Integer, Bound>();
340    
341     double cutoff = graph.getCutoff();
342     String ct = graph.getContactType();
343     String i_ct = ct;
344     String j_ct = ct;
345     if (ct.contains("/")){
346     i_ct = ct.split("/")[0];
347     j_ct = ct.split("/")[1];
348     }
349    
350     if (!AAinfo.isValidSingleAtomContactType(i_ct) || !AAinfo.isValidSingleAtomContactType(j_ct)){
351     throw new IllegalArgumentException("Contact type "+i_ct+" or "+j_ct+" is not valid for reconstruction");
352     }
353    
354     for (RIGEdge cont:graph.getEdges()){
355     Pair<RIGNode> pair = graph.getEndpoints(cont);
356     String i_res = pair.getFirst().getResidueType();
357     String j_res = pair.getSecond().getResidueType();
358    
359     // as dist_min we take the average of the two dist mins, if i_ct and j_ct are the same then this will be the same as dist_min for ct
360     double dist_min = (AAinfo.getLowerBoundDistance(i_ct,i_res,j_res)+AAinfo.getLowerBoundDistance(j_ct,i_res,j_res))/2;
361     // for single atom contact types getUpperBoundDistance and getLowerBoundDistance will return 0 thus for those cases dist_max = cutoff
362     double dist_max = AAinfo.getUpperBoundDistance(i_ct, i_res, j_res)/2+AAinfo.getUpperBoundDistance(i_ct, i_res, j_res)/2+cutoff;
363    
364     Bound bounds = new Bound(dist_min, dist_max);
365     distanceGraph.addEdge(bounds, pair.getFirst().getResidueSerial(), pair.getSecond().getResidueSerial(), EdgeType.UNDIRECTED);
366     }
367     return distanceGraph;
368     }
369    
370     /*-------------------------- main -------------------------------*/
371    
372     /**
373     * To test the class
374     */
375     public static void main (String[] args) throws Exception {
376     Pdb pdb = new PdbasePdb("1bxy");
377     pdb.load("A");
378     RIGraph graph = pdb.get_graph("Ca", 8);
379     BoundsSmoother bs = new BoundsSmoother(graph);
380     Bound[][] bounds = bs.getBoundsAllPairs();
381     for (int i=0;i<bounds.length;i++) {
382     for (int j=0;j<bounds[i].length;j++) {
383     System.out.print(bounds[i][j]);
384     }
385     System.out.println();
386     }
387     System.out.println();
388    
389     Matrix matrix = bs.sampleBounds(bounds);
390     for (int i=0;i<matrix.getRowDimension();i++) {
391     for (int j=0;j<matrix.getColumnDimension();j++) {
392     System.out.printf("%4.1f ",matrix.get(i, j));
393     }
394     System.out.println();
395     }
396     int size = pdb.get_length();
397     Embedder embedder = new Embedder(matrix,Embedder.createTrivialVector(1.0, size), Embedder.createTrivialVector(1.0, size));
398     Vector3d[] embedding = embedder.embed();
399     Vector3d[] originalConformation = new Vector3d[size];
400     for (int i=0;i<size;i++) {
401     originalConformation[i]=new Vector3d(pdb.getAtomCoord(bs.getResserFromIdx(i), "CA"));
402     }
403    
404     double rmsd = Pdb.calculate_rmsd(originalConformation, embedding);
405     for (int i=0;i<originalConformation.length;i++){
406     originalConformation[i].scale(-1);
407     }
408     double rmsdm = Pdb.calculate_rmsd(originalConformation, embedding);
409     System.out.println("rmsd of embedded to original conformation: "+rmsd);
410     System.out.println("rmsd of embedded to mirrored original conformation: "+rmsdm);
411    
412     }
413    
414     }