ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/GraphAverager.java
Revision: 379
Committed: Fri Nov 2 14:22:54 2007 UTC (16 years, 11 months ago) by stehr
File size: 4999 byte(s)
Log Message:
redesign of TinkerRunner: added 2 reconstruct methods, one which generates normal tinker output for a given number of models, and one fully automatic one which simply returns one pdb object;
updated reconstruct.java to make use of the new TinkerRunner method;
updated GraphAverager: now assigning edgetype and cutoff to consensus graph

Line User Rev File contents
1 duarte 329 package proteinstructure;
2    
3     import java.util.TreeMap;
4    
5     public class GraphAverager {
6    
7     private Alignment al;
8     private TreeMap<String,Graph> templateGraphs;
9     private String targetTag;
10 duarte 331 private int numTemplates;
11 stehr 379 private String sequence; // sequence of the final consensus graph
12     private String contactType; // contact type of the final consensus graph
13     private double distCutoff; // cutoff of the final consensus graph
14 duarte 329
15 duarte 331 private TreeMap<Edge,Integer> contactVotes;
16    
17 stehr 337 public GraphAverager(String sequence, Alignment al, TreeMap<String,Graph> templateGraphs, String targetTag) {
18 duarte 329 this.al = al;
19     this.templateGraphs = templateGraphs;
20     this.targetTag = targetTag;
21 stehr 337 this.sequence = sequence;
22 stehr 379 Graph firstGraph = templateGraphs.get(templateGraphs.firstKey());
23     this.contactType = firstGraph.getContactType();
24     this.distCutoff = firstGraph.getCutoff();
25 duarte 331
26     this.numTemplates = templateGraphs.size();
27 duarte 329 checkSequences();
28    
29 duarte 331 countVotes(); // does the averaging by counting the votes and putting them into contactVotes
30    
31 duarte 329 }
32    
33 duarte 331 /**
34     * Checks that tags and sequences are consistent between this.al and this.templateGraphs and between this.al and this.graph/this.targetTag
35     *
36     */
37 duarte 329 private void checkSequences(){
38     if (!al.hasTag(targetTag)){
39     System.err.println("Alignment doesn't seem to contain the target sequence, check the FASTA tags");
40     //TODO throw exception
41     }
42     for (String tag:templateGraphs.keySet()){
43     if (!al.hasTag(tag)){
44     System.err.println("Alignment is missing template sequence "+tag+", check the FASTA tags");
45     // TODO throw exception
46     }
47     }
48     if (templateGraphs.size()!=al.getNumberOfSequences()-1){
49     System.err.println("Number of sequences in alignment is different from number of templates +1 ");
50     // TODO throw exception
51     }
52 stehr 337 if(!al.getSequenceNoGaps(targetTag).equals(this.sequence)) {
53 duarte 329 System.err.println("Target sequence in alignment does not match sequence in target graph");
54     // TODO throw exception
55     }
56     for (String tag:templateGraphs.keySet()){
57     if(!al.getSequenceNoGaps(tag).equals(templateGraphs.get(tag).getSequence())) {
58     System.err.println("Sequence of template graph "+tag+" does not match sequence in alignment");
59     // TODO throw exception
60     }
61     }
62     }
63    
64 duarte 331 /**
65     * Counts the votes for each possible alignment edge and puts all the votes in contactVotes TreeMap
66     *
67     */
68     private void countVotes() {
69 duarte 329
70 duarte 331 contactVotes = new TreeMap<Edge, Integer>();
71 duarte 329
72 duarte 331 // we go through all positions in the alignment
73 duarte 329 for (int i=0; i<al.getAlignmentLength(); i++){
74     for (int j=0; j<al.getAlignmentLength(); j++) {
75    
76     int vote = 0;
77 duarte 331 // scanning all templates to see if they have this contact
78 duarte 329 for (String tag:templateGraphs.keySet()){
79     Edge thisGraphCont = new Edge(al.al2seq(tag, i),al.al2seq(tag, j));
80     if (templateGraphs.get(tag).containsContact(thisGraphCont)) {
81     vote++;
82     }
83     }
84     // putting vote in contactVotes TreeMap
85     if (vote>0){
86     contactVotes.put(new Edge(i,j), vote);
87     }
88     }
89 duarte 331 }
90     }
91    
92     /**
93 stehr 334 * Calculates the consensus graph from the set of template graphs. An edge is contained
94     * in the consensus graph if the fractions of template graphs it is contained in is above
95 duarte 345 * the given threshold.
96     * The output is a new Graph object created from the given sequence in the constructor
97 stehr 360 * to which we add the averaged edges.
98 stehr 334 * @param threshold the threshold above which an edge is taken to be a consensus edge
99 stehr 360 * @return the new graph
100 duarte 331 */
101 stehr 337 public Graph doAveraging(double threshold) {
102    
103     Graph graph = new Graph(this.sequence);
104 stehr 379 graph.setContactType(this.contactType);
105     graph.setCutoff(this.distCutoff);
106 stehr 337
107 duarte 329 // if vote above threshold we take the contact for our target
108     int voteThreshold = (int) Math.ceil((double)numTemplates*threshold); // i.e. round up of 50%, 40% or 30% (depends on threshold given)
109     for (Edge alignCont:contactVotes.keySet()){
110     if (contactVotes.get(alignCont)>=voteThreshold) {
111     Edge targetGraphCont = new Edge(al.al2seq(targetTag,alignCont.i),al.al2seq(targetTag,alignCont.j));
112     if (targetGraphCont.i!=-1 && targetGraphCont.j!=-1){ // we can't add contacts that map to gaps!!
113     graph.addEdge(targetGraphCont);
114     }
115     }
116     }
117 stehr 337 return graph;
118 duarte 329 }
119    
120 stehr 360 /**
121     * Returns a graph containing the union of edges of the template graphs weighted by the fraction of occurance in the templates.
122     * The sequence of the graph is initalized to the sequence of the template.
123     * @return
124     */
125     public Graph getAverageGraph() {
126     Graph graph = new Graph(this.sequence);
127 stehr 379 graph.setContactType(this.contactType);
128     graph.setCutoff(this.distCutoff);
129    
130 stehr 360 for (Edge alignCont:contactVotes.keySet()){
131     double weight = 1.0 * contactVotes.get(alignCont) / numTemplates;
132     Edge targetGraphCont = new Edge(al.al2seq(targetTag,alignCont.i),al.al2seq(targetTag,alignCont.j), weight);
133     if (targetGraphCont.i!=-1 && targetGraphCont.j!=-1){ // we can't add contacts that map to gaps!!
134     graph.addEdge(targetGraphCont);
135     }
136    
137     }
138     return graph;
139     }
140 duarte 329 }