ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/GraphAverager.java
Revision: 379
Committed: Fri Nov 2 14:22:54 2007 UTC (16 years, 11 months ago) by stehr
File size: 4999 byte(s)
Log Message:
redesign of TinkerRunner: added 2 reconstruct methods, one which generates normal tinker output for a given number of models, and one fully automatic one which simply returns one pdb object;
updated reconstruct.java to make use of the new TinkerRunner method;
updated GraphAverager: now assigning edgetype and cutoff to consensus graph

Line File contents
1 package proteinstructure;
2
3 import java.util.TreeMap;
4
5 public class GraphAverager {
6
7 private Alignment al;
8 private TreeMap<String,Graph> templateGraphs;
9 private String targetTag;
10 private int numTemplates;
11 private String sequence; // sequence of the final consensus graph
12 private String contactType; // contact type of the final consensus graph
13 private double distCutoff; // cutoff of the final consensus graph
14
15 private TreeMap<Edge,Integer> contactVotes;
16
17 public GraphAverager(String sequence, Alignment al, TreeMap<String,Graph> templateGraphs, String targetTag) {
18 this.al = al;
19 this.templateGraphs = templateGraphs;
20 this.targetTag = targetTag;
21 this.sequence = sequence;
22 Graph firstGraph = templateGraphs.get(templateGraphs.firstKey());
23 this.contactType = firstGraph.getContactType();
24 this.distCutoff = firstGraph.getCutoff();
25
26 this.numTemplates = templateGraphs.size();
27 checkSequences();
28
29 countVotes(); // does the averaging by counting the votes and putting them into contactVotes
30
31 }
32
33 /**
34 * Checks that tags and sequences are consistent between this.al and this.templateGraphs and between this.al and this.graph/this.targetTag
35 *
36 */
37 private void checkSequences(){
38 if (!al.hasTag(targetTag)){
39 System.err.println("Alignment doesn't seem to contain the target sequence, check the FASTA tags");
40 //TODO throw exception
41 }
42 for (String tag:templateGraphs.keySet()){
43 if (!al.hasTag(tag)){
44 System.err.println("Alignment is missing template sequence "+tag+", check the FASTA tags");
45 // TODO throw exception
46 }
47 }
48 if (templateGraphs.size()!=al.getNumberOfSequences()-1){
49 System.err.println("Number of sequences in alignment is different from number of templates +1 ");
50 // TODO throw exception
51 }
52 if(!al.getSequenceNoGaps(targetTag).equals(this.sequence)) {
53 System.err.println("Target sequence in alignment does not match sequence in target graph");
54 // TODO throw exception
55 }
56 for (String tag:templateGraphs.keySet()){
57 if(!al.getSequenceNoGaps(tag).equals(templateGraphs.get(tag).getSequence())) {
58 System.err.println("Sequence of template graph "+tag+" does not match sequence in alignment");
59 // TODO throw exception
60 }
61 }
62 }
63
64 /**
65 * Counts the votes for each possible alignment edge and puts all the votes in contactVotes TreeMap
66 *
67 */
68 private void countVotes() {
69
70 contactVotes = new TreeMap<Edge, Integer>();
71
72 // we go through all positions in the alignment
73 for (int i=0; i<al.getAlignmentLength(); i++){
74 for (int j=0; j<al.getAlignmentLength(); j++) {
75
76 int vote = 0;
77 // scanning all templates to see if they have this contact
78 for (String tag:templateGraphs.keySet()){
79 Edge thisGraphCont = new Edge(al.al2seq(tag, i),al.al2seq(tag, j));
80 if (templateGraphs.get(tag).containsContact(thisGraphCont)) {
81 vote++;
82 }
83 }
84 // putting vote in contactVotes TreeMap
85 if (vote>0){
86 contactVotes.put(new Edge(i,j), vote);
87 }
88 }
89 }
90 }
91
92 /**
93 * Calculates the consensus graph from the set of template graphs. An edge is contained
94 * in the consensus graph if the fractions of template graphs it is contained in is above
95 * the given threshold.
96 * The output is a new Graph object created from the given sequence in the constructor
97 * to which we add the averaged edges.
98 * @param threshold the threshold above which an edge is taken to be a consensus edge
99 * @return the new graph
100 */
101 public Graph doAveraging(double threshold) {
102
103 Graph graph = new Graph(this.sequence);
104 graph.setContactType(this.contactType);
105 graph.setCutoff(this.distCutoff);
106
107 // if vote above threshold we take the contact for our target
108 int voteThreshold = (int) Math.ceil((double)numTemplates*threshold); // i.e. round up of 50%, 40% or 30% (depends on threshold given)
109 for (Edge alignCont:contactVotes.keySet()){
110 if (contactVotes.get(alignCont)>=voteThreshold) {
111 Edge targetGraphCont = new Edge(al.al2seq(targetTag,alignCont.i),al.al2seq(targetTag,alignCont.j));
112 if (targetGraphCont.i!=-1 && targetGraphCont.j!=-1){ // we can't add contacts that map to gaps!!
113 graph.addEdge(targetGraphCont);
114 }
115 }
116 }
117 return graph;
118 }
119
120 /**
121 * Returns a graph containing the union of edges of the template graphs weighted by the fraction of occurance in the templates.
122 * The sequence of the graph is initalized to the sequence of the template.
123 * @return
124 */
125 public Graph getAverageGraph() {
126 Graph graph = new Graph(this.sequence);
127 graph.setContactType(this.contactType);
128 graph.setCutoff(this.distCutoff);
129
130 for (Edge alignCont:contactVotes.keySet()){
131 double weight = 1.0 * contactVotes.get(alignCont) / numTemplates;
132 Edge targetGraphCont = new Edge(al.al2seq(targetTag,alignCont.i),al.al2seq(targetTag,alignCont.j), weight);
133 if (targetGraphCont.i!=-1 && targetGraphCont.j!=-1){ // we can't add contacts that map to gaps!!
134 graph.addEdge(targetGraphCont);
135 }
136
137 }
138 return graph;
139 }
140 }