1 |
package proteinstructure; |
2 |
|
3 |
import java.util.TreeMap; |
4 |
|
5 |
public class GraphAverager { |
6 |
|
7 |
private Alignment al; |
8 |
private TreeMap<String,Graph> templateGraphs; |
9 |
private String targetTag; |
10 |
private int numTemplates; |
11 |
private String sequence; // sequence of the final consensus graph |
12 |
private String contactType; // contact type of the final consensus graph |
13 |
private double distCutoff; // cutoff of the final consensus graph |
14 |
|
15 |
private TreeMap<Edge,Integer> contactVotes; |
16 |
|
17 |
public GraphAverager(String sequence, Alignment al, TreeMap<String,Graph> templateGraphs, String targetTag) { |
18 |
this.al = al; |
19 |
this.templateGraphs = templateGraphs; |
20 |
this.targetTag = targetTag; |
21 |
this.sequence = sequence; |
22 |
Graph firstGraph = templateGraphs.get(templateGraphs.firstKey()); |
23 |
this.contactType = firstGraph.getContactType(); |
24 |
this.distCutoff = firstGraph.getCutoff(); |
25 |
|
26 |
this.numTemplates = templateGraphs.size(); |
27 |
checkSequences(); |
28 |
|
29 |
countVotes(); // does the averaging by counting the votes and putting them into contactVotes |
30 |
|
31 |
} |
32 |
|
33 |
/** |
34 |
* Checks that tags and sequences are consistent between this.al and this.templateGraphs and between this.al and this.graph/this.targetTag |
35 |
* |
36 |
*/ |
37 |
private void checkSequences(){ |
38 |
if (!al.hasTag(targetTag)){ |
39 |
System.err.println("Alignment doesn't seem to contain the target sequence, check the FASTA tags"); |
40 |
//TODO throw exception |
41 |
} |
42 |
for (String tag:templateGraphs.keySet()){ |
43 |
if (!al.hasTag(tag)){ |
44 |
System.err.println("Alignment is missing template sequence "+tag+", check the FASTA tags"); |
45 |
// TODO throw exception |
46 |
} |
47 |
} |
48 |
if (templateGraphs.size()!=al.getNumberOfSequences()-1){ |
49 |
System.err.println("Number of sequences in alignment is different from number of templates +1 "); |
50 |
// TODO throw exception |
51 |
} |
52 |
if(!al.getSequenceNoGaps(targetTag).equals(this.sequence)) { |
53 |
System.err.println("Target sequence in alignment does not match sequence in target graph"); |
54 |
// TODO throw exception |
55 |
} |
56 |
for (String tag:templateGraphs.keySet()){ |
57 |
if(!al.getSequenceNoGaps(tag).equals(templateGraphs.get(tag).getSequence())) { |
58 |
System.err.println("Sequence of template graph "+tag+" does not match sequence in alignment"); |
59 |
// TODO throw exception |
60 |
} |
61 |
} |
62 |
} |
63 |
|
64 |
/** |
65 |
* Counts the votes for each possible alignment edge and puts all the votes in contactVotes TreeMap |
66 |
* |
67 |
*/ |
68 |
private void countVotes() { |
69 |
|
70 |
contactVotes = new TreeMap<Edge, Integer>(); |
71 |
|
72 |
// we go through all positions in the alignment |
73 |
for (int i=0; i<al.getAlignmentLength(); i++){ |
74 |
for (int j=0; j<al.getAlignmentLength(); j++) { |
75 |
|
76 |
int vote = 0; |
77 |
// scanning all templates to see if they have this contact |
78 |
for (String tag:templateGraphs.keySet()){ |
79 |
Edge thisGraphCont = new Edge(al.al2seq(tag, i),al.al2seq(tag, j)); |
80 |
if (templateGraphs.get(tag).containsContact(thisGraphCont)) { |
81 |
vote++; |
82 |
} |
83 |
} |
84 |
// putting vote in contactVotes TreeMap |
85 |
if (vote>0){ |
86 |
contactVotes.put(new Edge(i,j), vote); |
87 |
} |
88 |
} |
89 |
} |
90 |
} |
91 |
|
92 |
/** |
93 |
* Calculates the consensus graph from the set of template graphs. An edge is contained |
94 |
* in the consensus graph if the fractions of template graphs it is contained in is above |
95 |
* the given threshold. |
96 |
* The output is a new Graph object created from the given sequence in the constructor |
97 |
* to which we add the averaged edges. |
98 |
* @param threshold the threshold above which an edge is taken to be a consensus edge |
99 |
* @return the new graph |
100 |
*/ |
101 |
public Graph doAveraging(double threshold) { |
102 |
|
103 |
Graph graph = new Graph(this.sequence); |
104 |
graph.setContactType(this.contactType); |
105 |
graph.setCutoff(this.distCutoff); |
106 |
|
107 |
// if vote above threshold we take the contact for our target |
108 |
int voteThreshold = (int) Math.ceil((double)numTemplates*threshold); // i.e. round up of 50%, 40% or 30% (depends on threshold given) |
109 |
for (Edge alignCont:contactVotes.keySet()){ |
110 |
if (contactVotes.get(alignCont)>=voteThreshold) { |
111 |
Edge targetGraphCont = new Edge(al.al2seq(targetTag,alignCont.i),al.al2seq(targetTag,alignCont.j)); |
112 |
if (targetGraphCont.i!=-1 && targetGraphCont.j!=-1){ // we can't add contacts that map to gaps!! |
113 |
graph.addEdge(targetGraphCont); |
114 |
} |
115 |
} |
116 |
} |
117 |
return graph; |
118 |
} |
119 |
|
120 |
/** |
121 |
* Returns a graph containing the union of edges of the template graphs weighted by the fraction of occurance in the templates. |
122 |
* The sequence of the graph is initalized to the sequence of the template. |
123 |
* @return |
124 |
*/ |
125 |
public Graph getAverageGraph() { |
126 |
Graph graph = new Graph(this.sequence); |
127 |
graph.setContactType(this.contactType); |
128 |
graph.setCutoff(this.distCutoff); |
129 |
|
130 |
for (Edge alignCont:contactVotes.keySet()){ |
131 |
double weight = 1.0 * contactVotes.get(alignCont) / numTemplates; |
132 |
Edge targetGraphCont = new Edge(al.al2seq(targetTag,alignCont.i),al.al2seq(targetTag,alignCont.j), weight); |
133 |
if (targetGraphCont.i!=-1 && targetGraphCont.j!=-1){ // we can't add contacts that map to gaps!! |
134 |
graph.addEdge(targetGraphCont); |
135 |
} |
136 |
|
137 |
} |
138 |
return graph; |
139 |
} |
140 |
} |