1 |
duarte |
329 |
package proteinstructure; |
2 |
|
|
|
3 |
|
|
import java.util.Map;
import java.util.TreeMap;
4 |
|
|
|
5 |
|
|
public class GraphAverager { |
6 |
|
|
|
7 |
|
|
private Alignment al; |
8 |
|
|
private TreeMap<String,Graph> templateGraphs; |
9 |
|
|
private String targetTag; |
10 |
duarte |
331 |
private int numTemplates; |
11 |
stehr |
337 |
private String sequence; |
12 |
duarte |
329 |
|
13 |
duarte |
331 |
private TreeMap<Edge,Integer> contactVotes; |
14 |
|
|
|
15 |
stehr |
337 |
public GraphAverager(String sequence, Alignment al, TreeMap<String,Graph> templateGraphs, String targetTag) { |
16 |
duarte |
329 |
this.al = al; |
17 |
|
|
this.templateGraphs = templateGraphs; |
18 |
|
|
this.targetTag = targetTag; |
19 |
stehr |
337 |
this.sequence = sequence; |
20 |
duarte |
331 |
|
21 |
|
|
this.numTemplates = templateGraphs.size(); |
22 |
duarte |
329 |
checkSequences(); |
23 |
|
|
|
24 |
duarte |
331 |
countVotes(); // does the averaging by counting the votes and putting them into contactVotes |
25 |
|
|
|
26 |
duarte |
329 |
} |
27 |
|
|
|
28 |
duarte |
331 |
/** |
29 |
|
|
* Checks that tags and sequences are consistent between this.al and this.templateGraphs and between this.al and this.graph/this.targetTag |
30 |
|
|
* |
31 |
|
|
*/ |
32 |
duarte |
329 |
private void checkSequences(){ |
33 |
|
|
if (!al.hasTag(targetTag)){ |
34 |
|
|
System.err.println("Alignment doesn't seem to contain the target sequence, check the FASTA tags"); |
35 |
|
|
//TODO throw exception |
36 |
|
|
} |
37 |
|
|
for (String tag:templateGraphs.keySet()){ |
38 |
|
|
if (!al.hasTag(tag)){ |
39 |
|
|
System.err.println("Alignment is missing template sequence "+tag+", check the FASTA tags"); |
40 |
|
|
// TODO throw exception |
41 |
|
|
} |
42 |
|
|
} |
43 |
|
|
if (templateGraphs.size()!=al.getNumberOfSequences()-1){ |
44 |
|
|
System.err.println("Number of sequences in alignment is different from number of templates +1 "); |
45 |
|
|
// TODO throw exception |
46 |
|
|
} |
47 |
stehr |
337 |
if(!al.getSequenceNoGaps(targetTag).equals(this.sequence)) { |
48 |
duarte |
329 |
System.err.println("Target sequence in alignment does not match sequence in target graph"); |
49 |
|
|
// TODO throw exception |
50 |
|
|
} |
51 |
|
|
for (String tag:templateGraphs.keySet()){ |
52 |
|
|
if(!al.getSequenceNoGaps(tag).equals(templateGraphs.get(tag).getSequence())) { |
53 |
|
|
System.err.println("Sequence of template graph "+tag+" does not match sequence in alignment"); |
54 |
|
|
// TODO throw exception |
55 |
|
|
} |
56 |
|
|
} |
57 |
|
|
} |
58 |
|
|
|
59 |
duarte |
331 |
/** |
60 |
|
|
* Counts the votes for each possible alignment edge and puts all the votes in contactVotes TreeMap |
61 |
|
|
* |
62 |
|
|
*/ |
63 |
|
|
private void countVotes() { |
64 |
duarte |
329 |
|
65 |
duarte |
331 |
contactVotes = new TreeMap<Edge, Integer>(); |
66 |
duarte |
329 |
|
67 |
duarte |
331 |
// we go through all positions in the alignment |
68 |
duarte |
329 |
for (int i=0; i<al.getAlignmentLength(); i++){ |
69 |
|
|
for (int j=0; j<al.getAlignmentLength(); j++) { |
70 |
|
|
|
71 |
|
|
int vote = 0; |
72 |
duarte |
331 |
// scanning all templates to see if they have this contact |
73 |
duarte |
329 |
for (String tag:templateGraphs.keySet()){ |
74 |
|
|
Edge thisGraphCont = new Edge(al.al2seq(tag, i),al.al2seq(tag, j)); |
75 |
|
|
if (templateGraphs.get(tag).containsContact(thisGraphCont)) { |
76 |
|
|
vote++; |
77 |
|
|
} |
78 |
|
|
} |
79 |
|
|
// putting vote in contactVotes TreeMap |
80 |
|
|
if (vote>0){ |
81 |
|
|
contactVotes.put(new Edge(i,j), vote); |
82 |
|
|
} |
83 |
|
|
} |
84 |
duarte |
331 |
} |
85 |
|
|
} |
86 |
|
|
|
87 |
|
|
/** |
88 |
stehr |
334 |
* Calculates the consensus graph from the set of template graphs. An edge is contained |
89 |
|
|
* in the consensus graph if the fractions of template graphs it is contained in is above |
90 |
|
|
* the given threshold. The resulting consensus edges are added to the output graph passed |
91 |
|
|
* to the construtor on creation. Note that access to this graph is by reference, so the |
92 |
|
|
* original graph is modified. |
93 |
|
|
* @param threshold the threshold above which an edge is taken to be a consensus edge |
94 |
duarte |
331 |
*/ |
95 |
stehr |
337 |
public Graph doAveraging(double threshold) { |
96 |
|
|
|
97 |
|
|
Graph graph = new Graph(this.sequence); |
98 |
|
|
|
99 |
duarte |
329 |
// if vote above threshold we take the contact for our target |
100 |
|
|
int voteThreshold = (int) Math.ceil((double)numTemplates*threshold); // i.e. round up of 50%, 40% or 30% (depends on threshold given) |
101 |
|
|
for (Edge alignCont:contactVotes.keySet()){ |
102 |
|
|
if (contactVotes.get(alignCont)>=voteThreshold) { |
103 |
|
|
Edge targetGraphCont = new Edge(al.al2seq(targetTag,alignCont.i),al.al2seq(targetTag,alignCont.j)); |
104 |
|
|
if (targetGraphCont.i!=-1 && targetGraphCont.j!=-1){ // we can't add contacts that map to gaps!! |
105 |
|
|
graph.addEdge(targetGraphCont); |
106 |
|
|
} |
107 |
|
|
} |
108 |
|
|
} |
109 |
stehr |
337 |
return graph; |
110 |
duarte |
329 |
|
111 |
|
|
} |
112 |
|
|
|
113 |
|
|
} |