ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/graphAveraging/PhiPsiAverager.java
Revision: 667
Committed: Fri May 23 16:04:48 2008 UTC (16 years, 4 months ago) by duarte
File size: 20070 byte(s)
Log Message:
Fixed (hopefuly) the issue of wrapping angles (at -180,180). Tested minimally and seems fine.
Line File contents
1 package graphAveraging;
2
3 import java.io.File;
4 import java.util.ArrayList;
5 import java.util.Collections;
6 import java.util.Comparator;
7 import java.util.HashMap;
8 import java.util.LinkedHashMap;
9 import java.util.TreeMap;
10
11 import proteinstructure.Alignment;
12 import proteinstructure.PairwiseSequenceAlignment;
13 import proteinstructure.Pdb;
14 import proteinstructure.Template;
15 import proteinstructure.TemplateList;
16 import proteinstructure.PairwiseSequenceAlignment.PairwiseSequenceAlignmentException;
17 import tools.Goodies;
18 import tools.MySQLConnection;
19
20 public class PhiPsiAverager {
21
22 private Alignment al; // alignment
23 private ArrayList<TemplateWithPhiPsi> templates;
24
25 public PhiPsiAverager(TemplateList templates, Alignment aln) {
26
27 this.al = aln;
28 if (!templates.isPdbDataLoaded()) {
29 throw new IllegalArgumentException("TemplateList passed to PhiPsiAverager constructor must have PDB data loaded");
30 }
31 checkSequences(templates);
32 getPhiPsi(templates);
33
34 }
35
36 /**
37 * Gets the consensus phi/psi angles mapped onto the sequence of the given targetTag
38 * If the targetTag is not in the Alignment an IllegalArgumentException is thrown
39 * @param threshold
40 * @param angleInterval
41 * @param targetTag
42 * @return
43 * @throws IllegalArgumentException if targetTag is not present in this.al
44 */
45 public TreeMap<Integer, ConsensusSquare> getConsensusPhiPsiOnTarget(double threshold, int angleInterval, String targetTag) {
46 if (!al.hasTag(targetTag)) throw new IllegalArgumentException("Given targetTag is not present in alignment");
47 TreeMap<Integer, ConsensusSquare> phiPsiConsensus = this.getConsensusPhiPsi(threshold, angleInterval);
48 TreeMap<Integer, ConsensusSquare> phiPsiConsOnTarget = new TreeMap<Integer, ConsensusSquare>();
49 for (int i:phiPsiConsensus.keySet()) {
50 int resser = al.al2seq(targetTag, i);
51 if (resser!=-1) {
52 phiPsiConsOnTarget.put(resser, phiPsiConsensus.get(i));
53 }
54 }
55 return phiPsiConsOnTarget;
56 }
57
58 /**
59 * Gets the consensus phi/psi angles for each alignment column with phi/psi consensus.
60 * The return is a TreeMap with keys alignment positions, values the ConsensusSquare.
61 * Columns without phi/psi consensus are not present in the TreeMap
62 * @param threshold a value >= 0.5
63 * @param angleInterval
64 * @return
65 * @throws IllegalArgumentException if threshold < 0.5
66 */
67 public TreeMap<Integer, ConsensusSquare> getConsensusPhiPsi(double threshold, int angleInterval) {
68
69 if (threshold < 0.5) {
70 throw new IllegalArgumentException("Threshold for consensus ph/psi must be above or equals to 0.5");
71 }
72
73 // we round DOWN given threshold. So for the 2 cases even/odd and limit case threshold=0.5:
74 // - odd case : 3 templates => voteThreshold=1, then we apply cutoff (see getInterval) with a '>' so that 2 falls within threshold
75 // - even case : 4 templates => voteThreshold=2, then we applu cutoff (see getInterval) with a '>' so that 2 doesn't fall within threshold
76 int voteThreshold = (int) (templates.size()*threshold); // the int casting rounds down
77
78 TreeMap<Integer, ConsensusSquare> bounds = new TreeMap<Integer, ConsensusSquare>();
79
80 // we go through each column i in the alignment and find the consensus per column
81 for (int i=1; i<=al.getAlignmentLength(); i++) {
82 HashMap<Integer,Double> phiAnglesColumnI = new HashMap<Integer, Double>();
83 HashMap<Integer,Double> psiAnglesColumnI = new HashMap<Integer, Double>();
84 for (int j=0;j<templates.size();j++) {
85 TemplateWithPhiPsi template = templates.get(j);
86 int resser = al.al2seq(template.getId(), i);
87
88 phiAnglesColumnI.put(j, Double.NaN);
89 psiAnglesColumnI.put(j, Double.NaN);
90 if (resser!=-1) { // to skip gaps
91 if (template.hasPhiPsiAngles(resser)) { // some columns won't have angle data because of unobserved i, i-1 or i+1 residue. Or for N and C-terminals
92 phiAnglesColumnI.put(j, template.getPhiAngle(resser));
93 psiAnglesColumnI.put(j, template.getPsiAngle(resser));
94 }
95 }
96 }
97
98 ConsensusSquare consIntervalPhPsi = findConsSquare(phiAnglesColumnI, psiAnglesColumnI, voteThreshold, angleInterval);
99
100 if (consIntervalPhPsi!=null) {
101 bounds.put(i, consIntervalPhPsi);
102 }
103
104 }
105 return bounds;
106 }
107
108 /**
109 * Finds the final ConsensusSquare from the list of all candidates
110 * returned from findAllConsSquares
111 * The returning value is the square region of the phi/psi (Ramachandran)
112 * space where the consensus phi/psi angles are.
113 * @param values1stDim
114 * @param values2ndDim
115 * @param voteThreshold
116 * @param angleInterval
117 * @return the ConsensusSquare with the phi/psi consensus intervals or null if no consensus for this column
118 */
119 private ConsensusSquare findConsSquare(HashMap<Integer,Double> values1stDim, HashMap<Integer,Double> values2ndDim, int voteThreshold, int angleInterval) {
120
121 ArrayList<ConsensusSquare> allConsIntervals = findAllConsSquares(values1stDim, values2ndDim, voteThreshold, angleInterval);
122 if (allConsIntervals.isEmpty()) {
123 return null;
124 }
125 // 1st we sort on voteCount descending (max count first)
126 Collections.sort(allConsIntervals, new Comparator<ConsensusSquare>() {
127 public int compare(ConsensusSquare o1, ConsensusSquare o2) {
128 return (new Integer(o2.getVoteCount()).compareTo(o1.getVoteCount()));
129 }
130 });
131 // now we take first (max votes)
132 // we want the solution with the maximum vote count. But in case of overlap there could be multiple solutions with maximum vote count
133 // from within those we can't do much to select one (thery are all equivalent consensus-wise)
134 // We will simply return the first, hopefully this is not totally random (since we use an ArrayList
135 // then they are first ordered as found and when we sort above, hopefully the sort algorithm behaves in the same way for
136 // tie breaks of inputs in the same order. TODO is there something better we can do here? it doesn't seem very important
137 // anyway. Overlap cases should be rare)
138 return allConsIntervals.get(0);
139 }
140
141 /**
142 * Finds all candidates of 2-dimensional consensus (by 2-dimensional we mean
143 * in phi/psi angle space, i.e. in both phi and psi) by taking all candidates
144 * in 1st dimension (phi) and looking if consensus also holds in second
145 * dimension (psi).
146 *
147 * @param values1stDim
148 * @param values2ndDim
149 * @param voteThreshold
150 * @param angleInterval
151 * @return
152 */
153 private ArrayList<ConsensusSquare> findAllConsSquares(HashMap<Integer,Double> values1stDim, HashMap<Integer,Double> values2ndDim, int voteThreshold, int angleInterval) {
154 ArrayList<ConsensusSquare> consInterval2DCandidates = new ArrayList<ConsensusSquare>();
155
156 ArrayList<ConsensusInterval> consIntervals1stDim = findAllConsInterval1stDim(values1stDim, voteThreshold, angleInterval);
157 for (ConsensusInterval consInterv1stDim: consIntervals1stDim) {
158 ConsensusInterval consInterv2ndDim = findConsInterval2ndDim(values2ndDim, consInterv1stDim, angleInterval);
159 if (consInterv2ndDim!=null) {
160 // ok this is a 2D interval candidate, we put it in the ArrayList. There simply shouldn't be duplicates
161 consInterval2DCandidates.add(new ConsensusSquare(consInterv1stDim, consInterv2ndDim));
162 }
163 }
164 return consInterval2DCandidates;
165 }
166
167 /**
168 * Finds all candidates of consensus in the 1st dimension (in phi/psi angle
169 * space, i.e. the phi angles)
170 * This is the core of the algorithm. We sort phi angle values and then put
171 * them together one by one and see if they fit within the angleInterval window
172 * and if the consensus is above the voteThreshold. If both this conditions are
173 * fullfilled then it is considered a candidate and added to the final output ArrayList
174 * @param anglesInColumn
175 * @param voteThreshold
176 * @param angleInterval
177 * @return
178 */
179 private ArrayList<ConsensusInterval> findAllConsInterval1stDim(HashMap<Integer,Double> anglesInColumn, int voteThreshold, int angleInterval) {
180
181 ArrayList<ConsensusInterval> allConsInterv1stDim = new ArrayList<ConsensusInterval>();
182
183 // anglesInColumn: keys index j corresponding to templates order, values angles unsorted
184 // we want the angles sorted so that we can then find intervals easily, but at the same time we
185 // want to keep the j indices. This is exactly what Goodies.sortMapByValue does
186 LinkedHashMap<Integer, Double> anglesSorted = Goodies.sortMapByValue(anglesInColumn, Goodies.ASCENDING);
187 ArrayList<Double> values = new ArrayList<Double>(anglesSorted.values());
188 ArrayList<Integer> jIndices = new ArrayList<Integer>(anglesSorted.keySet());
189 extendLists(values, jIndices, angleInterval);
190 for (int startIndex=0;startIndex<values.size()-1;startIndex++) {
191 ConsensusInterval consInterv = new ConsensusInterval();
192 consInterv.addVoter(jIndices.get(startIndex), templates.get(jIndices.get(startIndex)).getId(), ConsensusInterval.unwrapAngle(values.get(startIndex)));
193 for (int endIndex=startIndex+1;endIndex<values.size();endIndex++) {
194 if ((values.get(endIndex) - values.get(startIndex))<=angleInterval) {
195 consInterv.addVoter(jIndices.get(endIndex), templates.get(jIndices.get(endIndex)).getId(), ConsensusInterval.unwrapAngle(values.get(endIndex)));
196 }
197 }
198 if (consInterv.getVoteCount()>voteThreshold) { // note we use strictly bigger, see comment in getConsensusPhiPsi
199 consInterv.reCenterInterval(angleInterval);
200 allConsInterv1stDim.add(consInterv);
201 }
202 }
203 return allConsInterv1stDim;
204 }
205
206 /**
207 * Gets the ConsensusInterval for the given list of 2nd dimension values (psi angles) for a
208 * column in the alignment and the given consInterva1stDim (the phi consensus interval).
209 * We get each of the 2nd dimension (psi) values corresponding to each 1st dimension (phi)
210 * voted template, if they are within a angleInterval window then they are taken as our
211 * ConsensusInterval (recentering first the interval at max-min/2)
212 *
213 * @param anglesInColumn
214 * @param consInterv1stDim
215 * @param angleInterval
216 * @return the ConsensusInterval or null if the psi values don't fit within an angleInterval window
217 */
218 private ConsensusInterval findConsInterval2ndDim(HashMap<Integer,Double> anglesInColumn, ConsensusInterval consInterv1stDim, int angleInterval) {
219
220 // we use a values2ndDim ArrayList to put the 2nd dimension (psi) values corresponding to the given 1st dimension (phi) voters
221 ArrayList<Double> values2ndDim = new ArrayList<Double>();
222 for (int voterIndex:consInterv1stDim.getVotersIndices()) {
223 values2ndDim.add(anglesInColumn.get(voterIndex));
224 }
225 // now values2ndDim contains one value for each of the 1st dimension voters. We want to see if they fall within the angleInterval cutoff
226
227 // to find whether all the angles fall within the given angleInterval we have to remember
228 // that the angles are in a circle and thus is not as easy as take max-min. We use a brute force
229 // approach: take our angleDistance function and then measure all pairwise distances and take the maximum
230 double maxDistance = -1;
231 if (values2ndDim.contains(Double.NaN)) {
232 maxDistance = Double.NaN;
233 } else {
234 for (int i=0; i<values2ndDim.size();i++) {
235 for (int j=i+1;j<values2ndDim.size();j++) {
236 double dist = ConsensusInterval.angleDistance(values2ndDim.get(i),values2ndDim.get(j));
237 if (dist>maxDistance) {
238 maxDistance = ConsensusInterval.angleDistance(values2ndDim.get(i), values2ndDim.get(j));
239 }
240 }
241 }
242 }
243 // maxDistance==-1 shouldn't happen at all but just in case!
244 if (maxDistance!=-1 && maxDistance<=angleInterval) {
245 // first we initialise with a dummy 0,0 interval, then we use the recenter method to get the interval
246 // ATTENTION! we pass references of voters and voterIndices from consInterv1stDim: that means that the 2 members of
247 // the final ConsensusSquare object will be referencing the same objects
248 ConsensusInterval consInterv2ndDim =
249 new ConsensusInterval(0, 0,
250 consInterv1stDim.getVoters(),
251 values2ndDim,
252 consInterv1stDim.getVotersIndices(),
253 consInterv1stDim.getVoteCount());
254 consInterv2ndDim.reCenterInterval(angleInterval);
255 return consInterv2ndDim;
256 }
257 return null;
258 }
259
260 /**
261 * Internal class to hold the Template together with its phi/psi angles.
262 */
263 private class TemplateWithPhiPsi extends Template {
264 private TreeMap<Integer,double[]> phipsiAngles;
265 public TemplateWithPhiPsi (String id, Pdb pdb, TreeMap<Integer,double[]> phipsiAngles) {
266 super(id);
267 this.phipsiAngles = phipsiAngles;
268 this.setPdb(pdb);
269 }
270
271 public double getPhiAngle(int resser) {
272 return phipsiAngles.get(resser)[0];
273 }
274 public double getPsiAngle(int resser) {
275 return phipsiAngles.get(resser)[1];
276 }
277 public boolean hasPhiPsiAngles(int resser) {
278 return phipsiAngles.containsKey(resser);
279 }
280 }
281
282 private void extendLists (ArrayList<Double> values, ArrayList<Integer> jIndices, int angleInterval) {
283
284 // graphically what we are doing is extending the list in this way: (w= angleInterval)
285 // |-------'-----------------------------------|-------|
286 // -180 180
287 // <---w---> <---w--->
288 // \ ^
289 // \ /
290 // ------------------------------------------
291
292 ArrayList<Double> valuesToAdd = new ArrayList<Double>();
293 for (int i=0; i< values.size();i++) { // values is sorted, thus when we add to valuesToAdd new values are sorted too
294 if (values.get(i)< -180+angleInterval ) {
295 valuesToAdd.add(ConsensusInterval.wrapAngle(values.get(i)));
296 jIndices.add(jIndices.get(i));
297 }
298 }
299 for (double value:valuesToAdd) {
300 values.add(value);
301 }
302
303 }
304
305 /**
306 * Gets the phi/psi angles for all templates putting them into this.templates.
307 * Checks first that all template have PDB data removing the ones that don't
308 */
309 private void getPhiPsi(TemplateList templates) {
310 this.templates = new ArrayList<TemplateWithPhiPsi>();
311 for (Template template: templates) {
312 if (template.hasPdbData()) {
313 this.templates.add(new TemplateWithPhiPsi(template.getId(), template.getPdb(), template.getPdb().getAllPhiPsi()));
314 }
315 }
316 if (this.templates.size()<templates.size()) {
317 System.out.println("Using only "+this.templates.size()+" templates for psi/phi averaging, out of given "+templates.size());
318 }
319 }
320
321 /**
322 * Checks that tags and sequences are consistent between this.al and this.templates
323 *
324 */
325 private void checkSequences(TemplateList templates){
326 for (Template template :templates){
327 if (!al.hasTag(template.getId())){
328 System.err.println("Alignment is missing template sequence "+template.getId()+", check the FASTA tags");
329 // TODO throw exception
330 }
331 }
332 //if (templates.size()!=al.getNumberOfSequences()-1){
333 // System.err.println("Number of sequences in alignment is different from number of templates +1 ");
334 // // TODO throw exception
335 //}
336
337 for (Template template:templates){
338 if(!al.getSequenceNoGaps(template.getId()).equals(template.getPdb().getSequence())) {
339 System.err.println("Sequence of template (from pdbase) "+template.getId()+" does not match sequence in alignment");
340 System.err.println("Trying to align sequences of alignment vs pdbase sequence: ");
341 try {
342 PairwiseSequenceAlignment alCheck = new PairwiseSequenceAlignment(template.getPdb().getSequence(),al.getSequenceNoGaps(template.getId()),"pdbase","alignment");
343 alCheck.printAlignment();
344 } catch (PairwiseSequenceAlignmentException e) {
345 System.err.println("Error while creating alignment check, can't display an alignment. The 2 sequences are: ");
346 System.err.println("graph: "+template.getPdb().getSequence());
347 System.err.println("alignment: "+al.getSequenceNoGaps(template.getId()));
348 }
349 // TODO throw exception
350 }
351 }
352 }
353
354 /**
355 * To test the class
356 * @param args
357 */
358 public static void main(String[] args) throws Exception {
359 MySQLConnection conn = new MySQLConnection("white","duarte","nieve");
360
361 File templatesFile = new File("/scratch/local/phipsi/T0332.templates");
362 File alnFile = new File("/scratch/local/phipsi/T0332.target2templ.fasta");
363 File psipredFile = new File("/scratch/local/phipsi/T0332.horiz");
364 //File templatesFile = new File("/project/StruPPi/jose/casp/test_phipsi/T0290.templates");
365 //File alnFile = new File("/project/StruPPi/jose/casp/test_phipsi/T0290.target2templ.fasta");
366 //File psipredFile = new File("/project/StruPPi/jose/casp/test_phipsi/T0290.horiz");
367 String targetTag = "T0332";
368
369
370 String pdbaseDb = "pdbase";
371 TemplateList templates = new TemplateList(templatesFile);
372 //Template temp1 = new Template("2f8aA");
373 //Template temp2 = new Template("1gp1A");
374 //Template temp3 = new Template("2gs3A");
375
376 //TemplateList templates = new TemplateList();
377 //templates.add(temp1);
378 //templates.add(temp2);
379 //templates.add(temp3);
380 templates.loadPdbData(conn, pdbaseDb);
381
382 Alignment aln = new Alignment(alnFile.getAbsolutePath(),"FASTA");
383 System.out.println("Secondary structure matching: ");
384 aln.writeSecStructMatching(System.out, targetTag, psipredFile , conn, pdbaseDb, "/project/StruPPi/bin/dssp");
385 System.out.println();
386 System.out.println("phi/psi angles of each sequence in alignment. Legend: line1 aln positions, line2 sequence positions, line3 phi, line4 psi");
387 PhiPsiAverager ppAvrg = new PhiPsiAverager(templates,aln);
388
389 TreeMap<Integer, ConsensusSquare> phipsibounds = ppAvrg.getConsensusPhiPsi(0.5, 20);
390
391 // printing phi/psi angles in each of the templates
392 for (TemplateWithPhiPsi template:ppAvrg.templates) {
393 TreeMap<Integer,double[]> phipsi = template.phipsiAngles;
394 //String sequence = template.getPdb().getSequence();
395 System.out.println(template.getId());
396 // printing alignment positions
397 for (int i=1; i<=aln.getAlignmentLength(); i++) {
398 System.out.printf("%5d",i);
399 }
400 System.out.println();
401 // printing residue serials
402 for (int i=1; i<=aln.getAlignmentLength(); i++) {
403 int resser = aln.al2seq(template.getId(), i);
404 if (resser!=-1 && template.hasPhiPsiAngles(resser))
405 System.out.printf("%5s",resser);
406 else System.out.printf("%5s","");
407 }
408 System.out.println();
409 // printing phis
410 for (int i=1; i<=aln.getAlignmentLength(); i++) {
411 int resser = aln.al2seq(template.getId(), i);
412 if (resser!=-1 && template.hasPhiPsiAngles(resser))
413 System.out.printf("%5.0f", phipsi.get(resser)[0]);
414 else System.out.printf("%5s","");
415 }
416 System.out.println();
417 // printing psis
418 for (int i=1; i<=aln.getAlignmentLength(); i++) {
419 int resser = aln.al2seq(template.getId(), i);
420 if (resser!=-1 && template.hasPhiPsiAngles(resser))
421 System.out.printf("%5.0f", phipsi.get(resser)[1]);
422 else System.out.printf("%5s","");
423 }
424 System.out.println();
425 }
426
427 // printing consensus intervals
428 System.out.println("Alignment positions that have phi/psi consensus.");
429 System.out.println("Legend: col1= aln pos, columns before #: phi interval begin, phi interval end, phi values of members. Same for columns after # but for psi");
430 for (int alnPos: phipsibounds.keySet()) {
431 System.out.printf("%3d %4d %4d ",
432 alnPos,
433 phipsibounds.get(alnPos).getConsInterval1stDim().beg,
434 phipsibounds.get(alnPos).getConsInterval1stDim().end);
435 for (double val:phipsibounds.get(alnPos).getConsInterval1stDim().getValues()) {
436 System.out.printf("%4.0f ",val);
437 }
438 System.out.printf("# %4d %4d ",
439 phipsibounds.get(alnPos).getConsInterval2ndDim().beg,
440 phipsibounds.get(alnPos).getConsInterval2ndDim().end);
441 for (double val:phipsibounds.get(alnPos).getConsInterval2ndDim().getValues()) {
442 System.out.printf("%4.0f ",val);
443 }
444 if (phipsibounds.get(alnPos).getConsInterval1stDim().beg>phipsibounds.get(alnPos).getConsInterval1stDim().end) {
445 System.out.print("Wrapping on phi");
446 }
447 if (phipsibounds.get(alnPos).getConsInterval2ndDim().beg>phipsibounds.get(alnPos).getConsInterval2ndDim().end) {
448 System.out.print("Wrapping on psi");
449 }
450 System.out.println();
451 }
452
453 }
454 }