ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/PairwiseAlignmentConverter.java
Revision: 405
Committed: Tue Nov 20 16:37:54 2007 UTC (16 years, 10 months ago) by duarte
File size: 10827 byte(s)
Log Message:
Alignment and PairwiseAlignmentConverter now using Interval, IntervalSet and TreeSet<Integer> to replace Edge, EdgeSet and NodeSet
New class IntervalSet
Line User Rev File contents
1 lpetzo 372 package proteinstructure;
2    
3     import java.util.Iterator;
4     import java.util.TreeMap;
5    
6     /**
7     * Instances of this class construct pairwise alignments based on a matching
8     * between position in the first sequence onto positions in the second
9     * sequence.
10     * <p>
11     * Execute method {@link main(String)} to get an impression of the functionality of this class.
12     *
13     * @author Lars Petzold
14     * @see aglappe.sadp.SADP
15     * */
16     public class PairwiseAlignmentConverter {
17    
18     private static char GAP = '-';
19     private static char UNDEF = 'X';
20     private Alignment ali = null;
21 duarte 405 private Iterator<Interval> it = null;
22 lpetzo 372 private int[] lengths = new int[2];
23     private String[] sequences = new String[2];
24     private String[] tags = new String[2];
25     private int offset = 0;
26    
27     /**
28     * Constructs sequence alignment for the given edge set.
29     * The tags are used as sequence identifiers. The length parameters as the
30     * length of the pseudo-sequence which consists only of 'X' characters
31     * denoting undefined amino acids. Use constructors taking the sequences
32     * itself to avoid pseudo-sequences.
33     * @param it edge iterator
34     * @param len1 length of the first pseudo-sequence (assumed length of the original sequence)
35     * @param len2 length of the second pseudo-sequence (assumed length of the original sequence)
36     * @param tag1 sequence name of the first sequence
37     * @param tag2 sequence name of the second sequence
38     * */
39 duarte 405 public PairwiseAlignmentConverter( Iterator<Interval> it, int len1, int len2, String tag1, String tag2, int fi )
40 lpetzo 385 throws AlignmentConstructionError {
41 lpetzo 372 this(it,len1,len2,null,null,tag1,tag2,fi);
42     }
43    
44     /**
45     * The actual constructor. Each public constructor calls this constructor.
46     * @param it edge iterator
47     * @param len1 length of the first sequence
48     * @param len2 length of the second sequence
49     * @param seq1 first sequence
50     * @param seq2 second sequence
51     * @param tag1 name of the first sequence
52     * @param tag2 name of the second sequence
53     * */
54 duarte 405 private PairwiseAlignmentConverter( Iterator<Interval> it, int len1, int len2, String seq1, String seq2, String tag1, String tag2, int fi)
55 lpetzo 385 throws AlignmentConstructionError {
56 lpetzo 372
57     sequences[0] = seq1;
58     sequences[1] = seq2;
59     tags[0] = tag1;
60     tags[1] = tag2;
61     lengths[0] = len1;
62     lengths[1] = len2;
63     this.it = it;
64     offset = -fi; // flip the sign to achieve a index convertion where 0 is the first index
65    
66     buildAlignment();
67     }
68    
69 duarte 405 public PairwiseAlignmentConverter( Iterator<Interval> it, int len1, String seq2, String tag1, String tag2, int fi)
70 lpetzo 385 throws AlignmentConstructionError {
71 lpetzo 372 this(it,len1,seq2.length(),null,seq2,tag1,tag2,fi);
72     }
73    
74 duarte 405 public PairwiseAlignmentConverter( Iterator<Interval> it, String seq1, int len2, String tag1, String tag2, int fi)
75 lpetzo 385 throws AlignmentConstructionError {
76 lpetzo 372 this(it,seq1.length(),len2,seq1,null,tag1,tag2,fi);
77     }
78    
79 duarte 405 public PairwiseAlignmentConverter( Iterator<Interval> it, String seq1, String seq2, String tag1, String tag2, int fi )
80 lpetzo 385 throws AlignmentConstructionError {
81 lpetzo 372 this(it,seq1.length(),seq2.length(),seq1,seq2,tag1,tag2,fi);
82     }
83    
84     /**
85     * Builds the alignment based on the preset members.
86     */
87 lpetzo 385 private void buildAlignment( ) throws AlignmentConstructionError {
88 lpetzo 372
89     // gapped sequences for the respective contact maps
90     StringBuffer s1 = new StringBuffer(Math.max(lengths[0],lengths[1]));
91     StringBuffer s2 = new StringBuffer(s1.capacity());
92    
93     // sequence positions
94     int prev1 = -1;
95     int prev2 = -1;
96     int cur1 = 0;
97     int cur2 = 0;
98 duarte 405 Interval e = null;
99 lpetzo 372
100     while( it.hasNext() ) {
101    
102     e = it.next();
103 duarte 405 cur1 = e.beg+offset;
104     cur2 = e.end+offset;
105 lpetzo 372
106     // puts columns ("non-trusted" MATCHes and MISMATCHes) between two
107     // consecutive matching edges
108     putColumnsBetweenMatches(prev1+1,cur1-prev1-1,prev2+1,cur2-prev2-1,s1,s2);
109    
110     // add recently found MATCH (cur1,cur2)
111     putMatch(cur1,cur2,s1,s2);
112    
113     // ... and update position counters
114     prev1 = cur1;
115     prev2 = cur2;
116     }
117    
118     // insert trailing GAPs and probably some "non-trusted" MATCHes
119     putColumnsBetweenMatches(cur1+1, lengths[0]-cur1-1, cur2+1, lengths[1]-cur2-1, s1, s2);
120    
121     // compose the name to sequence mapping
122     TreeMap<String, String> name2seq = new TreeMap<String, String>();
123     name2seq.put(tags[0], s1.toString());
124     name2seq.put(tags[1], s2.toString());
125    
126     ali = new Alignment(name2seq);
127     }
128    
129     /**
130     * Retrieves alignment.
131     *
132     * @return pairwise alignment based on the set matching edges as being
133     * passed to any constructor.
134     */
135     public Alignment getAlignment() {
136    
137     return ali;
138     }
139    
140     /**
141     * Puts a character into the given gapped sequence.
142     * <p>
143     * If the actual sequence is not provided an X is inserted instead denoting
144     * a non standard sequence item.
145     *
146     * @param pos index of character in the original sequence to be set
147     * @param s sequence already containing GAP-characters
148     * @param whichOne toggles the sequences, 1 -> first sequence, 2 -> second
149     * sequence
150     */
151     private void putCharacter( int pos, StringBuffer s, int whichOne ) {
152    
153     if( sequences[whichOne-1] != null ) {
154     s.append(sequences[whichOne-1].charAt(pos));
155     } else {
156     s.append(UNDEF);
157     }
158     }
159    
160     /**
161     * In one or both sequences serveral positions might have been skipped for
162     * some reasons. This method therefore applies a greedy strategy to first
163     * introduce "non-trusted" MATCHes and secondly to align the remaining
164     * positions -- in at most one sequence -- to GAPs.
165     */
166     private void putColumnsBetweenMatches( int beg1, int len1, int beg2, int len2, StringBuffer s1, StringBuffer s2 ) {
167    
168     int pos = 0;
169    
170     // we do model preceding gaps first
171     if( len1 > 0 ) {
172     if( len2 > 0 ) {
173     // model non-trusted MATCHes first
174     for( pos = 0; pos < Math.min(len1,len2); ++pos ) {
175     putMatch(beg1+pos,beg2+pos,s1,s2);
176     }
177     // secondly, introduce GAPs in one of the sequences
178     if( len1 > len2 ) {
179     for( ; pos < len1; ++pos ) {
180     putGapInSecond(beg1+pos,s1,s2);
181     }
182     } else {
183     for( ; pos < len2; ++pos ) {
184     putGapInFirst(beg2+pos,s1,s2);
185     }
186     }
187     } else {
188     // introduce GAPS in the second sequence
189     for( pos = 0; pos < len1; ++pos ) {
190     putGapInSecond(beg1+pos,s1,s2);
191     }
192     }
193     } else if (len2 > 0) {
194     // introduce GAPs in the first sequence
195     for( pos = 0; pos < len2; ++pos ) {
196     putGapInFirst(beg2+pos,s1,s2);
197     }
198     }
199     }
200    
201     /**
202     * Puts a GAP in the first sequence and a character in the second sequence.
203     *
204     * @param pos2 character in the second sequence to be set
205     * @param s1 first sequence already containing GAP-characters
206     * @param s2 second sequence already containing GAP-characters
207     */
208     private void putGapInFirst( int pos2, StringBuffer s1, StringBuffer s2 ) {
209    
210     s1.append(GAP);
211     putCharacter(pos2,s2,2);
212     }
213    
214     /**
215     * Puts a character in the first sequence and a GAP in the second sequence.
216     *
217     * @param pos1 index of character in the second sequence to be set
218     * @param s1 first sequence already containing GAP-characters
219     * @param s2 second sequence already containing GAP-characters
220     */
221     private void putGapInSecond( int pos1, StringBuffer s1, StringBuffer s2 ) {
222    
223     putCharacter(pos1,s1,1);
224     s2.append(GAP);
225     }
226    
227     /**
228     * Puts a MATCH which means that pos1'th character is to be inserted in s1 and the pos2'th in s2.
229     *
230     * @param pos1 current position in the first sequence
231     * @param pos2 current position in the second sequence
232     * @param s1 first sequence already containing GAP-characters
233     * @param s2 second sequence already containing GAP-characters
234     */
235     private void putMatch( int pos1, int pos2, StringBuffer s1, StringBuffer s2 ) {
236    
237     putCharacter(pos1,s1,1);
238     putCharacter(pos2,s2,2);
239     }
240    
241     /**
242     * Tests PairwiseAlignmentConverter and gives Examples how to use it.
243     * */
244     public static void main( String args[]) {
245    
246     // MATCHING:
247     // 0 1 2 3 4 5 6 7
248     // - - o - o o o - - o o o o
249     // | | | : | :
250     // o o o o o o o o o o o - -
251     // 0 1 2 3 4 5 6 7 8 9 1
252     // 0
253     //
254     // LEGEND:
255     // '-' -> GAP
256     // 'o' -> position
257     // '|' -> MATCH
258     // ':' -> "non trusted" MATCH (does not have a reference in in the edge set)
259    
260 duarte 405 IntervalSet intervals = new IntervalSet();
261     intervals.add(new Interval(0,2));
262     intervals.add(new Interval(1,4));
263     intervals.add(new Interval(2,5));
264     intervals.add(new Interval(4,9));
265 lpetzo 372
266     String seq1 = "ABCDEFGH";
267     String seq2 = "ABCDEFGHIJK";
268    
269     String tag1 = "seq1";
270     String tag2 = "seq2";
271    
272     PairwiseAlignmentConverter[] pab = new PairwiseAlignmentConverter[4];
273    
274 lpetzo 385 try {
275 duarte 405 pab[0] = new PairwiseAlignmentConverter( intervals.iterator(), seq1, seq2, tag1, tag2, 0 );
276     pab[1] = new PairwiseAlignmentConverter( intervals.iterator(), seq1.length(), seq2, tag1, tag2, 0 );
277     pab[2] = new PairwiseAlignmentConverter( intervals.iterator(), seq1, seq2.length(), tag1, tag2, 0 );
278     pab[3] = new PairwiseAlignmentConverter( intervals.iterator(), seq1.length(), seq2.length(), tag1, tag2, 0 );
279 lpetzo 385 } catch(Exception e) {
280     System.err.println(e.getMessage());
281     System.exit(-1);
282     }
283    
284 lpetzo 372 for( int i=0; i<pab.length; ++i ) {
285    
286     Alignment a = pab[i].getAlignment();
287    
288     System.out.println("\n" + i + "\n``````````````````");
289    
290     for( String seqTag:a.getTags() ) {
291     System.out.println(seqTag + ": " + a.getAlignedSequence(seqTag));
292     }
293     }
294     }
295    
296     }