ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/branches/aglappe-jung/proteinstructure/PairwiseAlignmentConverter.java
Revision: 426
Committed: Fri Nov 23 18:04:15 2007 UTC (16 years, 11 months ago) by duarte
File size: 9591 byte(s)
Log Message:
Now PairWiseAlignmentConverter and PairwiseAlignmentGraphConverter work in the new JUNG framework.
New class IntPairComparator
Now sorting output in RIGraph.write_graph_to_file
Line File contents
1 package proteinstructure;
2
3 import java.util.Iterator;
4 import java.util.TreeMap;
5 import java.util.TreeSet;
6
7 import edu.uci.ics.jung.graph.util.Pair;
8
9 /**
10 * Instances of this class construct pairwise alignments based on a matching
11 * between position in the first sequence onto positions in the second
12 * sequence.
13 * <p>
14 * Execute method {@link main(String)} to get an impression of the functionality of this class.
15 *
16 * @author Lars Petzold
17 * @see aglappe.sadp.SADP
18 * */
19 public class PairwiseAlignmentConverter {
20
21 private static char GAP = '-';
22 private static char UNDEF = 'X';
23 private Alignment ali = null;
24 private Iterator<Pair<Integer>> it = null;
25 private int[] lengths = new int[2];
26 private String[] sequences = new String[2];
27 private String[] tags = new String[2];
28 private int offset = 0;
29
30 /**
31 * Constructs sequence alignment for the given edge set.
32 * The tags are used as sequence identifiers. The length parameters as the
33 * length of the pseudo-sequence which consists only of 'X' characters
34 * denoting undefined amino acids. Use constructors taking the sequences
35 * itself to avoid pseudo-sequences.
36 * @param it edge iterator
37 * @param len1 length of the first pseudo-sequence (assumed length of the original sequence)
38 * @param len2 length of the second pseudo-sequence (assumed length of the original sequence)
39 * @param tag1 sequence name of the first sequence
40 * @param tag2 sequence name of the second sequence
41 * */
42 public PairwiseAlignmentConverter( Iterator<Pair<Integer>> it, int len1, int len2, String tag1, String tag2, int fi )
43 throws AlignmentConstructionError {
44 this(it,len1,len2,null,null,tag1,tag2,fi);
45 }
46
47 /**
48 * The actual constructor. Each public constructor calls this constructor.
49 * @param it edge iterator
50 * @param len1 length of the first sequence
51 * @param len2 length of the second sequence
52 * @param seq1 first sequence
53 * @param seq2 second sequence
54 * @param tag1 name of the first sequence
55 * @param tag2 name of the second sequence
56 * */
57 private PairwiseAlignmentConverter( Iterator<Pair<Integer>> it, int len1, int len2, String seq1, String seq2, String tag1, String tag2, int fi)
58 throws AlignmentConstructionError {
59
60 sequences[0] = seq1;
61 sequences[1] = seq2;
62 tags[0] = tag1;
63 tags[1] = tag2;
64 lengths[0] = len1;
65 lengths[1] = len2;
66 this.it = it;
67 offset = -fi; // flip the sign to achieve a index convertion where 0 is the first index
68
69 buildAlignment();
70 }
71
72 public PairwiseAlignmentConverter( Iterator<Pair<Integer>> it, int len1, String seq2, String tag1, String tag2, int fi)
73 throws AlignmentConstructionError {
74 this(it,len1,seq2.length(),null,seq2,tag1,tag2,fi);
75 }
76
77 public PairwiseAlignmentConverter( Iterator<Pair<Integer>> it, String seq1, int len2, String tag1, String tag2, int fi)
78 throws AlignmentConstructionError {
79 this(it,seq1.length(),len2,seq1,null,tag1,tag2,fi);
80 }
81
82 public PairwiseAlignmentConverter( Iterator<Pair<Integer>> it, String seq1, String seq2, String tag1, String tag2, int fi )
83 throws AlignmentConstructionError {
84 this(it,seq1.length(),seq2.length(),seq1,seq2,tag1,tag2,fi);
85 }
86
87 /**
88 * Builds the alignment based on the preset members.
89 */
90 private void buildAlignment( ) throws AlignmentConstructionError {
91
92 // gapped sequences for the respective contact maps
93 StringBuffer s1 = new StringBuffer(Math.max(lengths[0],lengths[1]));
94 StringBuffer s2 = new StringBuffer(s1.capacity());
95
96 // sequence positions
97 int prev1 = -1;
98 int prev2 = -1;
99 int cur1 = 0;
100 int cur2 = 0;
101 Pair<Integer> e = null;
102
103 while( it.hasNext() ) {
104
105 e = it.next();
106 cur1 = e.getFirst()+offset;
107 cur2 = e.getSecond()+offset;
108
109 // puts columns ("non-trusted" MATCHes and MISMATCHes) between two
110 // consecutive matching edges
111 putColumnsBetweenMatches(prev1+1,cur1-prev1-1,prev2+1,cur2-prev2-1,s1,s2);
112
113 // add recently found MATCH (cur1,cur2)
114 putMatch(cur1,cur2,s1,s2);
115
116 // ... and update position counters
117 prev1 = cur1;
118 prev2 = cur2;
119 }
120
121 // insert trailing GAPs and probably some "non-trusted" MATCHes
122 putColumnsBetweenMatches(cur1+1, lengths[0]-cur1-1, cur2+1, lengths[1]-cur2-1, s1, s2);
123
124 // compose the name to sequence mapping
125 TreeMap<String, String> name2seq = new TreeMap<String, String>();
126 name2seq.put(tags[0], s1.toString());
127 name2seq.put(tags[1], s2.toString());
128
129 ali = new Alignment(name2seq);
130 }
131
132 /**
133 * Retrieves alignment.
134 *
135 * @return pairwise alignment based on the set matching edges as being
136 * passed to any constructor.
137 */
138 public Alignment getAlignment() {
139
140 return ali;
141 }
142
143 /**
144 * Puts a character into the given gapped sequence.
145 * <p>
146 * If the actual sequence is not provided an X is inserted instead denoting
147 * a non standard sequence item.
148 *
149 * @param pos index of character in the original sequence to be set
150 * @param s sequence already containing GAP-characters
151 * @param whichOne toggles the sequences, 1 -> first sequence, 2 -> second
152 * sequence
153 */
154 private void putCharacter( int pos, StringBuffer s, int whichOne ) {
155
156 if( sequences[whichOne-1] != null ) {
157 s.append(sequences[whichOne-1].charAt(pos));
158 } else {
159 s.append(UNDEF);
160 }
161 }
162
163 /**
164 * In one or both sequences serveral positions might have been skipped for
165 * some reasons. This method therefore applies a greedy strategy to first
166 * introduce "non-trusted" MATCHes and secondly to align the remaining
167 * positions -- in at most one sequence -- to GAPs.
168 */
169 private void putColumnsBetweenMatches( int beg1, int len1, int beg2, int len2, StringBuffer s1, StringBuffer s2 ) {
170
171 int pos = 0;
172
173 // we do model preceding gaps first
174 if( len1 > 0 ) {
175 if( len2 > 0 ) {
176 // model non-trusted MATCHes first
177 for( pos = 0; pos < Math.min(len1,len2); ++pos ) {
178 putMatch(beg1+pos,beg2+pos,s1,s2);
179 }
180 // secondly, introduce GAPs in one of the sequences
181 if( len1 > len2 ) {
182 for( ; pos < len1; ++pos ) {
183 putGapInSecond(beg1+pos,s1,s2);
184 }
185 } else {
186 for( ; pos < len2; ++pos ) {
187 putGapInFirst(beg2+pos,s1,s2);
188 }
189 }
190 } else {
191 // introduce GAPS in the second sequence
192 for( pos = 0; pos < len1; ++pos ) {
193 putGapInSecond(beg1+pos,s1,s2);
194 }
195 }
196 } else if (len2 > 0) {
197 // introduce GAPs in the first sequence
198 for( pos = 0; pos < len2; ++pos ) {
199 putGapInFirst(beg2+pos,s1,s2);
200 }
201 }
202 }
203
204 /**
205 * Puts a GAP in the first sequence and a character in the second sequence.
206 *
207 * @param pos2 character in the second sequence to be set
208 * @param s1 first sequence already containing GAP-characters
209 * @param s2 second sequence already containing GAP-characters
210 */
211 private void putGapInFirst( int pos2, StringBuffer s1, StringBuffer s2 ) {
212
213 s1.append(GAP);
214 putCharacter(pos2,s2,2);
215 }
216
217 /**
218 * Puts a character in the first sequence and a GAP in the second sequence.
219 *
220 * @param pos1 index of character in the second sequence to be set
221 * @param s1 first sequence already containing GAP-characters
222 * @param s2 second sequence already containing GAP-characters
223 */
224 private void putGapInSecond( int pos1, StringBuffer s1, StringBuffer s2 ) {
225
226 putCharacter(pos1,s1,1);
227 s2.append(GAP);
228 }
229
230 /**
231 * Puts a MATCH which means that pos1'th character is to be inserted in s1 and the pos2'th in s2.
232 *
233 * @param pos1 current position in the first sequence
234 * @param pos2 current position in the second sequence
235 * @param s1 first sequence already containing GAP-characters
236 * @param s2 second sequence already containing GAP-characters
237 */
238 private void putMatch( int pos1, int pos2, StringBuffer s1, StringBuffer s2 ) {
239
240 putCharacter(pos1,s1,1);
241 putCharacter(pos2,s2,2);
242 }
243
244 /**
245 * Tests PairwiseAlignmentConverter and gives Examples how to use it.
246 * */
247 public static void main( String args[]) {
248
249 // MATCHING:
250 // 0 1 2 3 4 5 6 7
251 // - - o - o o o - - o o o o
252 // | | | : | :
253 // o o o o o o o o o o o - -
254 // 0 1 2 3 4 5 6 7 8 9 1
255 // 0
256 //
257 // LEGEND:
258 // '-' -> GAP
259 // 'o' -> position
260 // '|' -> MATCH
261 // ':' -> "non trusted" MATCH (does not have a reference in in the edge set)
262
263 TreeSet<Pair<Integer>> edges = new TreeSet<Pair<Integer>>(new IntPairComparator());
264 edges.add(new Pair<Integer>(0,2));
265 edges.add(new Pair<Integer>(1,4));
266 edges.add(new Pair<Integer>(2,5));
267 edges.add(new Pair<Integer>(4,9));
268
269 String seq1 = "ABCDEFGH";
270 String seq2 = "ABCDEFGHIJK";
271
272 String tag1 = "seq1";
273 String tag2 = "seq2";
274
275 PairwiseAlignmentConverter[] pab = new PairwiseAlignmentConverter[4];
276
277 try {
278 pab[0] = new PairwiseAlignmentConverter( edges.iterator(), seq1, seq2, tag1, tag2, 0 );
279 pab[1] = new PairwiseAlignmentConverter( edges.iterator(), seq1.length(), seq2, tag1, tag2, 0 );
280 pab[2] = new PairwiseAlignmentConverter( edges.iterator(), seq1, seq2.length(), tag1, tag2, 0 );
281 pab[3] = new PairwiseAlignmentConverter( edges.iterator(), seq1.length(), seq2.length(), tag1, tag2, 0 );
282 } catch(Exception e) {
283 System.err.println(e.getMessage());
284 System.exit(-1);
285 }
286
287 for( int i=0; i<pab.length; ++i ) {
288
289 Alignment a = pab[i].getAlignment();
290
291 System.out.println("\n" + i + "\n``````````````````");
292
293 for( String seqTag:a.getTags() ) {
294 System.out.println(seqTag + ": " + a.getAlignedSequence(seqTag));
295 }
296 }
297 }
298
299 }