ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/align_status.cpp
Revision: 236
Committed: Fri May 4 22:20:02 2012 UTC (12 years, 4 months ago) by gpertea
File size: 7979 byte(s)
Log Message:
sync, wip prep_reads

Line File contents
1 /*
2 * align_status.cpp
3 * TopHat
4 *
5 * Created by Ryan Kelley on 11/09/2010.
6 *
7 */
8
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <cassert>
14 #include <cstdlib>
15 #include <iostream>
16 #include <set>
17 #include <vector>
18
19 #include "common.h"
20 #include "bwt_map.h"
21 #include "tokenize.h"
22 #include "reads.h"
23 #include "junctions.h"
24 #include "insertions.h"
25 #include "deletions.h"
26 #include "fusions.h"
27 #include "coverage.h"
28 #include "align_status.h"
29
30 using namespace std;
31
32 AlignStatus::AlignStatus()
33 {
34 _alignment_score = std::numeric_limits<int>::min();
35 }
36
37 /**
38 * Parse the cigar string of a BowtieHit in order to determine the alignment status.
39 */
40 AlignStatus::AlignStatus(const BowtieHit& bh,
41 const JunctionSet& gtf_junctions,
42 const JunctionSet& junctions,
43 const InsertionSet& insertions,
44 const DeletionSet& deletions,
45 const FusionSet& fusions,
46 const Coverage& coverage)
47 {
48 // it seems like we need to test this more
49 // daehwan - it doesn't seem to work.
50 const bool recalculate_indel_score = false;
51
52 const vector<CigarOp>& cigar = bh.cigar();
53 _alignment_score = bh.alignment_score();
54
55 const int read_len = bh.read_len();
56 const int min_extent = min(read_len / 4, 10);
57 bool recalculate_score = !junctions.empty();
58
59 int j = bh.left();
60 int r = 0;
61 RefID ref_id = bh.ref_id();
62 for (size_t c = 0 ; c < cigar.size(); ++c)
63 {
64 int opcode = cigar[c].opcode;
65 int length = cigar[c].length;
66 switch(opcode)
67 {
68 case REF_SKIP:
69 case rEF_SKIP:
70 {
71 Junction junc;
72 junc.refid = bh.ref_id();
73
74 if (opcode == REF_SKIP)
75 {
76 junc.left = j - 1;
77 junc.right = j + length;
78 j += length;
79 }
80 else
81 {
82 junc.right = j + 1;
83 junc.left = j - length;
84 j -= length;
85 }
86
87 if (recalculate_score)
88 {
89 junc.antisense = bh.antisense_splice();
90
91 if (gtf_junctions.find(junc) == gtf_junctions.end())
92 {
93 JunctionSet::const_iterator itr = junctions.find(junc);
94 if (itr == junctions.end())
95 {
96 _alignment_score -= bowtie2_max_penalty;
97 }
98 else
99 {
100 const int left_cov = coverage.get_coverage(ref_id, junc.left + 1);
101 const int right_cov = coverage.get_coverage(ref_id, junc.right - 1);
102 const int avg_cov = (left_cov + right_cov) / 2;
103
104 int penalty = bowtie2_max_penalty;
105 const int supporting_hits = itr->second.supporting_hits;
106 const int left_extent = itr->second.left_extent;
107 const int right_extent = itr->second.right_extent;
108 float extent_penalty = 0.0f;
109 if (left_extent < min_extent || right_extent < min_extent)
110 extent_penalty = 0.5f;
111
112 if (supporting_hits >= 5)
113 penalty *= min((float)avg_cov/supporting_hits + extent_penalty, 1.f);
114
115 int prev_alignment_score = _alignment_score;
116 _alignment_score -= penalty;
117 _alignment_score = min(0, _alignment_score);
118
119 // daehwan - for debugging purposes
120 if (bh.insert_id() == 325708 && false)
121 {
122 fprintf(stderr, "junc(%d:%d-%d) %d / (%d + %d) = %d => %d\n",
123 junc.refid, junc.left, junc.right,
124 itr->second.supporting_hits, left_cov, right_cov,
125 prev_alignment_score, _alignment_score);
126 fprintf(stderr, "\textent: %d-%d\n",
127 left_extent, right_extent);
128 }
129 }
130 }
131 }
132 }
133 break;
134
135 case MATCH:
136 case mATCH:
137 {
138 if (opcode == MATCH)
139 j += length;
140 else
141 j -= length;
142
143 r += length;
144 }
145 break;
146
147 case DEL:
148 case dEL:
149 {
150 Deletion deletion;
151 deletion.refid = bh.ref_id();
152 if (opcode == DEL)
153 {
154 deletion.left = j - 1;
155 deletion.right = j + length;
156 j += length;
157 }
158 else
159 {
160 deletion.right = j + 1;
161 deletion.left = j - length;
162 j -= length;
163 }
164
165 if (recalculate_score && recalculate_indel_score)
166 {
167 DeletionSet::const_iterator itr = deletions.find(deletion);
168 if (itr != deletions.end())
169 {
170 const int left_cov = coverage.get_coverage(ref_id, deletion.left + 1);
171 const int right_cov = (length == 1 ? left_cov : coverage.get_coverage(ref_id, deletion.right - 1));
172 const int avg_cov = (left_cov + right_cov) / 2;
173 const int del_penalty = bowtie2_ref_gap_open + bowtie2_ref_gap_cont * length;
174 int addition = del_penalty;
175
176 const int supporting_hits = itr->second.supporting_hits;
177 const int left_extent = itr->second.left_extent;
178 const int right_extent = itr->second.right_extent;
179 int penalty = 0;
180 if (left_extent < min_extent || right_extent < min_extent)
181 penalty = del_penalty * 0.5f;
182
183 if (avg_cov > 0 && supporting_hits >= 5)
184 addition *= min((float)supporting_hits/avg_cov, 1.f);
185 else
186 addition = 0;
187
188 addition -= penalty;
189 if (addition < 0)
190 addition = 0;
191
192 int prev_alignment_score = _alignment_score;
193 _alignment_score += addition;
194 _alignment_score = min(0, _alignment_score);
195
196 // daehwan - for debug purposes
197 if (bh.insert_id() == 325708 && false)
198 {
199 fprintf(stderr, "del(%d:%d-%d) %d / (%d + %d) = %d => %d\n",
200 deletion.refid, deletion.left, deletion.right,
201 supporting_hits, left_cov, right_cov,
202 prev_alignment_score, _alignment_score);
203 fprintf(stderr, "\textent: %d-%d\n",
204 left_extent, right_extent);
205 }
206 }
207 }
208 }
209 break;
210
211 case INS:
212 case iNS:
213 {
214 if (recalculate_score && recalculate_indel_score)
215 {
216 string seq = bh.seq().substr(r, length);
217 Insertion ins(ref_id, j, seq);
218 InsertionSet::const_iterator itr = insertions.find(ins);
219 if (itr != insertions.end())
220 {
221 const int supporting_hits = itr->second.supporting_hits;
222 const int left_extent = itr->second.left_extent;
223 const int right_extent = itr->second.right_extent;
224
225 const int left_cov = coverage.get_coverage(ref_id, j);
226 const int right_cov = coverage.get_coverage(ref_id, j + (opcode == INS ? 1 : -1));
227 const int avg_cov = (left_cov + right_cov) / 2 - supporting_hits;
228 const int ins_penalty = bowtie2_read_gap_open + bowtie2_read_gap_cont * length;
229 int addition = ins_penalty;
230 int extent_penalty = 0.0f;
231 if (left_extent < min_extent || right_extent < min_extent)
232 extent_penalty = ins_penalty * 0.5f;
233
234 if (avg_cov > 0 && supporting_hits >= 5)
235 addition *= min((float)supporting_hits/avg_cov, 1.f);
236 else
237 addition = 0;
238
239 addition -= extent_penalty;
240 if (addition < 0)
241 addition = 0;
242
243 int prev_alignment_score = _alignment_score;
244 _alignment_score += addition;
245 _alignment_score = min(0, _alignment_score);
246
247 /*
248 fprintf(stderr, "ins(%d:%d:%s) %d / (%d - %d) = %d => %d (%d)\n",
249 ref_id, ins.left, seq.c_str(),
250 supporting_hits, avg_cov, supporting_hits,
251 prev_alignment_score, _alignment_score, ins_penalty);
252 fprintf(stderr, "\textent: %d-%d\n",
253 left_extent, right_extent);
254 */
255 }
256 }
257
258 r += length;
259 }
260 break;
261
262 case FUSION_FF:
263 case FUSION_FR:
264 case FUSION_RF:
265 case FUSION_RR:
266 // daehwan - implement this later
267 j = length;
268 ref_id = bh.ref_id2();
269 break;
270
271 default:
272 break;
273 }
274 }
275 }
276
277 /**
278 * Establish an ordering on alignments.
279 * Prefer aligned reads over unaligned reads
280 * Within the space of aligned reads
281 * prefer splice-free reads over splice reads, and
282 * indel-free reads over indel reads.
283 * If a read can either be indel-free or splice-free,
284 * prefer the indel-free alignment
285 */
286 bool AlignStatus::operator<(const AlignStatus& rhs) const
287 {
288 if (_alignment_score != rhs._alignment_score)
289 return _alignment_score > rhs._alignment_score;
290
291 return false;
292 }
293
294 /**
295 * Alignments are only equal if all fields are identical.
296 */
297 bool AlignStatus::operator==(const AlignStatus& rhs) const
298 {
299 return _alignment_score == rhs._alignment_score;
300 }
301
302 bool AlignStatus::operator!=(const AlignStatus& rhs) const
303 {
304 return _alignment_score != rhs._alignment_score;
305 }