ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/inserts.h
Revision: 245
Committed: Wed May 16 21:51:01 2012 UTC (12 years, 3 months ago) by gpertea
File size: 7167 byte(s)
Log Message:
merge-sync attempt

Line File contents
1 #ifndef INSERTS_H
2 #define INSERTS_H
3 /*
4 * inserts.h
5 * TopHat
6 *
7 * Created by Cole Trapnell on 1/14/09.
8 * Copyright 2009 Cole Trapnell. All rights reserved.
9 *
10 */
11
12 #include "bwt_map.h"
13 #include "junctions.h"
14
15 struct InsertAlignment
16 {
17 InsertAlignment(uint64_t _refid,
18 BowtieHit* _left_alignment,
19 BowtieHit* _right_alignment) :
20 refid(_refid),
21 left_alignment(_left_alignment),
22 right_alignment(_right_alignment) {}
23
24 uint64_t refid;
25 BowtieHit* left_alignment;
26 BowtieHit* right_alignment;
27 };
28
29 pair<int, int> pair_distances(const BowtieHit& h1, const BowtieHit& h2);
30
31 bool gap_lt(const pair<int, int>& lhs, const pair<int, int>& rhs);
32
33 struct InsertAlignmentGrade
34 {
35 InsertAlignmentGrade() :
36 too_close(false),
37 too_far(false),
38 num_spliced(0),
39 num_mapped(0),
40 opposite_strands(false),
41 consistent_splices(false),
42 longest_ref_skip(0x7FFFFu),
43 edit_dist(0x1F),
44 fusion(true),
45 inner_dist(99999999),
46 alignment_score(std::numeric_limits<int>::min())
47 {}
48
49 InsertAlignmentGrade(const BowtieHit& h1, bool fusion = false) :
50 too_close(false),
51 too_far(false),
52 num_spliced(0),
53 num_mapped(0),
54 opposite_strands(false),
55 consistent_splices(false),
56 edit_dist(0x1F),
57 fusion(fusion),
58 num_alignments(0),
59 inner_dist(99999999),
60 alignment_score(std::numeric_limits<int>::min())
61 {
62 if (!h1.contiguous())
63 num_spliced++;
64
65 num_mapped = 1;
66
67 longest_ref_skip = min(0x3FFFFu, (unsigned int)get_longest_ref_skip(h1) / 100);
68 edit_dist = h1.edit_dist();
69 num_alignments = 1;
70 }
71
72 InsertAlignmentGrade(const BowtieHit& h1,
73 const BowtieHit& h2,
74 const JunctionSet& junctions,
75 bool fusion = false) :
76 too_close(false),
77 too_far(false),
78 num_spliced(0),
79 num_mapped(0),
80 opposite_strands(false),
81 consistent_splices(false),
82 edit_dist(0x1F),
83 fusion(fusion),
84 num_alignments(0),
85 alignment_score(std::numeric_limits<int>::min())
86 {
87 int min_inner_distance = inner_dist_mean - inner_dist_std_dev;
88 int max_inner_distance = inner_dist_mean + inner_dist_std_dev;
89
90 pair<int, int> distances = pair_distances(h1, h2);
91 inner_dist = distances.second;
92
93 num_mapped = 2;
94
95 if (!h1.contiguous())
96 num_spliced++;
97 if (!h2.contiguous())
98 num_spliced++;
99
100 too_far = (inner_dist > max_inner_distance);
101 too_close = (inner_dist < min_inner_distance);
102 opposite_strands = (h1.antisense_align() != h2.antisense_align());
103 consistent_splices = (num_spliced == 2 && h1.antisense_splice() == h2.antisense_splice());
104
105 uint32_t ls = max(get_longest_ref_skip(h1), get_longest_ref_skip(h2));
106 longest_ref_skip = min (ls / 100, 0x3FFFFu);
107 edit_dist = h1.edit_dist() + h2.edit_dist();
108 num_alignments = 1;
109 assert(!(too_far && too_close));
110
111 if (too_far && !fusion)
112 {
113 int inner1, inner2;
114 if (h1.left() < h2.left())
115 inner1 = h1.right(), inner2 = h2.left();
116 else
117 inner1 = h2.right(), inner2 = h1.left();
118
119 JunctionSet::const_iterator lb, ub;
120
121 lb = junctions.upper_bound(Junction(h1.ref_id(), inner1, inner1, true));
122 ub = junctions.lower_bound(Junction(h1.ref_id(), inner2, inner2, false));
123 while (lb != ub && lb != junctions.end())
124 {
125 const Junction& junction = lb->first;
126 const JunctionStats& junction_stat = lb->second;
127 if (inner1 <= (int)junction.left && inner2 >= (int)junction.right &&
128 junction_stat.supporting_hits >= 10)
129 {
130 int temp_dist = junction.left - inner1 + inner2 - junction.right;
131 if (temp_dist >= min_inner_distance && temp_dist <= max_inner_distance)
132 {
133 // daehwan - for debugging purposes
134 /*
135 fprintf(stderr, "read id: %d\t %s %d-%d %d-%d %s\t %d->%d) \n",
136 h1.insert_id(), print_cigar(h1.cigar()).c_str(), h1.left(), h1.right(),
137 h2.left(), h2.right(), print_cigar(h2.cigar()).c_str(),
138 inner_dist, temp_dist);
139 */
140
141 inner_dist = temp_dist;
142 too_far = false;
143
144 break;
145 }
146 }
147
148 ++lb;
149 }
150 }
151
152 //
153 static const int penalty_for_long_inner_dist = bowtie2_max_penalty;
154 alignment_score = h1.alignment_score() + h2.alignment_score();
155 if (!fusion)
156 {
157 if (too_far)
158 {
159 int penalty = penalty_for_long_inner_dist;
160 if (inner_dist - max_inner_distance < inner_dist_std_dev)
161 {
162 penalty = penalty_for_long_inner_dist / 2;
163 }
164
165 alignment_score -= penalty;
166 }
167 else if (too_close)
168 {
169 int penalty = min(penalty_for_long_inner_dist/2,
170 (min_inner_distance - inner_dist) / inner_dist_std_dev + 1);
171 alignment_score -= penalty;
172 }
173
174 static const int penalty_for_same_strand = bowtie2_max_penalty;
175 if (!opposite_strands)
176 alignment_score -= penalty_for_same_strand;
177 }
178 }
179
180 InsertAlignmentGrade& operator=(const InsertAlignmentGrade& rhs)
181 {
182 too_close = rhs.too_close;
183 too_far = rhs.too_far;
184 num_spliced = rhs.num_spliced;
185 num_mapped = rhs.num_mapped;
186 opposite_strands = rhs.opposite_strands;
187 consistent_splices = rhs.consistent_splices;
188 longest_ref_skip = rhs.longest_ref_skip;
189 edit_dist = rhs.edit_dist;
190 fusion = rhs.fusion;
191 num_alignments = rhs.num_alignments;
192 inner_dist = rhs.inner_dist;
193 alignment_score = rhs.alignment_score;
194 return *this;
195 }
196
197 static int get_longest_ref_skip(const BowtieHit& h1)
198 {
199 vector<pair<int, int> > gaps;
200 h1.gaps(gaps);
201 if (gaps.empty())
202 return 0;
203
204 vector<pair<int, int> >::iterator max_itr = max_element(gaps.begin(), gaps.end(), gap_lt);
205 return abs(max_itr->second - max_itr->first);
206 }
207
208 // Returns true if rhs is a "happier" alignment for the ends of this insert
209 // than this InsertStatus.
210 bool operator<(const InsertAlignmentGrade& rhs);
211
212 bool happy() const
213 {
214 return num_mapped == 2 && opposite_strands && (num_spliced != 2 || consistent_splices) && !too_far;
215 }
216
217 int align_score() { return alignment_score; }
218
219 bool is_fusion() const { return fusion; }
220
221 bool too_close;
222 bool too_far;
223
224 uint8_t num_spliced;
225 uint8_t num_mapped;
226
227 bool opposite_strands;
228 bool consistent_splices;
229 uint32_t longest_ref_skip; // in 100s of bp
230 unsigned char edit_dist;
231 bool fusion;
232 int num_alignments; // number of equally good alignments for the insert
233 int inner_dist; // distance between inner edges of mates
234
235 int alignment_score;
236 };
237
238 typedef vector<pair<InsertAlignmentGrade, vector<InsertAlignment> > > BestInsertAlignmentTable;
239 void accept_valid_hits(BestInsertAlignmentTable& best_status_for_inserts);
240 void accept_all_best_hits(BestInsertAlignmentTable& best_status_for_inserts);
241
242 void best_insert_mappings(uint64_t refid,
243 ReadTable& it,
244 HitList& hits1_in_ref,
245 HitList& hits2_in_ref,
246 BestInsertAlignmentTable& best_insert_alignments,
247 bool prefer_shorter_pairs = false);
248
249
250 void insert_best_pairings(RefSequenceTable& rt,
251 ReadTable& it,
252 HitTable& hits1,
253 HitTable& hits2,
254 BestInsertAlignmentTable& best_pairings,
255 bool prefer_shorter_pairs = false);
256 #endif