30 |
|
#include <seqan/file.h> |
31 |
|
#include <getopt.h> |
32 |
|
|
33 |
+ |
#include <boost/thread.hpp> |
34 |
+ |
#include <boost/random/mersenne_twister.hpp> |
35 |
+ |
|
36 |
|
#include "common.h" |
37 |
+ |
#include "utils.h" |
38 |
|
#include "bwt_map.h" |
39 |
|
#include "junctions.h" |
40 |
|
#include "insertions.h" |
41 |
|
#include "deletions.h" |
42 |
+ |
#include "fusions.h" |
43 |
|
#include "align_status.h" |
44 |
|
#include "fragments.h" |
45 |
|
#include "wiggles.h" |
46 |
|
#include "tokenize.h" |
47 |
|
#include "reads.h" |
48 |
+ |
#include "coverage.h" |
49 |
+ |
|
50 |
|
|
51 |
|
#include "inserts.h" |
52 |
|
|
54 |
|
using namespace seqan; |
55 |
|
using std::set; |
56 |
|
|
57 |
+ |
static const JunctionSet empty_junctions; |
58 |
+ |
static const InsertionSet empty_insertions; |
59 |
+ |
static const DeletionSet empty_deletions; |
60 |
+ |
static const FusionSet empty_fusions; |
61 |
+ |
static const Coverage empty_coverage; |
62 |
+ |
|
63 |
+ |
// daehwan - this is redundancy, which should be removed. |
64 |
+ |
void get_seqs(istream& ref_stream, |
65 |
+ |
RefSequenceTable& rt, |
66 |
+ |
bool keep_seqs = true) |
67 |
+ |
{ |
68 |
+ |
while(ref_stream.good() && !ref_stream.eof()) |
69 |
+ |
{ |
70 |
+ |
RefSequenceTable::Sequence* ref_str = new RefSequenceTable::Sequence(); |
71 |
+ |
string name; |
72 |
+ |
readMeta(ref_stream, name, Fasta()); |
73 |
+ |
string::size_type space_pos = name.find_first_of(" \t\r"); |
74 |
+ |
if (space_pos != string::npos) |
75 |
+ |
{ |
76 |
+ |
name.resize(space_pos); |
77 |
+ |
} |
78 |
+ |
fprintf(stderr, "\tLoading %s...", name.c_str()); |
79 |
+ |
seqan::read(ref_stream, *ref_str, Fasta()); |
80 |
+ |
fprintf(stderr, "done\n"); |
81 |
+ |
|
82 |
+ |
rt.get_id(name, keep_seqs ? ref_str : NULL, 0); |
83 |
+ |
} |
84 |
+ |
} |
85 |
+ |
|
86 |
+ |
struct cmp_read_alignment |
87 |
+ |
{ |
88 |
+ |
bool operator() (const BowtieHit& l, const BowtieHit& r) const |
89 |
+ |
{ |
90 |
+ |
return l.alignment_score() > r.alignment_score(); |
91 |
+ |
} |
92 |
+ |
}; |
93 |
+ |
|
94 |
|
void read_best_alignments(const HitsForRead& hits_for_read, |
95 |
< |
FragmentAlignmentGrade& best_grade, |
96 |
< |
HitsForRead& best_hits, |
97 |
< |
const JunctionSet& gtf_junctions) |
95 |
> |
HitsForRead& best_hits, |
96 |
> |
const JunctionSet& gtf_junctions, |
97 |
> |
const JunctionSet& junctions = empty_junctions, |
98 |
> |
const InsertionSet& insertions = empty_insertions, |
99 |
> |
const DeletionSet& deletions = empty_deletions, |
100 |
> |
const FusionSet& fusions = empty_fusions, |
101 |
> |
const Coverage& coverage = empty_coverage, |
102 |
> |
bool final_report = false) |
103 |
|
{ |
104 |
|
const vector<BowtieHit>& hits = hits_for_read.hits; |
105 |
+ |
|
106 |
+ |
if (hits.size() >= max_multihits * 5) |
107 |
+ |
return; |
108 |
+ |
|
109 |
|
for (size_t i = 0; i < hits.size(); ++i) |
110 |
|
{ |
111 |
< |
if (hits[i].edit_dist()>max_read_mismatches) continue; |
112 |
< |
FragmentAlignmentGrade g(hits[i], gtf_junctions); |
113 |
< |
// Is the new status better than the current best one? |
114 |
< |
if (best_grade < g) |
115 |
< |
{ |
116 |
< |
best_hits.hits.clear(); |
117 |
< |
best_grade = g; |
118 |
< |
best_hits.hits.push_back(hits[i]); |
119 |
< |
} |
120 |
< |
else if (!(g < best_grade)) // is it just as good? |
121 |
< |
{ |
122 |
< |
best_grade.num_alignments++; |
123 |
< |
best_hits.hits.push_back(hits[i]); |
124 |
< |
} |
111 |
> |
if (hits[i].edit_dist() > max_read_mismatches) |
112 |
> |
continue; |
113 |
> |
|
114 |
> |
BowtieHit hit = hits[i]; |
115 |
> |
AlignStatus align_status(hit, gtf_junctions, |
116 |
> |
junctions, insertions, deletions, fusions, coverage); |
117 |
> |
hit.alignment_score(align_status._alignment_score); |
118 |
> |
|
119 |
> |
if (report_secondary_alignments || !final_report) |
120 |
> |
{ |
121 |
> |
best_hits.hits.push_back(hit); |
122 |
> |
} |
123 |
> |
else |
124 |
> |
{ |
125 |
> |
// Is the new status better than the current best one? |
126 |
> |
if (best_hits.hits.size() == 0 || cmp_read_alignment()(hit, best_hits.hits[0])) |
127 |
> |
{ |
128 |
> |
best_hits.hits.clear(); |
129 |
> |
best_hits.hits.push_back(hit); |
130 |
> |
} |
131 |
> |
else if (!cmp_read_alignment()(best_hits.hits[0], hit)) // is it just as good? |
132 |
> |
{ |
133 |
> |
best_hits.hits.push_back(hit); |
134 |
> |
} |
135 |
> |
} |
136 |
> |
} |
137 |
> |
|
138 |
> |
if ((report_secondary_alignments || !final_report) && best_hits.hits.size() > 0) |
139 |
> |
{ |
140 |
> |
sort(best_hits.hits.begin(), best_hits.hits.end(), cmp_read_alignment()); |
141 |
> |
} |
142 |
> |
|
143 |
> |
if (final_report) |
144 |
> |
{ |
145 |
> |
if (best_hits.hits.size() > max_multihits) |
146 |
> |
best_hits.hits.erase(best_hits.hits.begin() + max_multihits, best_hits.hits.end()); |
147 |
|
} |
148 |
|
} |
149 |
|
|
150 |
< |
void pair_best_alignments(const HitsForRead& left_hits, |
76 |
< |
const HitsForRead& right_hits, |
77 |
< |
InsertAlignmentGrade& best_grade, |
78 |
< |
HitsForRead& left_best_hits, |
79 |
< |
HitsForRead& right_best_hits) |
150 |
> |
bool set_insert_alignment_grade(const BowtieHit& lh, const BowtieHit& rh, const JunctionSet& junctions, InsertAlignmentGrade& grade) |
151 |
|
{ |
152 |
< |
// max mate inner distance (genomic) |
153 |
< |
int min_mate_inner_dist = inner_dist_mean - inner_dist_std_dev; |
154 |
< |
if (max_mate_inner_dist == -1) |
152 |
> |
bool fusion = false; |
153 |
> |
bool left_fusion = lh.fusion_opcode() != FUSION_NOTHING; |
154 |
> |
bool right_fusion = rh.fusion_opcode() != FUSION_NOTHING; |
155 |
> |
if (left_fusion && right_fusion) |
156 |
> |
return false; |
157 |
> |
|
158 |
> |
fusion = left_fusion || right_fusion; |
159 |
> |
if (!fusion && lh.ref_id() != rh.ref_id()) |
160 |
> |
fusion = true; |
161 |
> |
|
162 |
> |
if (!fusion && lh.ref_id() == rh.ref_id()) |
163 |
> |
{ |
164 |
> |
if (lh.antisense_align() == rh.antisense_align()) |
165 |
> |
fusion = true; |
166 |
> |
else |
167 |
|
{ |
168 |
< |
max_mate_inner_dist = inner_dist_mean + inner_dist_std_dev; |
168 |
> |
int inner_dist = 0; |
169 |
> |
if (lh.antisense_align()) |
170 |
> |
// used rh.left() instead of rh.right() for the cases, |
171 |
> |
// where reads overlap with each other or reads span introns |
172 |
> |
inner_dist = lh.left() - rh.left(); |
173 |
> |
else |
174 |
> |
inner_dist = rh.left() - lh.left(); |
175 |
> |
|
176 |
> |
if (inner_dist < 0 || inner_dist > (int)fusion_min_dist) |
177 |
> |
fusion = true; |
178 |
|
} |
179 |
+ |
} |
180 |
+ |
|
181 |
+ |
// a read contains its partner, in which case the paired mapping will be ignored. |
182 |
+ |
if (!fusion) |
183 |
+ |
{ |
184 |
+ |
if (lh.left() <= rh.left() && lh.right() >= rh.right() && lh.right() - lh.left() > rh.right() - rh.left()) |
185 |
+ |
return false; |
186 |
+ |
else if (rh.left() <= lh.left() && rh.right() >= lh.right() && rh.right() - rh.left() > lh.right() - lh.left()) |
187 |
+ |
return false; |
188 |
+ |
} |
189 |
+ |
|
190 |
+ |
grade = InsertAlignmentGrade(lh, rh, junctions, fusion); |
191 |
+ |
|
192 |
+ |
return true; |
193 |
+ |
} |
194 |
+ |
|
195 |
+ |
struct cmp_pair_alignment |
196 |
+ |
{ |
197 |
+ |
cmp_pair_alignment(const JunctionSet& junctions) : |
198 |
+ |
_junctions(&junctions) {} |
199 |
|
|
200 |
< |
const vector<BowtieHit>& left = left_hits.hits; |
201 |
< |
const vector<BowtieHit>& right = right_hits.hits; |
202 |
< |
|
203 |
< |
for (size_t i = 0; i < left.size(); ++i) |
200 |
> |
bool operator() (const pair<BowtieHit, BowtieHit>& l, const pair<BowtieHit, BowtieHit>& r) const |
201 |
> |
{ |
202 |
> |
InsertAlignmentGrade gl; set_insert_alignment_grade(l.first, l.second, *_junctions, gl); |
203 |
> |
InsertAlignmentGrade gr; set_insert_alignment_grade(r.first, r.second, *_junctions, gr); |
204 |
> |
|
205 |
> |
bool better = gr < gl; |
206 |
> |
bool worse = gl < gr; |
207 |
> |
|
208 |
> |
if (better && !worse) |
209 |
> |
return true; |
210 |
> |
else |
211 |
> |
return false; |
212 |
> |
} |
213 |
> |
|
214 |
> |
const JunctionSet* _junctions; |
215 |
> |
}; |
216 |
> |
|
217 |
> |
void pair_best_alignments(const HitsForRead& left_hits, |
218 |
> |
const HitsForRead& right_hits, |
219 |
> |
InsertAlignmentGrade& best_grade, |
220 |
> |
vector<pair<BowtieHit, BowtieHit> >& best_hits, |
221 |
> |
const JunctionSet& gtf_junctions, |
222 |
> |
const JunctionSet& junctions = empty_junctions, |
223 |
> |
const InsertionSet& insertions = empty_insertions, |
224 |
> |
const DeletionSet& deletions = empty_deletions, |
225 |
> |
const FusionSet& fusions = empty_fusions, |
226 |
> |
const Coverage& coverage = empty_coverage, |
227 |
> |
bool final_report = false) |
228 |
> |
{ |
229 |
> |
const vector<BowtieHit>& left = left_hits.hits; |
230 |
> |
const vector<BowtieHit>& right = right_hits.hits; |
231 |
> |
|
232 |
> |
if (left.size() >= max_multihits * 5 || right.size() >= max_multihits * 5) |
233 |
> |
return; |
234 |
> |
|
235 |
> |
for (size_t i = 0; i < left.size(); ++i) |
236 |
> |
{ |
237 |
> |
if (left[i].edit_dist() > max_read_mismatches) continue; |
238 |
> |
|
239 |
> |
BowtieHit lh = left[i]; |
240 |
> |
AlignStatus align_status(lh, gtf_junctions, |
241 |
> |
junctions, insertions, deletions, fusions, coverage); |
242 |
> |
lh.alignment_score(align_status._alignment_score); |
243 |
> |
|
244 |
> |
for (size_t j = 0; j < right.size(); ++j) |
245 |
|
{ |
246 |
< |
if (left[i].edit_dist()>max_read_mismatches) continue; |
246 |
> |
if (right[j].edit_dist() > max_read_mismatches) continue; |
247 |
> |
|
248 |
> |
BowtieHit rh = right[j]; |
249 |
> |
AlignStatus align_status(rh, gtf_junctions, |
250 |
> |
junctions, insertions, deletions, fusions, coverage); |
251 |
> |
rh.alignment_score(align_status._alignment_score); |
252 |
> |
InsertAlignmentGrade g; |
253 |
> |
bool allowed; |
254 |
> |
allowed = set_insert_alignment_grade(lh, rh, final_report ? junctions : gtf_junctions, g); |
255 |
> |
|
256 |
> |
// daehwan - for debugging purposes |
257 |
> |
#if 0 |
258 |
> |
if (lh.insert_id() == 325708 && !g.is_fusion() && false) |
259 |
> |
{ |
260 |
> |
fprintf(stderr, "lh %d:%d %s score: %d (from %d) NM: %d\n", |
261 |
> |
lh.ref_id(), lh.left(), print_cigar(lh.cigar()).c_str(), |
262 |
> |
lh.alignment_score(), left[i].alignment_score(), lh.edit_dist()); |
263 |
> |
fprintf(stderr, "rh %d:%d %s score: %d (from %d) NM: %d\n", |
264 |
> |
rh.ref_id(), rh.left(), print_cigar(rh.cigar()).c_str(), |
265 |
> |
rh.alignment_score(), right[j].alignment_score(), rh.edit_dist()); |
266 |
> |
fprintf(stderr, "combined score: %d is_fusion(%d)\n", g.align_score(), g.is_fusion()); |
267 |
> |
} |
268 |
> |
#endif |
269 |
> |
|
270 |
> |
if (!allowed) continue; |
271 |
> |
if (!fusion_search && !report_discordant_pair_alignments && g.is_fusion()) continue; |
272 |
|
|
273 |
< |
const BowtieHit& lh = left[i]; |
274 |
< |
for (size_t j = 0; j < right.size(); ++j) |
273 |
> |
if (report_secondary_alignments || !final_report) |
274 |
> |
{ |
275 |
> |
best_hits.push_back(make_pair(lh, rh)); |
276 |
> |
} |
277 |
> |
else |
278 |
> |
{ |
279 |
> |
// Is the new status better than the current best one? |
280 |
> |
if (best_grade < g) |
281 |
|
{ |
282 |
< |
const BowtieHit& rh = right[j]; |
283 |
< |
|
284 |
< |
if (lh.ref_id() != rh.ref_id()) |
101 |
< |
continue; |
102 |
< |
if (right[j].edit_dist()>max_read_mismatches) continue; |
103 |
< |
InsertAlignmentGrade g(lh, rh, min_mate_inner_dist, max_mate_inner_dist); |
104 |
< |
|
105 |
< |
// Is the new status better than the current best one? |
106 |
< |
if (best_grade < g) |
107 |
< |
{ |
108 |
< |
left_best_hits.hits.clear(); |
109 |
< |
right_best_hits.hits.clear(); |
110 |
< |
|
111 |
< |
best_grade = g; |
112 |
< |
left_best_hits.hits.push_back(lh); |
113 |
< |
right_best_hits.hits.push_back(rh); |
114 |
< |
} |
115 |
< |
else if (!(g < best_grade)) |
116 |
< |
{ |
117 |
< |
left_best_hits.hits.push_back(lh); |
118 |
< |
right_best_hits.hits.push_back(rh); |
119 |
< |
best_grade.num_alignments++; |
120 |
< |
} |
282 |
> |
best_hits.clear(); |
283 |
> |
best_grade = g; |
284 |
> |
best_hits.push_back(make_pair(lh, rh)); |
285 |
|
} |
286 |
+ |
else if (!(g < best_grade)) |
287 |
+ |
{ |
288 |
+ |
best_hits.push_back(make_pair(lh, rh)); |
289 |
+ |
} |
290 |
+ |
} |
291 |
|
} |
292 |
+ |
} |
293 |
+ |
|
294 |
+ |
if ((report_secondary_alignments || !final_report) && best_hits.size() > 0) |
295 |
+ |
{ |
296 |
+ |
cmp_pair_alignment cmp(final_report ? junctions : gtf_junctions); |
297 |
+ |
sort(best_hits.begin(), best_hits.end(), cmp); |
298 |
+ |
set_insert_alignment_grade(best_hits[0].first, best_hits[0].second, final_report ? junctions : gtf_junctions, best_grade); |
299 |
+ |
} |
300 |
+ |
|
301 |
+ |
if (final_report) |
302 |
+ |
{ |
303 |
+ |
if (best_hits.size() > max_multihits) |
304 |
+ |
best_hits.erase(best_hits.begin() + max_multihits, best_hits.end()); |
305 |
+ |
} |
306 |
+ |
|
307 |
+ |
best_grade.num_alignments = best_hits.size(); |
308 |
|
} |
309 |
|
|
310 |
|
enum FragmentType {FRAG_UNPAIRED, FRAG_LEFT, FRAG_RIGHT}; |
312 |
|
void add_auxData(vector<string>& auxdata, vector<string>& sam_toks, |
313 |
|
const RefSequenceTable& rt,const BowtieHit& bh, FragmentType insert_side, |
314 |
|
int num_hits, const BowtieHit* next_hit, int hitIndex) { |
315 |
+ |
bool XS_found = false; |
316 |
+ |
if (sam_toks.size()>11) { |
317 |
|
|
318 |
< |
if (sam_toks.size()>11) { |
319 |
< |
for (size_t i=11;i<sam_toks.size();++i) { |
320 |
< |
if (sam_toks[i].find("NH:i:")==string::npos) |
321 |
< |
auxdata.push_back(sam_toks[i]); |
322 |
< |
} |
323 |
< |
} |
324 |
< |
string aux("NH:i:"); |
325 |
< |
str_appendInt(aux, num_hits); |
318 |
> |
for (size_t i=11;i<sam_toks.size();++i) { |
319 |
> |
if (sam_toks[i].find("NH:i:")==string::npos && |
320 |
> |
sam_toks[i].find("XS:i:")==string::npos) |
321 |
> |
auxdata.push_back(sam_toks[i]); |
322 |
> |
|
323 |
> |
if (sam_toks[i].find("XS:A:")!=string::npos) |
324 |
> |
XS_found = true; |
325 |
> |
} |
326 |
> |
} |
327 |
> |
string aux("NH:i:"); |
328 |
> |
str_appendInt(aux, num_hits); |
329 |
> |
auxdata.push_back(aux); |
330 |
> |
if (next_hit) { |
331 |
> |
const char* nh_ref_name = "="; |
332 |
> |
nh_ref_name = rt.get_name(next_hit->ref_id()); |
333 |
> |
assert (nh_ref_name != NULL); |
334 |
> |
bool same_contig=(next_hit->ref_id()==bh.ref_id()); |
335 |
> |
aux="CC:Z:"; |
336 |
> |
aux+= (same_contig ? "=" : nh_ref_name); |
337 |
> |
auxdata.push_back(aux); |
338 |
> |
aux="CP:i:"; |
339 |
> |
int nh_gpos=next_hit->left() + 1; |
340 |
> |
str_appendInt(aux, nh_gpos); |
341 |
|
auxdata.push_back(aux); |
342 |
< |
if (next_hit) { |
141 |
< |
const char* nh_ref_name = "="; |
142 |
< |
nh_ref_name = rt.get_name(next_hit->ref_id()); |
143 |
< |
assert (nh_ref_name != NULL); |
144 |
< |
bool same_contig=(next_hit->ref_id()==bh.ref_id()); |
145 |
< |
aux="CC:Z:"; |
146 |
< |
aux+= (same_contig ? "=" : nh_ref_name); |
147 |
< |
auxdata.push_back(aux); |
148 |
< |
aux="CP:i:"; |
149 |
< |
int nh_gpos=next_hit->left() + 1; |
150 |
< |
str_appendInt(aux, nh_gpos); |
151 |
< |
auxdata.push_back(aux); |
152 |
< |
} //has next_hit |
342 |
> |
} //has next_hit |
343 |
|
// FIXME: this code is still a bit brittle, because it contains no |
344 |
|
// consistency check that the mates are on opposite strands - a current protocol |
345 |
|
// requirement, and that the strand indicated by the alignment is consistent |
346 |
|
// with the orientation of the splices (though that should be handled upstream). |
347 |
+ |
if (!XS_found) { |
348 |
|
const string xs_f("XS:A:+"); |
349 |
|
const string xs_r("XS:A:-"); |
350 |
< |
if (bh.contiguous()) { |
351 |
< |
if (library_type == FR_FIRSTSTRAND) { |
352 |
< |
if (insert_side == FRAG_LEFT || insert_side == FRAG_UNPAIRED) { |
353 |
< |
if (bh.antisense_align()) |
354 |
< |
auxdata.push_back(xs_f); |
355 |
< |
else |
356 |
< |
auxdata.push_back(xs_r); |
357 |
< |
} |
358 |
< |
else { |
359 |
< |
if (bh.antisense_align()) |
360 |
< |
auxdata.push_back(xs_r); |
361 |
< |
else |
362 |
< |
auxdata.push_back(xs_f); |
363 |
< |
} |
364 |
< |
} |
365 |
< |
else if (library_type == FR_SECONDSTRAND) { |
366 |
< |
if (insert_side == FRAG_LEFT || insert_side == FRAG_UNPAIRED){ |
367 |
< |
if (bh.antisense_align()) |
368 |
< |
auxdata.push_back(xs_r); |
369 |
< |
else |
370 |
< |
auxdata.push_back(xs_f); |
371 |
< |
} |
372 |
< |
else |
373 |
< |
{ |
374 |
< |
if (bh.antisense_align()) |
375 |
< |
auxdata.push_back(xs_f); |
376 |
< |
else |
377 |
< |
auxdata.push_back(xs_r); |
378 |
< |
} |
379 |
< |
} |
380 |
< |
} //bh.contiguous() |
190 |
< |
if (hitIndex >= 0) |
350 |
> |
if (library_type == FR_FIRSTSTRAND) { |
351 |
> |
if (insert_side == FRAG_LEFT || insert_side == FRAG_UNPAIRED) { |
352 |
> |
if (bh.antisense_align()) |
353 |
> |
auxdata.push_back(xs_f); |
354 |
> |
else |
355 |
> |
auxdata.push_back(xs_r); |
356 |
> |
} |
357 |
> |
else { |
358 |
> |
if (bh.antisense_align()) |
359 |
> |
auxdata.push_back(xs_r); |
360 |
> |
else |
361 |
> |
auxdata.push_back(xs_f); |
362 |
> |
} |
363 |
> |
} |
364 |
> |
else if (library_type == FR_SECONDSTRAND) { |
365 |
> |
if (insert_side == FRAG_LEFT || insert_side == FRAG_UNPAIRED){ |
366 |
> |
if (bh.antisense_align()) |
367 |
> |
auxdata.push_back(xs_r); |
368 |
> |
else |
369 |
> |
auxdata.push_back(xs_f); |
370 |
> |
} |
371 |
> |
else |
372 |
> |
{ |
373 |
> |
if (bh.antisense_align()) |
374 |
> |
auxdata.push_back(xs_f); |
375 |
> |
else |
376 |
> |
auxdata.push_back(xs_r); |
377 |
> |
} |
378 |
> |
} |
379 |
> |
} |
380 |
> |
if (hitIndex >= 0) |
381 |
|
{ |
382 |
< |
string aux("HI:i:"); |
383 |
< |
str_appendInt(aux, hitIndex); |
384 |
< |
auxdata.push_back(aux); |
382 |
> |
string aux("HI:i:"); |
383 |
> |
str_appendInt(aux, hitIndex); |
384 |
> |
auxdata.push_back(aux); |
385 |
|
} |
386 |
|
} |
387 |
|
|
388 |
< |
bool rewrite_sam_record(GBamWriter& bam_writer, const RefSequenceTable& rt, |
388 |
> |
bool rewrite_sam_record(GBamWriter& bam_writer, |
389 |
> |
const RefSequenceTable& rt, |
390 |
|
const BowtieHit& bh, |
391 |
|
const char* bwt_buf, |
392 |
|
const char* read_alt_name, |
202 |
– |
const FragmentAlignmentGrade& grade, |
393 |
|
FragmentType insert_side, |
394 |
|
int num_hits, |
395 |
|
const BowtieHit* next_hit, |
396 |
|
bool primary, |
397 |
|
int hitIndex) |
398 |
|
{ |
399 |
< |
// Rewrite this hit, filling in the alt name, mate mapping |
400 |
< |
// and setting the pair flag |
401 |
< |
vector<string> sam_toks; |
402 |
< |
tokenize(bwt_buf, "\t", sam_toks); |
399 |
> |
// Rewrite this hit, filling in the alt name, mate mapping |
400 |
> |
// and setting the pair flag |
401 |
> |
vector<string> sam_toks; |
402 |
> |
tokenize(bwt_buf, "\t", sam_toks); |
403 |
> |
|
404 |
> |
string ref_name = sam_toks[2], ref_name2 = ""; |
405 |
> |
char cigar1[1024] = {0}, cigar2[1024] = {0}; |
406 |
> |
string left_seq, right_seq, left_qual, right_qual; |
407 |
> |
int left1 = -1, left2 = -1; |
408 |
> |
bool fusion_alignment = false; |
409 |
> |
size_t XF_index = 0; |
410 |
> |
for (size_t i = 11; i < sam_toks.size(); ++i) |
411 |
> |
{ |
412 |
> |
string& tok = sam_toks[i]; |
413 |
> |
if (strncmp(tok.c_str(), "XF", 2) == 0) |
414 |
> |
{ |
415 |
> |
fusion_alignment = true; |
416 |
> |
XF_index = i; |
417 |
> |
|
418 |
> |
vector<string> tuple_fields; |
419 |
> |
tokenize(tok.c_str(), " ", tuple_fields); |
420 |
> |
vector<string> contigs; |
421 |
> |
tokenize(tuple_fields[1].c_str(), "-", contigs); |
422 |
> |
if (contigs.size() >= 2) |
423 |
> |
{ |
424 |
> |
ref_name = contigs[0]; |
425 |
> |
ref_name2 = contigs[1]; |
426 |
> |
} |
427 |
> |
|
428 |
> |
extract_partial_hits(bh, tuple_fields[4].c_str(), tuple_fields[5].c_str(), |
429 |
> |
cigar1, cigar2, left_seq, right_seq, |
430 |
> |
left_qual, right_qual, left1, left2); |
431 |
> |
|
432 |
> |
break; |
433 |
> |
} |
434 |
> |
|
435 |
> |
else if (strncmp(tok.c_str(), "AS", 2) == 0) |
436 |
> |
{ |
437 |
> |
char AS_score[128] = {0}; |
438 |
> |
sprintf(AS_score, "AS:i:%d", min(0, bh.alignment_score())); |
439 |
> |
tok = AS_score; |
440 |
> |
} |
441 |
> |
} |
442 |
> |
|
443 |
|
string qname(read_alt_name); |
444 |
|
size_t slash_pos=qname.rfind('/'); |
445 |
|
if (slash_pos!=string::npos) |
446 |
< |
qname.resize(slash_pos); |
447 |
< |
//read_alt_name as QNAME |
448 |
< |
int flag=atoi(sam_toks[1].c_str()); //FLAG |
449 |
< |
if (insert_side != FRAG_UNPAIRED) { |
450 |
< |
//flag = atoi(sam_toks[1].c_str()); |
451 |
< |
// mark this as a singleton mate |
452 |
< |
flag |= 0x0001; |
453 |
< |
if (insert_side == FRAG_LEFT) |
454 |
< |
flag |= 0x0040; |
455 |
< |
else if (insert_side == FRAG_RIGHT) |
456 |
< |
flag |= 0x0080; |
457 |
< |
flag |= 0x0008; |
458 |
< |
//char flag_buf[64]; |
459 |
< |
//sprintf(flag_buf, "%d", flag); |
460 |
< |
//sam_toks[t] = flag_buf; |
461 |
< |
} |
462 |
< |
if (!primary) |
463 |
< |
flag |= 0x100; |
464 |
< |
|
465 |
< |
int gpos=isdigit(sam_toks[3][0]) ? atoi(sam_toks[3].c_str()) : 0; |
466 |
< |
int mapQ=255; |
467 |
< |
if (grade.num_alignments > 1) { |
468 |
< |
double err_prob = 1 - (1.0 / grade.num_alignments); |
469 |
< |
mapQ = (int)(-10.0 * log(err_prob) / log(10.0)); |
470 |
< |
} |
471 |
< |
vector<string> auxdata; |
472 |
< |
add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex); |
473 |
< |
int tlen =atoi(sam_toks[8].c_str()); //TLEN |
474 |
< |
int mate_pos=atoi(sam_toks[7].c_str()); |
475 |
< |
GBamRecord* bamrec=bam_writer.new_record(qname.c_str(), flag, sam_toks[2].c_str(), gpos, mapQ, |
476 |
< |
sam_toks[5].c_str(), sam_toks[6].c_str(), mate_pos, |
477 |
< |
tlen, sam_toks[9].c_str(), sam_toks[10].c_str(), &auxdata); |
478 |
< |
bam_writer.write(bamrec); |
479 |
< |
delete bamrec; |
480 |
< |
return true; |
446 |
> |
qname.resize(slash_pos); |
447 |
> |
//read_alt_name as QNAME |
448 |
> |
int flag=atoi(sam_toks[1].c_str()); //FLAG |
449 |
> |
if (insert_side != FRAG_UNPAIRED) { |
450 |
> |
//flag = atoi(sam_toks[1].c_str()); |
451 |
> |
// mark this as a singleton mate |
452 |
> |
flag |= 0x0001; |
453 |
> |
if (insert_side == FRAG_LEFT) |
454 |
> |
flag |= 0x0040; |
455 |
> |
else if (insert_side == FRAG_RIGHT) |
456 |
> |
flag |= 0x0080; |
457 |
> |
flag |= 0x0008; |
458 |
> |
//char flag_buf[64]; |
459 |
> |
//sprintf(flag_buf, "%d", flag); |
460 |
> |
//sam_toks[t] = flag_buf; |
461 |
> |
} |
462 |
> |
if (!primary) |
463 |
> |
flag |= 0x100; |
464 |
> |
|
465 |
> |
int gpos=isdigit(sam_toks[3][0]) ? atoi(sam_toks[3].c_str()) : 0; |
466 |
> |
int mapQ = 50; |
467 |
> |
if (num_hits > 1) { |
468 |
> |
double err_prob = 1 - (1.0 / num_hits); |
469 |
> |
mapQ = (int)(-10.0 * log(err_prob) / log(10.0)); |
470 |
> |
} |
471 |
> |
int tlen =atoi(sam_toks[8].c_str()); //TLEN |
472 |
> |
int mate_pos=atoi(sam_toks[7].c_str()); |
473 |
> |
|
474 |
> |
string rg_aux = ""; |
475 |
> |
if (!sam_readgroup_id.empty()) |
476 |
> |
rg_aux = string("RG:Z:") + sam_readgroup_id; |
477 |
> |
|
478 |
> |
GBamRecord* bamrec=NULL; |
479 |
> |
if (fusion_alignment) { |
480 |
> |
vector<string> auxdata; |
481 |
> |
add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex); |
482 |
> |
if (rg_aux != "") |
483 |
> |
auxdata.push_back(rg_aux); |
484 |
> |
bamrec=bam_writer.new_record(qname.c_str(), flag, ref_name.c_str(), left1 + 1, mapQ, |
485 |
> |
cigar1, sam_toks[6].c_str(), mate_pos, |
486 |
> |
tlen, left_seq.c_str(), left_qual.c_str(), &auxdata); |
487 |
> |
bam_writer.write(bamrec); |
488 |
> |
delete bamrec; |
489 |
> |
|
490 |
> |
auxdata.clear(); |
491 |
> |
sam_toks[XF_index][5] = '2'; |
492 |
> |
add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex); |
493 |
> |
if (rg_aux != "") |
494 |
> |
auxdata.push_back(rg_aux); |
495 |
> |
bamrec=bam_writer.new_record(qname.c_str(), flag, ref_name2.c_str(), left2 + 1, mapQ, |
496 |
> |
cigar2, sam_toks[6].c_str(), mate_pos, |
497 |
> |
tlen, right_seq.c_str(), right_qual.c_str(), &auxdata); |
498 |
> |
bam_writer.write(bamrec); |
499 |
> |
delete bamrec; |
500 |
> |
} else { |
501 |
> |
vector<string> auxdata; |
502 |
> |
add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex); |
503 |
> |
if (rg_aux != "") |
504 |
> |
auxdata.push_back(rg_aux); |
505 |
> |
bamrec=bam_writer.new_record(qname.c_str(), flag, sam_toks[2].c_str(), gpos, mapQ, |
506 |
> |
sam_toks[5].c_str(), sam_toks[6].c_str(), mate_pos, |
507 |
> |
tlen, sam_toks[9].c_str(), sam_toks[10].c_str(), &auxdata); |
508 |
> |
bam_writer.write(bamrec); |
509 |
> |
delete bamrec; |
510 |
> |
} |
511 |
> |
|
512 |
> |
return true; |
513 |
|
} |
514 |
|
|
515 |
< |
bool rewrite_sam_record(GBamWriter& bam_writer, const RefSequenceTable& rt, |
515 |
> |
bool rewrite_sam_record(GBamWriter& bam_writer, |
516 |
> |
const RefSequenceTable& rt, |
517 |
|
const BowtieHit& bh, |
518 |
|
const char* bwt_buf, |
519 |
|
const char* read_alt_name, |
525 |
|
bool primary, |
526 |
|
int hitIndex) |
527 |
|
{ |
528 |
< |
// Rewrite this hit, filling in the alt name, mate mapping |
529 |
< |
// and setting the pair flag |
530 |
< |
vector<string> sam_toks; |
531 |
< |
tokenize(bwt_buf, "\t", sam_toks); |
528 |
> |
// Rewrite this hit, filling in the alt name, mate mapping |
529 |
> |
// and setting the pair flag |
530 |
> |
vector<string> sam_toks; |
531 |
> |
tokenize(bwt_buf, "\t", sam_toks); |
532 |
|
string qname(read_alt_name); |
533 |
|
size_t slash_pos=qname.rfind('/'); |
534 |
|
if (slash_pos!=string::npos) |
535 |
< |
qname.resize(slash_pos); |
536 |
< |
//read_alt_name as QNAME |
537 |
< |
int flag = atoi(sam_toks[1].c_str()); |
538 |
< |
// 0x0010 (strand of query) is assumed to be set correctly |
539 |
< |
// to begin with |
540 |
< |
flag |= 0x0001; //it must be paired |
541 |
< |
if (insert_side == FRAG_LEFT) |
542 |
< |
flag |= 0x0040; |
543 |
< |
else if (insert_side == FRAG_RIGHT) |
544 |
< |
flag |= 0x0080; |
545 |
< |
if (!primary) |
546 |
< |
flag |= 0x100; |
547 |
< |
|
548 |
< |
int gpos=isdigit(sam_toks[3][0]) ? atoi(sam_toks[3].c_str()) : 0; |
549 |
< |
int mapQ=255; |
550 |
< |
if (grade.num_alignments > 1) { |
551 |
< |
double err_prob = 1 - (1.0 / grade.num_alignments); |
552 |
< |
mapQ = (int)(-10.0 * log(err_prob) / log(10.0)); |
553 |
< |
} |
554 |
< |
int tlen=0; //TLEN |
555 |
< |
int mate_pos=atoi(sam_toks[7].c_str()); |
556 |
< |
if (partner) { |
557 |
< |
if (partner->ref_id()==bh.ref_id()) { |
558 |
< |
sam_toks[6] = "="; //same chromosome |
559 |
< |
//TLEN: |
560 |
< |
tlen = bh.left() < partner->left() ? partner->right() - bh.left() : |
561 |
< |
partner->left() - bh.right(); |
562 |
< |
} |
535 |
> |
qname.resize(slash_pos); |
536 |
> |
//read_alt_name as QNAME |
537 |
> |
int flag = atoi(sam_toks[1].c_str()); |
538 |
> |
// 0x0010 (strand of query) is assumed to be set correctly |
539 |
> |
// to begin with |
540 |
> |
flag |= 0x0001; //it must be paired |
541 |
> |
if (insert_side == FRAG_LEFT) |
542 |
> |
flag |= 0x0040; |
543 |
> |
else if (insert_side == FRAG_RIGHT) |
544 |
> |
flag |= 0x0080; |
545 |
> |
if (!primary) |
546 |
> |
flag |= 0x100; |
547 |
> |
|
548 |
> |
string ref_name = sam_toks[2], ref_name2 = ""; |
549 |
> |
char cigar1[1024] = {0}, cigar2[1024] = {0}; |
550 |
> |
string left_seq, right_seq, left_qual, right_qual; |
551 |
> |
int left1 = -1, left2 = -1; |
552 |
> |
bool fusion_alignment = false; |
553 |
> |
size_t XF_tok_idx = 11; |
554 |
> |
for (; XF_tok_idx < sam_toks.size(); ++XF_tok_idx) |
555 |
> |
{ |
556 |
> |
string& tok = sam_toks[XF_tok_idx]; |
557 |
> |
if (strncmp(tok.c_str(), "XF", 2) == 0) |
558 |
> |
{ |
559 |
> |
fusion_alignment = true; |
560 |
> |
|
561 |
> |
vector<string> tuple_fields; |
562 |
> |
tokenize(tok.c_str(), " ", tuple_fields); |
563 |
> |
vector<string> contigs; |
564 |
> |
tokenize(tuple_fields[1].c_str(), "-", contigs); |
565 |
> |
if (contigs.size() >= 2) |
566 |
> |
{ |
567 |
> |
ref_name = contigs[0]; |
568 |
> |
ref_name2 = contigs[1]; |
569 |
> |
} |
570 |
> |
|
571 |
> |
extract_partial_hits(bh, tuple_fields[4].c_str(), tuple_fields[5].c_str(), |
572 |
> |
cigar1, cigar2, left_seq, right_seq, |
573 |
> |
left_qual, right_qual, left1, left2); |
574 |
> |
|
575 |
> |
break; |
576 |
> |
} |
577 |
|
|
578 |
< |
else { //partner on different chromosome/contig |
579 |
< |
sam_toks[6] = rt.get_name(partner->ref_id()); |
580 |
< |
if (sam_toks[6].empty()) { |
581 |
< |
//FIXME -- this should never happen |
582 |
< |
sam_toks[6] = "="; |
583 |
< |
fprintf(stderr, "Warning: partner ref_id %d has no entry in ref table?\n", partner->ref_id()); |
584 |
< |
} |
585 |
< |
} |
586 |
< |
mate_pos = partner->left() + 1; |
587 |
< |
if (grade.happy()) |
588 |
< |
flag |= 0x0002; |
589 |
< |
if (partner->antisense_align()) |
590 |
< |
flag |= 0x0020; |
591 |
< |
} |
592 |
< |
else { |
593 |
< |
sam_toks[6] = "*"; |
594 |
< |
mate_pos = 0; |
595 |
< |
flag |= 0x0008; |
596 |
< |
} |
597 |
< |
vector<string> auxdata; |
598 |
< |
add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex); |
599 |
< |
GBamRecord* bamrec=bam_writer.new_record(qname.c_str(), flag, sam_toks[2].c_str(), gpos, mapQ, |
600 |
< |
sam_toks[5].c_str(), sam_toks[6].c_str(), mate_pos, |
601 |
< |
tlen, sam_toks[9].c_str(), sam_toks[10].c_str(), &auxdata); |
602 |
< |
bam_writer.write(bamrec); |
603 |
< |
delete bamrec; |
604 |
< |
return true; |
578 |
> |
else if (strncmp(tok.c_str(), "AS", 2) == 0) |
579 |
> |
{ |
580 |
> |
char AS_score[128] = {0}; |
581 |
> |
sprintf(AS_score, "AS:i:%d", min(0, bh.alignment_score())); |
582 |
> |
tok = AS_score; |
583 |
> |
} |
584 |
> |
} |
585 |
> |
|
586 |
> |
int gpos=isdigit(sam_toks[3][0]) ? atoi(sam_toks[3].c_str()) : 0; |
587 |
> |
int mapQ = 50; |
588 |
> |
if (grade.num_alignments > 1) { |
589 |
> |
double err_prob = 1 - (1.0 / grade.num_alignments); |
590 |
> |
mapQ = (int)(-10.0 * log(err_prob) / log(10.0)); |
591 |
> |
} |
592 |
> |
int tlen=0; //TLEN |
593 |
> |
int mate_pos=atoi(sam_toks[7].c_str()); |
594 |
> |
if (partner) { |
595 |
> |
if (partner->ref_id()==bh.ref_id()) { |
596 |
> |
sam_toks[6] = "="; //same chromosome |
597 |
> |
//TLEN: |
598 |
> |
tlen = bh.left() < partner->left() ? partner->right() - bh.left() : |
599 |
> |
partner->left() - bh.right(); |
600 |
> |
} |
601 |
> |
|
602 |
> |
else { //partner on different chromosome/contig |
603 |
> |
sam_toks[6] = rt.get_name(partner->ref_id()); |
604 |
> |
} |
605 |
> |
mate_pos = partner->left() + 1; |
606 |
> |
if (grade.happy()) |
607 |
> |
flag |= 0x0002; |
608 |
> |
if (partner->antisense_align()) |
609 |
> |
flag |= 0x0020; |
610 |
> |
|
611 |
> |
if (partner->fusion_opcode() != FUSION_NOTHING) |
612 |
> |
{ |
613 |
> |
char partner_pos[1024]; |
614 |
> |
sprintf(partner_pos, "XP:Z:%s-%s %d", rt.get_name(partner->ref_id()), rt.get_name(partner->ref_id2()), partner->left() + 1); |
615 |
> |
sam_toks.push_back(partner_pos); |
616 |
> |
} |
617 |
> |
} |
618 |
> |
else { |
619 |
> |
sam_toks[6] = "*"; |
620 |
> |
mate_pos = 0; |
621 |
> |
flag |= 0x0008; |
622 |
> |
} |
623 |
> |
|
624 |
> |
string rg_aux = ""; |
625 |
> |
if (!sam_readgroup_id.empty()) |
626 |
> |
rg_aux = string("RG:Z:") + sam_readgroup_id; |
627 |
> |
|
628 |
> |
GBamRecord* bamrec=NULL; |
629 |
> |
if (fusion_alignment) { |
630 |
> |
vector<string> auxdata; |
631 |
> |
add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex); |
632 |
> |
if (rg_aux != "") |
633 |
> |
auxdata.push_back(rg_aux); |
634 |
> |
bamrec=bam_writer.new_record(qname.c_str(), flag, ref_name.c_str(), left1 + 1, mapQ, |
635 |
> |
cigar1, sam_toks[6].c_str(), mate_pos, |
636 |
> |
tlen, left_seq.c_str(), left_qual.c_str(), &auxdata); |
637 |
> |
bam_writer.write(bamrec); |
638 |
> |
delete bamrec; |
639 |
> |
|
640 |
> |
auxdata.clear(); |
641 |
> |
sam_toks[XF_tok_idx][5] = '2'; |
642 |
> |
add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex); |
643 |
> |
if (rg_aux != "") |
644 |
> |
auxdata.push_back(rg_aux); |
645 |
> |
bamrec=bam_writer.new_record(qname.c_str(), flag, ref_name2.c_str(), left2 + 1, mapQ, |
646 |
> |
cigar2, sam_toks[6].c_str(), mate_pos, |
647 |
> |
tlen, right_seq.c_str(), right_qual.c_str(), &auxdata); |
648 |
> |
bam_writer.write(bamrec); |
649 |
> |
delete bamrec; |
650 |
> |
} else { |
651 |
> |
vector<string> auxdata; |
652 |
> |
add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex); |
653 |
> |
if (rg_aux != "") |
654 |
> |
auxdata.push_back(rg_aux); |
655 |
> |
bamrec=bam_writer.new_record(qname.c_str(), flag, sam_toks[2].c_str(), gpos, mapQ, |
656 |
> |
sam_toks[5].c_str(), sam_toks[6].c_str(), mate_pos, |
657 |
> |
tlen, sam_toks[9].c_str(), sam_toks[10].c_str(), &auxdata); |
658 |
> |
bam_writer.write(bamrec); |
659 |
> |
delete bamrec; |
660 |
> |
} |
661 |
> |
|
662 |
> |
return true; |
663 |
|
} |
664 |
|
|
665 |
|
struct lex_hit_sort |
672 |
|
{ |
673 |
|
const BowtieHit& lhs = _hits.hits[l]; |
674 |
|
const BowtieHit& rhs = _hits.hits[r]; |
675 |
< |
|
675 |
> |
|
676 |
|
uint32_t l_id = lhs.ref_id(); |
677 |
|
uint32_t r_id = rhs.ref_id(); |
678 |
|
if (l_id != r_id) |
679 |
< |
{ |
680 |
< |
return (strcmp(_rt.get_name(lhs.ref_id()), _rt.get_name(rhs.ref_id())) < 0); |
346 |
< |
} |
679 |
> |
return l_id < r_id; |
680 |
> |
|
681 |
|
return lhs.left() < rhs.left(); |
682 |
|
} |
683 |
|
|
686 |
|
}; |
687 |
|
|
688 |
|
void print_sam_for_single(const RefSequenceTable& rt, |
689 |
< |
const HitsForRead& hits, |
690 |
< |
const FragmentAlignmentGrade& grade, |
691 |
< |
FragmentType frag_type, |
692 |
< |
//FILE* reads_file, |
693 |
< |
Read& read, |
360 |
< |
GBamWriter& bam_writer, |
361 |
< |
FILE* um_out//write unmapped reads here |
362 |
< |
) |
689 |
> |
const HitsForRead& hits, |
690 |
> |
FragmentType frag_type, |
691 |
> |
const string& alt_name, |
692 |
> |
GBamWriter& bam_writer, |
693 |
> |
boost::random::mt19937& rng) |
694 |
|
{ |
695 |
< |
assert(!read.alt_name.empty()); |
695 |
> |
//assert(!read.alt_name.empty()); |
696 |
> |
if (hits.hits.empty()) |
697 |
> |
return; |
698 |
> |
|
699 |
|
lex_hit_sort s(rt, hits); |
700 |
|
vector<uint32_t> index_vector; |
701 |
< |
for (size_t i = 0; i < hits.hits.size(); ++i) |
701 |
> |
|
702 |
> |
size_t i; |
703 |
> |
for (i = 0; i < hits.hits.size(); ++i) |
704 |
|
index_vector.push_back(i); |
705 |
|
|
706 |
|
sort(index_vector.begin(), index_vector.end(), s); |
707 |
< |
size_t primaryHit = (hits.hits.empty()? 0: random() % hits.hits.size()); |
707 |
> |
|
708 |
> |
size_t primaryHit = 0; |
709 |
> |
if (!report_secondary_alignments) |
710 |
> |
primaryHit = rng() % hits.hits.size(); |
711 |
> |
|
712 |
|
bool multipleHits = (hits.hits.size() > 1); |
713 |
< |
for (size_t i = 0; i < hits.hits.size(); ++i) |
714 |
< |
{ |
715 |
< |
bool primary = (i == primaryHit); |
716 |
< |
size_t index = index_vector[i]; |
717 |
< |
const BowtieHit& bh = hits.hits[index]; |
718 |
< |
rewrite_sam_record(bam_writer, rt, |
719 |
< |
bh, |
720 |
< |
bh.hitfile_rec().c_str(), |
721 |
< |
read.alt_name.c_str(), |
722 |
< |
grade, |
723 |
< |
frag_type, |
724 |
< |
hits.hits.size(), |
725 |
< |
(i < hits.hits.size()-1) ? &(hits.hits[index_vector[i+1]]) : NULL, |
726 |
< |
primary, |
727 |
< |
(multipleHits? i: -1)); |
388 |
< |
} |
713 |
> |
for (i = 0; i < hits.hits.size(); ++i) |
714 |
> |
{ |
715 |
> |
size_t index = index_vector[i]; |
716 |
> |
bool primary = (index == primaryHit); |
717 |
> |
const BowtieHit& bh = hits.hits[index]; |
718 |
> |
rewrite_sam_record(bam_writer, rt, |
719 |
> |
bh, |
720 |
> |
bh.hitfile_rec().c_str(), |
721 |
> |
alt_name.c_str(), |
722 |
> |
frag_type, |
723 |
> |
hits.hits.size(), |
724 |
> |
(i < hits.hits.size()-1) ? &(hits.hits[index_vector[i+1]]) : NULL, |
725 |
> |
primary, |
726 |
> |
(multipleHits? i: -1)); |
727 |
> |
} |
728 |
|
} |
729 |
|
|
730 |
|
void print_sam_for_pair(const RefSequenceTable& rt, |
731 |
< |
const HitsForRead& left_hits, |
393 |
< |
const HitsForRead& right_hits, |
731 |
> |
const vector<pair<BowtieHit, BowtieHit> >& best_hits, |
732 |
|
const InsertAlignmentGrade& grade, |
395 |
– |
FILE* left_reads_file, |
396 |
– |
FILE* right_reads_file, |
733 |
|
GBamWriter& bam_writer, |
734 |
< |
FILE* left_um_out, |
735 |
< |
FILE* right_um_out |
736 |
< |
) |
734 |
> |
const string& left_alt_name, |
735 |
> |
const string& right_alt_name, |
736 |
> |
boost::random::mt19937& rng, |
737 |
> |
uint64_t begin_id = 0, |
738 |
> |
uint64_t end_id = std::numeric_limits<uint64_t>::max()) |
739 |
|
{ |
402 |
– |
assert (left_hits.insert_id == right_hits.insert_id); |
403 |
– |
|
740 |
|
Read left_read; |
741 |
|
Read right_read; |
742 |
< |
assert (left_hits.hits.size() == right_hits.hits.size() || |
743 |
< |
(left_hits.hits.empty() || right_hits.hits.empty())); |
742 |
> |
if (best_hits.empty()) |
743 |
> |
return; |
744 |
> |
|
745 |
> |
size_t i; |
746 |
> |
HitsForRead right_hits; |
747 |
> |
for (i = 0; i < best_hits.size(); ++i) |
748 |
> |
right_hits.hits.push_back(best_hits[i].second); |
749 |
|
|
750 |
|
size_t primaryHit = 0; |
751 |
|
vector<uint32_t> index_vector; |
752 |
< |
if(right_hits.hits.size() > 0) |
753 |
< |
{ |
754 |
< |
lex_hit_sort s(rt, right_hits); |
414 |
< |
for (size_t i = 0; i < right_hits.hits.size(); ++i) |
415 |
< |
index_vector.push_back(i); |
416 |
< |
|
417 |
< |
sort(index_vector.begin(), index_vector.end(), s); |
418 |
< |
primaryHit = random() % right_hits.hits.size(); |
419 |
< |
} |
420 |
< |
else if (left_hits.hits.size() > 0) |
421 |
< |
{ |
422 |
< |
lex_hit_sort s(rt, left_hits); |
423 |
< |
for (size_t i = 0; i < left_hits.hits.size(); ++i) |
424 |
< |
index_vector.push_back(i); |
425 |
< |
|
426 |
< |
sort(index_vector.begin(), index_vector.end(), s); |
427 |
< |
primaryHit = random() % left_hits.hits.size(); |
428 |
< |
} |
752 |
> |
lex_hit_sort s(rt, right_hits); |
753 |
> |
for (i = 0; i < right_hits.hits.size(); ++i) |
754 |
> |
index_vector.push_back(i); |
755 |
|
|
756 |
< |
bool got_left_read = get_read_from_stream(left_hits.insert_id, |
431 |
< |
left_reads_file, |
432 |
< |
reads_format, |
433 |
< |
false, |
434 |
< |
left_read, left_um_out); |
435 |
< |
|
436 |
< |
bool got_right_read = get_read_from_stream(right_hits.insert_id, |
437 |
< |
right_reads_file, |
438 |
< |
reads_format, |
439 |
< |
false, |
440 |
< |
right_read, right_um_out); |
441 |
< |
|
442 |
< |
if (left_hits.hits.size() == right_hits.hits.size()) |
443 |
< |
{ |
444 |
< |
assert (got_left_read && got_right_read); |
445 |
< |
bool multipleHits = (left_hits.hits.size() > 1); |
446 |
< |
for (size_t i = 0; i < right_hits.hits.size(); ++i) |
447 |
< |
{ |
448 |
< |
bool primary = (i == primaryHit); |
449 |
< |
size_t index = index_vector[i]; |
450 |
< |
const BowtieHit& right_bh = right_hits.hits[index]; |
451 |
< |
const BowtieHit& left_bh = left_hits.hits[index]; |
452 |
< |
|
453 |
< |
rewrite_sam_record(bam_writer, rt, |
454 |
< |
right_bh, |
455 |
< |
right_bh.hitfile_rec().c_str(), |
456 |
< |
right_read.alt_name.c_str(), |
457 |
< |
grade, |
458 |
< |
FRAG_RIGHT, |
459 |
< |
&left_bh, |
460 |
< |
right_hits.hits.size(), |
461 |
< |
(i < right_hits.hits.size() - 1) ? &(right_hits.hits[index_vector[i+1]]) : NULL, |
462 |
< |
primary, |
463 |
< |
(multipleHits? i: -1)); |
464 |
< |
rewrite_sam_record(bam_writer, rt, |
465 |
< |
left_bh, |
466 |
< |
left_bh.hitfile_rec().c_str(), |
467 |
< |
left_read.alt_name.c_str(), |
468 |
< |
grade, |
469 |
< |
FRAG_LEFT, |
470 |
< |
&right_bh, |
471 |
< |
left_hits.hits.size(), |
472 |
< |
(i < left_hits.hits.size() - 1) ? &(left_hits.hits[index_vector[i+1]]) : NULL, |
473 |
< |
primary, |
474 |
< |
(multipleHits? i: -1)); |
475 |
< |
} |
476 |
< |
} |
477 |
< |
else if (left_hits.hits.empty()) |
478 |
< |
{ //only right read was mapped properly |
479 |
< |
if (right_um_out) { |
480 |
< |
//write it in the mapped file with the #MAPPED# flag |
481 |
< |
fprintf(right_um_out, "@%s #MAPPED#\n%s\n+\n%s\n", right_read.alt_name.c_str(), |
482 |
< |
right_read.seq.c_str(), right_read.qual.c_str()); |
483 |
< |
} |
484 |
< |
for (size_t i = 0; i < right_hits.hits.size(); ++i) |
485 |
< |
{ |
486 |
< |
bool primary = (i == primaryHit); |
487 |
< |
size_t index = index_vector[i]; |
488 |
< |
const BowtieHit& bh = right_hits.hits[index]; |
489 |
< |
|
490 |
< |
rewrite_sam_record(bam_writer, rt, |
491 |
< |
bh, |
492 |
< |
bh.hitfile_rec().c_str(), |
493 |
< |
right_read.alt_name.c_str(), |
494 |
< |
grade, |
495 |
< |
FRAG_RIGHT, |
496 |
< |
NULL, |
497 |
< |
right_hits.hits.size(), |
498 |
< |
(i < right_hits.hits.size() - 1) ? &(right_hits.hits[index_vector[i+1]]) : NULL, |
499 |
< |
primary, |
500 |
< |
-1); |
501 |
< |
|
502 |
< |
} |
503 |
< |
} |
504 |
< |
else if (right_hits.hits.empty()) |
505 |
< |
{ //only left read was mapped properly |
506 |
< |
if (left_um_out) { |
507 |
< |
//write it in the mapped file with the #MAPPED# flag |
508 |
< |
fprintf(left_um_out, "@%s #MAPPED#\n%s\n+\n%s\n", left_read.alt_name.c_str(), left_read.seq.c_str(), |
509 |
< |
left_read.qual.c_str()); |
510 |
< |
} |
756 |
> |
sort(index_vector.begin(), index_vector.end(), s); |
757 |
|
|
758 |
< |
for (size_t i = 0; i < left_hits.hits.size(); ++i) |
759 |
< |
{ |
760 |
< |
bool primary = (i == primaryHit); |
761 |
< |
size_t index = index_vector[i]; |
762 |
< |
const BowtieHit& bh = left_hits.hits[index]; |
763 |
< |
rewrite_sam_record(bam_writer, rt, |
764 |
< |
bh, |
765 |
< |
bh.hitfile_rec().c_str(), |
766 |
< |
left_read.alt_name.c_str(), |
767 |
< |
grade, |
768 |
< |
FRAG_LEFT, |
769 |
< |
NULL, |
770 |
< |
left_hits.hits.size(), |
771 |
< |
(i < left_hits.hits.size() - 1) ? &(left_hits.hits[index_vector[i+1]]) : NULL, |
772 |
< |
primary, |
773 |
< |
-1); |
774 |
< |
|
775 |
< |
} |
776 |
< |
} |
777 |
< |
else |
778 |
< |
{ |
779 |
< |
assert (false); |
780 |
< |
} |
758 |
> |
if (!report_secondary_alignments) |
759 |
> |
primaryHit = rng() % right_hits.hits.size(); |
760 |
> |
|
761 |
> |
bool multipleHits = (best_hits.size() > 1); |
762 |
> |
for (i = 0; i < best_hits.size(); ++i) |
763 |
> |
{ |
764 |
> |
size_t index = index_vector[i]; |
765 |
> |
bool primary = (index == primaryHit); |
766 |
> |
const BowtieHit& right_bh = best_hits[index].second; |
767 |
> |
const BowtieHit& left_bh = best_hits[index].first; |
768 |
> |
|
769 |
> |
rewrite_sam_record(bam_writer, rt, |
770 |
> |
right_bh, |
771 |
> |
right_bh.hitfile_rec().c_str(), |
772 |
> |
right_alt_name.c_str(), |
773 |
> |
grade, |
774 |
> |
FRAG_RIGHT, |
775 |
> |
&left_bh, |
776 |
> |
best_hits.size(), |
777 |
> |
(i < best_hits.size() - 1) ? &(best_hits[index_vector[i+1]].second) : NULL, |
778 |
> |
primary, |
779 |
> |
(multipleHits? i: -1)); |
780 |
> |
rewrite_sam_record(bam_writer, rt, |
781 |
> |
left_bh, |
782 |
> |
left_bh.hitfile_rec().c_str(), |
783 |
> |
left_alt_name.c_str(), |
784 |
> |
grade, |
785 |
> |
FRAG_LEFT, |
786 |
> |
&right_bh, |
787 |
> |
best_hits.size(), |
788 |
> |
(i < best_hits.size() - 1) ? &(best_hits[index_vector[i+1]].first) : NULL, |
789 |
> |
primary, |
790 |
> |
(multipleHits? i: -1)); |
791 |
> |
} |
792 |
|
} |
793 |
|
|
794 |
|
/** |
801 |
|
InsertionSet& insertions, |
802 |
|
DeletionSet& deletions) |
803 |
|
{ |
804 |
< |
for (size_t i = 0; i < hits.hits.size(); ++i) |
804 |
> |
for (size_t i = 0; i < hits.hits.size(); ++i) |
805 |
> |
{ |
806 |
> |
const BowtieHit& bh = hits.hits[i]; |
807 |
> |
insertions_from_alignment(bh, insertions); |
808 |
> |
deletions_from_alignment(bh, deletions); |
809 |
> |
} |
810 |
> |
} |
811 |
> |
|
812 |
> |
void update_coverage(const HitsForRead& hits, |
813 |
> |
Coverage& coverage) |
814 |
> |
{ |
815 |
> |
for (size_t i = 0; i < hits.hits.size(); ++i) |
816 |
> |
{ |
817 |
> |
const BowtieHit& hit = hits.hits[i]; |
818 |
> |
const vector<CigarOp>& cigar = hit.cigar(); |
819 |
> |
unsigned int positionInGenome = hit.left(); |
820 |
> |
RefID ref_id = hit.ref_id(); |
821 |
> |
|
822 |
> |
for(size_t c = 0; c < cigar.size(); ++c) |
823 |
|
{ |
824 |
< |
const BowtieHit& bh = hits.hits[i]; |
825 |
< |
insertions_from_alignment(bh, insertions); |
826 |
< |
deletions_from_alignment(bh, deletions); |
824 |
> |
int opcode = cigar[c].opcode; |
825 |
> |
int length = cigar[c].length; |
826 |
> |
switch(opcode) |
827 |
> |
{ |
828 |
> |
case REF_SKIP: |
829 |
> |
case MATCH: |
830 |
> |
case DEL: |
831 |
> |
if (opcode == MATCH) |
832 |
> |
coverage.add_coverage(ref_id, positionInGenome, length); |
833 |
> |
|
834 |
> |
positionInGenome += length; |
835 |
> |
break; |
836 |
> |
case rEF_SKIP: |
837 |
> |
case mATCH: |
838 |
> |
case dEL: |
839 |
> |
positionInGenome -= length; |
840 |
> |
|
841 |
> |
if (opcode == mATCH) |
842 |
> |
coverage.add_coverage(ref_id, positionInGenome + 1, length); |
843 |
> |
break; |
844 |
> |
case FUSION_FF: |
845 |
> |
case FUSION_FR: |
846 |
> |
case FUSION_RF: |
847 |
> |
case FUSION_RR: |
848 |
> |
positionInGenome = length; |
849 |
> |
ref_id = hit.ref_id2(); |
850 |
> |
break; |
851 |
> |
default: |
852 |
> |
break; |
853 |
> |
} |
854 |
|
} |
855 |
+ |
} |
856 |
|
} |
857 |
|
|
858 |
|
|
859 |
+ |
void update_fusions(const HitsForRead& hits, |
860 |
+ |
RefSequenceTable& rt, |
861 |
+ |
FusionSet& fusions, |
862 |
+ |
const FusionSet& fusions_ref = empty_fusions) |
863 |
+ |
{ |
864 |
+ |
if (!fusion_search) |
865 |
+ |
return; |
866 |
+ |
|
867 |
+ |
if (hits.hits.size() > fusion_multireads) |
868 |
+ |
return; |
869 |
+ |
|
870 |
+ |
bool update_stat = fusions_ref.size() > 0; |
871 |
+ |
for (size_t i = 0; i < hits.hits.size(); ++i) |
872 |
+ |
{ |
873 |
+ |
const BowtieHit& bh = hits.hits[i]; |
874 |
+ |
|
875 |
+ |
if (bh.edit_dist() > fusion_read_mismatches) |
876 |
+ |
continue; |
877 |
+ |
|
878 |
+ |
fusions_from_alignment(bh, fusions, rt, update_stat); |
879 |
+ |
|
880 |
+ |
if (update_stat) |
881 |
+ |
unsupport_fusions(bh, fusions, fusions_ref); |
882 |
+ |
} |
883 |
+ |
} |
884 |
|
|
885 |
|
void update_junctions(const HitsForRead& hits, |
886 |
< |
JunctionSet& junctions) |
886 |
> |
JunctionSet& junctions) |
887 |
|
{ |
888 |
< |
for (size_t i = 0; i < hits.hits.size(); ++i) |
889 |
< |
{ |
890 |
< |
const BowtieHit& bh = hits.hits[i]; |
891 |
< |
junctions_from_alignment(bh, junctions); |
892 |
< |
} |
888 |
> |
for (size_t i = 0; i < hits.hits.size(); ++i) |
889 |
> |
{ |
890 |
> |
const BowtieHit& bh = hits.hits[i]; |
891 |
> |
junctions_from_alignment(bh, junctions); |
892 |
> |
} |
893 |
|
} |
894 |
|
|
895 |
|
// Extracts junctions from all the SAM hits (based on REF_SKIPs) in the hit file |
896 |
|
// resets the stream when finished. |
897 |
< |
void exclude_hits_on_filtered_junctions(const JunctionSet& junctions, |
570 |
< |
HitsForRead& hits) |
897 |
> |
void exclude_hits_on_filtered_junctions(const JunctionSet& junctions, HitsForRead& hits) |
898 |
|
{ |
899 |
< |
HitsForRead remaining; |
900 |
< |
remaining.insert_id = hits.insert_id; |
901 |
< |
|
902 |
< |
for (size_t i = 0; i < hits.hits.size(); ++i) |
899 |
> |
HitsForRead remaining; |
900 |
> |
remaining.insert_id = hits.insert_id; |
901 |
> |
|
902 |
> |
for (size_t i = 0; i < hits.hits.size(); ++i) |
903 |
> |
{ |
904 |
> |
BowtieHit& bh = hits.hits[i]; |
905 |
> |
if (bh.edit_dist() > max_read_mismatches) |
906 |
> |
continue; |
907 |
> |
|
908 |
> |
bool filter_hit = false; |
909 |
> |
if (!bh.contiguous()) |
910 |
|
{ |
911 |
< |
BowtieHit& bh = hits.hits[i]; |
912 |
< |
bool filter_hit = false; |
913 |
< |
if (!bh.contiguous()) |
914 |
< |
{ |
915 |
< |
JunctionSet bh_juncs; |
916 |
< |
junctions_from_alignment(bh, bh_juncs); |
917 |
< |
for (JunctionSet::iterator itr = bh_juncs.begin(); |
918 |
< |
itr != bh_juncs.end(); |
919 |
< |
itr++) |
586 |
< |
{ |
587 |
< |
const Junction& j = itr->first; |
588 |
< |
JunctionSet::const_iterator target = junctions.find(j); |
589 |
< |
if (target == junctions.end() || !target->second.accepted) |
590 |
< |
{ |
591 |
< |
filter_hit = true; |
592 |
< |
break; |
593 |
< |
} |
594 |
< |
} |
595 |
< |
} |
596 |
< |
if (!filter_hit) |
911 |
> |
JunctionSet bh_juncs; |
912 |
> |
junctions_from_alignment(bh, bh_juncs); |
913 |
> |
for (JunctionSet::iterator itr = bh_juncs.begin(); |
914 |
> |
itr != bh_juncs.end(); |
915 |
> |
itr++) |
916 |
> |
{ |
917 |
> |
const Junction& j = itr->first; |
918 |
> |
JunctionSet::const_iterator target = junctions.find(j); |
919 |
> |
if (target == junctions.end() || !target->second.accepted) |
920 |
|
{ |
921 |
< |
remaining.hits.push_back(bh); |
921 |
> |
filter_hit = true; |
922 |
> |
break; |
923 |
|
} |
924 |
< |
else |
924 |
> |
} |
925 |
> |
} |
926 |
> |
if (!filter_hit) |
927 |
> |
remaining.hits.push_back(bh); |
928 |
> |
} |
929 |
> |
hits = remaining; |
930 |
> |
} |
931 |
> |
|
932 |
> |
void realign_reads(HitsForRead& hits, |
933 |
> |
const RefSequenceTable& rt, |
934 |
> |
const JunctionSet& junctions, |
935 |
> |
const JunctionSet& rev_junctions, |
936 |
> |
const InsertionSet& insertions, |
937 |
> |
const DeletionSet& deletions, |
938 |
> |
const DeletionSet& rev_deletions, |
939 |
> |
const FusionSet& fusions) |
940 |
> |
{ |
941 |
> |
if (color) |
942 |
> |
return; |
943 |
> |
|
944 |
> |
vector<BowtieHit> additional_hits; |
945 |
> |
for (size_t i = 0; i < hits.hits.size(); ++i) |
946 |
> |
{ |
947 |
> |
BowtieHit& bh = hits.hits[i]; |
948 |
> |
if (fusion_search && bh.fusion_opcode() != FUSION_NOTHING) |
949 |
> |
return; |
950 |
> |
|
951 |
> |
const vector<CigarOp>& cigars = bh.cigar(); |
952 |
> |
int pos = bh.left(); |
953 |
> |
int refid = bh.ref_id(); |
954 |
> |
for (size_t j = 0; j < cigars.size(); ++j) |
955 |
> |
{ |
956 |
> |
const CigarOp& op = cigars[j]; |
957 |
> |
if (j == 0 || j == cigars.size() - 1) |
958 |
> |
{ |
959 |
> |
// let's do this for MATCH case only, |
960 |
> |
if (op.opcode == MATCH || op.opcode == mATCH) |
961 |
|
{ |
962 |
< |
//fprintf(stderr, "Filtering hit\n"); |
962 |
> |
int left1, left2; |
963 |
> |
if (op.opcode == MATCH) |
964 |
> |
{ |
965 |
> |
left1 = pos; |
966 |
> |
left2 = pos + op.length - 1; |
967 |
> |
} |
968 |
> |
else |
969 |
> |
{ |
970 |
> |
left1 = pos - op.length + 1; |
971 |
> |
left2 = pos; |
972 |
> |
} |
973 |
> |
|
974 |
> |
{ |
975 |
> |
static const size_t max_temp_juncs = 3; |
976 |
> |
|
977 |
> |
JunctionSet::const_iterator lb, ub; |
978 |
> |
JunctionSet temp_junctions; |
979 |
> |
if (j == 0) |
980 |
> |
{ |
981 |
> |
lb = rev_junctions.upper_bound(Junction(refid, left1, 0, true)); |
982 |
> |
ub = rev_junctions.lower_bound(Junction(refid, left2, left2, false)); |
983 |
> |
while (lb != ub && lb != rev_junctions.end()) |
984 |
> |
{ |
985 |
> |
Junction temp_junction = lb->first; |
986 |
> |
temp_junction.left = lb->first.right; |
987 |
> |
temp_junction.right = lb->first.left; |
988 |
> |
temp_junctions[temp_junction] = lb->second; |
989 |
> |
++lb; |
990 |
> |
|
991 |
> |
if (temp_junctions.size() > max_temp_juncs) |
992 |
> |
break; |
993 |
> |
} |
994 |
> |
} |
995 |
> |
|
996 |
> |
if (j == cigars.size() - 1) |
997 |
> |
{ |
998 |
> |
int common_right = left2 + max_report_intron_length; |
999 |
> |
lb = junctions.upper_bound(Junction(refid, left1, common_right, true)); |
1000 |
> |
ub = junctions.lower_bound(Junction(refid, left2, common_right, false)); |
1001 |
> |
while (lb != ub && lb != junctions.end()) |
1002 |
> |
{ |
1003 |
> |
temp_junctions[lb->first] = lb->second; |
1004 |
> |
++lb; |
1005 |
> |
|
1006 |
> |
if (temp_junctions.size() > max_temp_juncs) |
1007 |
> |
break; |
1008 |
> |
} |
1009 |
> |
} |
1010 |
> |
|
1011 |
> |
if (temp_junctions.size() > max_temp_juncs) |
1012 |
> |
continue; |
1013 |
> |
|
1014 |
> |
JunctionSet::const_iterator junc_iter = temp_junctions.begin(); |
1015 |
> |
for (; junc_iter != temp_junctions.end(); ++junc_iter) |
1016 |
> |
{ |
1017 |
> |
Junction junc = junc_iter->first; |
1018 |
> |
|
1019 |
> |
/* |
1020 |
> |
fprintf(stderr, "%d %d-%d %s (AS:%d XM:%d) with junc %u-%u\n", |
1021 |
> |
bh.insert_id(), bh.left(), bh.right(), |
1022 |
> |
print_cigar(bh.cigar()).c_str(), |
1023 |
> |
bh.alignment_score(), bh.edit_dist(), |
1024 |
> |
junc.left, junc.right); |
1025 |
> |
//*/ |
1026 |
> |
|
1027 |
> |
int new_left = bh.left(); |
1028 |
> |
int intron_length = junc.right - junc.left - 1; |
1029 |
> |
vector<CigarOp> new_cigars; |
1030 |
> |
bool anchored = false; |
1031 |
> |
if (j == 0 && bh.left() > (int)junc.left) |
1032 |
> |
{ |
1033 |
> |
new_left -= intron_length; |
1034 |
> |
int before_match_length = junc.left - new_left + 1;; |
1035 |
> |
int after_match_length = op.length - before_match_length; |
1036 |
> |
|
1037 |
> |
if (before_match_length > 0 && after_match_length > 0) |
1038 |
> |
{ |
1039 |
> |
anchored = true; |
1040 |
> |
new_cigars.push_back(CigarOp(MATCH, before_match_length)); |
1041 |
> |
new_cigars.push_back(CigarOp(REF_SKIP, intron_length)); |
1042 |
> |
new_cigars.push_back(CigarOp(MATCH, after_match_length)); |
1043 |
> |
|
1044 |
> |
new_cigars.insert(new_cigars.end(), cigars.begin() + 1, cigars.end()); |
1045 |
> |
} |
1046 |
> |
} |
1047 |
> |
else if (j == cigars.size() - 1 && pos < (int)junc.left) |
1048 |
> |
{ |
1049 |
> |
new_cigars.insert(new_cigars.end(), cigars.begin(), cigars.end() - 1); |
1050 |
> |
|
1051 |
> |
int before_match_length = junc.left - pos + 1; |
1052 |
> |
int after_match_length = op.length - before_match_length; |
1053 |
> |
|
1054 |
> |
if (before_match_length > 0 && after_match_length > 0) |
1055 |
> |
{ |
1056 |
> |
anchored = true; |
1057 |
> |
new_cigars.push_back(CigarOp(MATCH, before_match_length)); |
1058 |
> |
new_cigars.push_back(CigarOp(REF_SKIP, intron_length)); |
1059 |
> |
new_cigars.push_back(CigarOp(MATCH, after_match_length)); |
1060 |
> |
} |
1061 |
> |
} |
1062 |
> |
|
1063 |
> |
if (!anchored) |
1064 |
> |
continue; |
1065 |
> |
|
1066 |
> |
BowtieHit new_bh(bh.ref_id(), |
1067 |
> |
bh.ref_id2(), |
1068 |
> |
bh.insert_id(), |
1069 |
> |
new_left, |
1070 |
> |
new_cigars, |
1071 |
> |
bh.antisense_align(), |
1072 |
> |
junc.antisense, |
1073 |
> |
0, /* edit_dist - needs to be recalculated */ |
1074 |
> |
0, /* splice_mms - needs to be recalculated */ |
1075 |
> |
false); |
1076 |
> |
|
1077 |
> |
new_bh.seq(bh.seq()); |
1078 |
> |
new_bh.qual(bh.qual()); |
1079 |
> |
|
1080 |
> |
const RefSequenceTable::Sequence* ref_str = rt.get_seq(bh.ref_id()); |
1081 |
> |
|
1082 |
> |
if (new_left >= 0 && new_bh.right() <= (int)length(*ref_str)) |
1083 |
> |
{ |
1084 |
> |
vector<string> aux_fields; |
1085 |
> |
bowtie_sam_extra(new_bh, rt, aux_fields); |
1086 |
> |
|
1087 |
> |
vector<string>::const_iterator aux_iter = aux_fields.begin(); |
1088 |
> |
for (; aux_iter != aux_fields.end(); ++aux_iter) |
1089 |
> |
{ |
1090 |
> |
const string& aux_field = *aux_iter; |
1091 |
> |
if (strncmp(aux_field.c_str(), "AS", 2) == 0) |
1092 |
> |
{ |
1093 |
> |
int alignment_score = atoi(aux_field.c_str() + 5); |
1094 |
> |
new_bh.alignment_score(alignment_score); |
1095 |
> |
} |
1096 |
> |
else if (strncmp(aux_field.c_str(), "XM", 2) == 0) |
1097 |
> |
{ |
1098 |
> |
int XM_value = atoi(aux_field.c_str() + 5); |
1099 |
> |
new_bh.edit_dist(XM_value); |
1100 |
> |
} |
1101 |
> |
} |
1102 |
> |
|
1103 |
> |
vector<string> sam_toks; |
1104 |
> |
tokenize(bh.hitfile_rec().c_str(), "\t", sam_toks); |
1105 |
> |
|
1106 |
> |
char coord[20] = {0,}; |
1107 |
> |
sprintf(coord, "%d", new_bh.left() + 1); |
1108 |
> |
sam_toks[3] = coord; |
1109 |
> |
sam_toks[5] = print_cigar(new_bh.cigar()); |
1110 |
> |
for (size_t a = 11; a < sam_toks.size(); ++a) |
1111 |
> |
{ |
1112 |
> |
string& sam_tok = sam_toks[a]; |
1113 |
> |
for (size_t b = 0; b < aux_fields.size(); ++b) |
1114 |
> |
{ |
1115 |
> |
const string& aux_tok = aux_fields[b]; |
1116 |
> |
if (strncmp(sam_tok.c_str(), aux_tok.c_str(), 5) == 0) |
1117 |
> |
{ |
1118 |
> |
sam_tok = aux_tok; |
1119 |
> |
break; |
1120 |
> |
} |
1121 |
> |
} |
1122 |
> |
} |
1123 |
> |
|
1124 |
> |
if (!bh.is_spliced()) |
1125 |
> |
{ |
1126 |
> |
if (junc.antisense) |
1127 |
> |
sam_toks.push_back("XS:A:-"); |
1128 |
> |
else |
1129 |
> |
sam_toks.push_back("XS:A:+"); |
1130 |
> |
} |
1131 |
> |
|
1132 |
> |
|
1133 |
> |
string new_rec = ""; |
1134 |
> |
for (size_t d = 0; d < sam_toks.size(); ++d) |
1135 |
> |
{ |
1136 |
> |
new_rec += sam_toks[d]; |
1137 |
> |
if (d < sam_toks.size() - 1) |
1138 |
> |
new_rec += "\t"; |
1139 |
> |
} |
1140 |
> |
|
1141 |
> |
new_bh.hitfile_rec(new_rec); |
1142 |
> |
|
1143 |
> |
if (new_bh.edit_dist() <= bh.edit_dist()) |
1144 |
> |
additional_hits.push_back(new_bh); |
1145 |
> |
|
1146 |
> |
/* |
1147 |
> |
fprintf(stderr, "\t%d %d-%d %s (AS:%d XM:%d) with junc %u-%u\n", |
1148 |
> |
new_bh.insert_id(), new_bh.left(), new_bh.right(), |
1149 |
> |
print_cigar(new_bh.cigar()).c_str(), |
1150 |
> |
new_bh.alignment_score(), new_bh.edit_dist(), |
1151 |
> |
junc.left, junc.right); |
1152 |
> |
//*/ |
1153 |
> |
} |
1154 |
> |
} |
1155 |
> |
} |
1156 |
> |
|
1157 |
> |
#if 0 |
1158 |
> |
{ |
1159 |
> |
DeletionSet::const_iterator lb, ub; |
1160 |
> |
bool use_rev_deletions = (j == 0); |
1161 |
> |
const DeletionSet& curr_deletions = (use_rev_deletions ? rev_deletions : deletions); |
1162 |
> |
if (use_rev_deletions) |
1163 |
> |
{ |
1164 |
> |
lb = curr_deletions.upper_bound(Deletion(refid, left1, 0, true)); |
1165 |
> |
ub = curr_deletions.lower_bound(Deletion(refid, left2, left2, false)); |
1166 |
> |
} |
1167 |
> |
else |
1168 |
> |
{ |
1169 |
> |
int common_right = left2 + 100; |
1170 |
> |
lb = curr_deletions.upper_bound(Deletion(refid, left1, common_right, true)); |
1171 |
> |
ub = curr_deletions.lower_bound(Deletion(refid, left2, common_right, false)); |
1172 |
> |
} |
1173 |
> |
|
1174 |
> |
while (lb != curr_deletions.end() && lb != ub) |
1175 |
> |
{ |
1176 |
> |
Deletion del = lb->first; |
1177 |
> |
if (use_rev_deletions) |
1178 |
> |
{ |
1179 |
> |
int temp = del.left; |
1180 |
> |
del.left = del.right; |
1181 |
> |
del.right = temp; |
1182 |
> |
} |
1183 |
> |
|
1184 |
> |
// daehwan - for debuggin purposes |
1185 |
> |
/* |
1186 |
> |
fprintf(stderr, "(type%d) %d %d-%d %s (AS:%d XM:%d) with junc %u-%u\n", |
1187 |
> |
!use_rev_junctions, |
1188 |
> |
bh.insert_id(), bh.left(), bh.right(), |
1189 |
> |
print_cigar(bh.cigar()).c_str(), |
1190 |
> |
bh.alignment_score(), bh.edit_dist(), |
1191 |
> |
junc.left, junc.right); |
1192 |
> |
*/ |
1193 |
> |
|
1194 |
> |
int del_length = del.right - del.left - 1; |
1195 |
> |
int new_left = bh.left(); |
1196 |
> |
if (j == 0) |
1197 |
> |
new_left -= del_length; |
1198 |
> |
|
1199 |
> |
vector<CigarOp> new_cigars; |
1200 |
> |
if (j == 0) |
1201 |
> |
{ |
1202 |
> |
int before_match_length = del.left - new_left + 1;; |
1203 |
> |
int after_match_length = op.length - before_match_length; |
1204 |
> |
|
1205 |
> |
if (before_match_length > 0) |
1206 |
> |
new_cigars.push_back(CigarOp(MATCH, before_match_length)); |
1207 |
> |
new_cigars.push_back(CigarOp(DEL, del_length)); |
1208 |
> |
if (after_match_length > 0) |
1209 |
> |
new_cigars.push_back(CigarOp(MATCH, after_match_length)); |
1210 |
> |
|
1211 |
> |
new_cigars.insert(new_cigars.end(), cigars.begin() + 1, cigars.end()); |
1212 |
> |
} |
1213 |
> |
else |
1214 |
> |
{ |
1215 |
> |
new_cigars.insert(new_cigars.end(), cigars.begin(), cigars.end() - 1); |
1216 |
> |
|
1217 |
> |
int before_match_length = del.left - pos + 1; |
1218 |
> |
int after_match_length = op.length - before_match_length; |
1219 |
> |
|
1220 |
> |
if (before_match_length > 0) |
1221 |
> |
new_cigars.push_back(CigarOp(MATCH, before_match_length)); |
1222 |
> |
new_cigars.push_back(CigarOp(DEL, del_length)); |
1223 |
> |
if (after_match_length > 0) |
1224 |
> |
new_cigars.push_back(CigarOp(MATCH, after_match_length)); |
1225 |
> |
} |
1226 |
> |
|
1227 |
> |
BowtieHit new_bh(bh.ref_id(), |
1228 |
> |
bh.ref_id2(), |
1229 |
> |
bh.insert_id(), |
1230 |
> |
new_left, |
1231 |
> |
new_cigars, |
1232 |
> |
bh.antisense_align(), |
1233 |
> |
bh.antisense_splice(), |
1234 |
> |
0, /* edit_dist - needs to be recalculated */ |
1235 |
> |
0, /* splice_mms - needs to be recalculated */ |
1236 |
> |
false); |
1237 |
> |
|
1238 |
> |
new_bh.seq(bh.seq()); |
1239 |
> |
new_bh.qual(bh.qual()); |
1240 |
> |
|
1241 |
> |
vector<string> aux_fields; |
1242 |
> |
bowtie_sam_extra(new_bh, rt, aux_fields); |
1243 |
> |
|
1244 |
> |
vector<string>::const_iterator aux_iter = aux_fields.begin(); |
1245 |
> |
for (; aux_iter != aux_fields.end(); ++aux_iter) |
1246 |
> |
{ |
1247 |
> |
const string& aux_field = *aux_iter; |
1248 |
> |
if (strncmp(aux_field.c_str(), "AS", 2) == 0) |
1249 |
> |
{ |
1250 |
> |
int alignment_score = atoi(aux_field.c_str() + 5); |
1251 |
> |
new_bh.alignment_score(alignment_score); |
1252 |
> |
} |
1253 |
> |
else if (strncmp(aux_field.c_str(), "XM", 2) == 0) |
1254 |
> |
{ |
1255 |
> |
int XM_value = atoi(aux_field.c_str() + 5); |
1256 |
> |
new_bh.edit_dist(XM_value); |
1257 |
> |
} |
1258 |
> |
} |
1259 |
> |
|
1260 |
> |
vector<string> sam_toks; |
1261 |
> |
tokenize(bh.hitfile_rec().c_str(), "\t", sam_toks); |
1262 |
> |
|
1263 |
> |
char coord[20] = {0,}; |
1264 |
> |
sprintf(coord, "%d", new_bh.left() + 1); |
1265 |
> |
sam_toks[3] = coord; |
1266 |
> |
sam_toks[5] = print_cigar(new_bh.cigar()); |
1267 |
> |
for (size_t a = 11; a < sam_toks.size(); ++a) |
1268 |
> |
{ |
1269 |
> |
string& sam_tok = sam_toks[a]; |
1270 |
> |
for (size_t b = 0; b < aux_fields.size(); ++b) |
1271 |
> |
{ |
1272 |
> |
const string& aux_tok = aux_fields[b]; |
1273 |
> |
if (strncmp(sam_tok.c_str(), aux_tok.c_str(), 5) == 0) |
1274 |
> |
{ |
1275 |
> |
sam_tok = aux_tok; |
1276 |
> |
break; |
1277 |
> |
} |
1278 |
> |
} |
1279 |
> |
} |
1280 |
> |
|
1281 |
> |
string new_rec = ""; |
1282 |
> |
for (size_t d = 0; d < sam_toks.size(); ++d) |
1283 |
> |
{ |
1284 |
> |
new_rec += sam_toks[d]; |
1285 |
> |
if (d < sam_toks.size() - 1) |
1286 |
> |
new_rec += "\t"; |
1287 |
> |
} |
1288 |
> |
|
1289 |
> |
new_bh.hitfile_rec(new_rec); |
1290 |
> |
|
1291 |
> |
if (new_bh.edit_dist() <= bh.edit_dist()) |
1292 |
> |
additional_hits.push_back(new_bh); |
1293 |
> |
|
1294 |
> |
/* |
1295 |
> |
fprintf(stderr, "\t%d %d-%d %s (AS:%d XM:%d) with junc %u-%u\n", |
1296 |
> |
new_bh.insert_id(), new_bh.left(), new_bh.right(), |
1297 |
> |
print_cigar(new_bh.cigar()).c_str(), |
1298 |
> |
new_bh.alignment_score(), new_bh.edit_dist(), |
1299 |
> |
junc.left, junc.right); |
1300 |
> |
*/ |
1301 |
> |
|
1302 |
> |
++lb; |
1303 |
> |
} |
1304 |
> |
} |
1305 |
> |
|
1306 |
> |
{ |
1307 |
> |
InsertionSet::const_iterator lb, ub; |
1308 |
> |
lb = insertions.upper_bound(Insertion(refid, left1, "")); |
1309 |
> |
ub = insertions.lower_bound(Insertion(refid, left2, "")); |
1310 |
> |
|
1311 |
> |
while (lb != insertions.end() && lb != ub) |
1312 |
> |
{ |
1313 |
> |
// daehwan - for debugging purposse |
1314 |
> |
break; |
1315 |
> |
|
1316 |
> |
Insertion ins = lb->first; |
1317 |
> |
|
1318 |
> |
// daehwan - for debugging purposes |
1319 |
> |
/* |
1320 |
> |
fprintf(stderr, "(type%d) %d %d-%d %s (AS:%d XM:%d) with junc %u-%u\n", |
1321 |
> |
!use_rev_junctions, |
1322 |
> |
bh.insert_id(), bh.left(), bh.right(), |
1323 |
> |
print_cigar(bh.cigar()).c_str(), |
1324 |
> |
bh.alignment_score(), bh.edit_dist(), |
1325 |
> |
junc.left, junc.right); |
1326 |
> |
*/ |
1327 |
> |
|
1328 |
> |
int ins_length = ins.sequence.length(); |
1329 |
> |
int new_left = bh.left(); |
1330 |
> |
if (j == 0) |
1331 |
> |
new_left -= ins_length; |
1332 |
> |
|
1333 |
> |
vector<CigarOp> new_cigars; |
1334 |
> |
if (j == 0) |
1335 |
> |
{ |
1336 |
> |
int before_match_length = ins.left - new_left + 1;; |
1337 |
> |
int after_match_length = op.length - before_match_length - ins_length; |
1338 |
> |
|
1339 |
> |
if (before_match_length > 0) |
1340 |
> |
new_cigars.push_back(CigarOp(MATCH, before_match_length)); |
1341 |
> |
new_cigars.push_back(CigarOp(INS, ins_length)); |
1342 |
> |
if (after_match_length > 0) |
1343 |
> |
new_cigars.push_back(CigarOp(MATCH, after_match_length)); |
1344 |
> |
|
1345 |
> |
new_cigars.insert(new_cigars.end(), cigars.begin() + 1, cigars.end()); |
1346 |
> |
} |
1347 |
> |
else |
1348 |
> |
{ |
1349 |
> |
new_cigars.insert(new_cigars.end(), cigars.begin(), cigars.end() - 1); |
1350 |
> |
|
1351 |
> |
int before_match_length = ins.left - pos + 1; |
1352 |
> |
int after_match_length = op.length - before_match_length - ins_length; |
1353 |
> |
|
1354 |
> |
if (before_match_length > 0) |
1355 |
> |
new_cigars.push_back(CigarOp(MATCH, before_match_length)); |
1356 |
> |
new_cigars.push_back(CigarOp(INS, ins_length)); |
1357 |
> |
if (after_match_length > 0) |
1358 |
> |
new_cigars.push_back(CigarOp(MATCH, after_match_length)); |
1359 |
> |
} |
1360 |
> |
|
1361 |
> |
BowtieHit new_bh(bh.ref_id(), |
1362 |
> |
bh.ref_id2(), |
1363 |
> |
bh.insert_id(), |
1364 |
> |
new_left, |
1365 |
> |
new_cigars, |
1366 |
> |
bh.antisense_align(), |
1367 |
> |
bh.antisense_splice(), |
1368 |
> |
0, /* edit_dist - needs to be recalculated */ |
1369 |
> |
0, /* splice_mms - needs to be recalculated */ |
1370 |
> |
false); |
1371 |
> |
|
1372 |
> |
new_bh.seq(bh.seq()); |
1373 |
> |
new_bh.qual(bh.qual()); |
1374 |
> |
|
1375 |
> |
vector<string> aux_fields; |
1376 |
> |
bowtie_sam_extra(new_bh, rt, aux_fields); |
1377 |
> |
|
1378 |
> |
vector<string>::const_iterator aux_iter = aux_fields.begin(); |
1379 |
> |
for (; aux_iter != aux_fields.end(); ++aux_iter) |
1380 |
> |
{ |
1381 |
> |
const string& aux_field = *aux_iter; |
1382 |
> |
if (strncmp(aux_field.c_str(), "AS", 2) == 0) |
1383 |
> |
{ |
1384 |
> |
int alignment_score = atoi(aux_field.c_str() + 5); |
1385 |
> |
new_bh.alignment_score(alignment_score); |
1386 |
> |
} |
1387 |
> |
else if (strncmp(aux_field.c_str(), "XM", 2) == 0) |
1388 |
> |
{ |
1389 |
> |
int XM_value = atoi(aux_field.c_str() + 5); |
1390 |
> |
new_bh.edit_dist(XM_value); |
1391 |
> |
} |
1392 |
> |
} |
1393 |
> |
|
1394 |
> |
/* |
1395 |
> |
fprintf(stderr, "\t%d %d-%d %s (AS:%d XM:%d) with junc %u-%u\n", |
1396 |
> |
new_bh.insert_id(), new_bh.left(), new_bh.right(), |
1397 |
> |
print_cigar(new_bh.cigar()).c_str(), |
1398 |
> |
new_bh.alignment_score(), new_bh.edit_dist(), |
1399 |
> |
junc.left, junc.right); |
1400 |
> |
*/ |
1401 |
> |
|
1402 |
> |
++lb; |
1403 |
> |
} |
1404 |
> |
} |
1405 |
> |
#endif |
1406 |
|
} |
1407 |
+ |
} |
1408 |
+ |
|
1409 |
+ |
switch(op.opcode) |
1410 |
+ |
{ |
1411 |
+ |
case REF_SKIP: |
1412 |
+ |
pos += op.length; |
1413 |
+ |
break; |
1414 |
+ |
case rEF_SKIP: |
1415 |
+ |
pos -= op.length; |
1416 |
+ |
break; |
1417 |
+ |
case MATCH: |
1418 |
+ |
case DEL: |
1419 |
+ |
pos += op.length; |
1420 |
+ |
break; |
1421 |
+ |
case mATCH: |
1422 |
+ |
case dEL: |
1423 |
+ |
pos -= op.length; |
1424 |
+ |
break; |
1425 |
+ |
case FUSION_FF: |
1426 |
+ |
case FUSION_FR: |
1427 |
+ |
case FUSION_RF: |
1428 |
+ |
case FUSION_RR: |
1429 |
+ |
pos = op.length; |
1430 |
+ |
refid = bh.ref_id2(); |
1431 |
+ |
break; |
1432 |
+ |
default: |
1433 |
+ |
break; |
1434 |
+ |
} |
1435 |
|
} |
1436 |
< |
hits = remaining; |
1436 |
> |
} |
1437 |
> |
|
1438 |
> |
hits.hits.insert(hits.hits.end(), additional_hits.begin(), additional_hits.end()); |
1439 |
> |
|
1440 |
> |
std::sort(hits.hits.begin(), hits.hits.end()); |
1441 |
> |
vector<BowtieHit>::iterator new_end = std::unique(hits.hits.begin(), hits.hits.end()); |
1442 |
> |
hits.hits.erase(new_end, hits.hits.end()); |
1443 |
|
} |
1444 |
|
|
1445 |
< |
void get_junctions_from_best_hits(HitStream& left_hs, |
1446 |
< |
HitStream& right_hs, |
1447 |
< |
ReadTable& it, |
1448 |
< |
JunctionSet& junctions, |
1449 |
< |
const JunctionSet& gtf_junctions) |
1450 |
< |
{ |
1451 |
< |
HitsForRead curr_left_hit_group; |
1452 |
< |
HitsForRead curr_right_hit_group; |
1453 |
< |
|
1454 |
< |
left_hs.next_read_hits(curr_left_hit_group); |
1455 |
< |
right_hs.next_read_hits(curr_right_hit_group); |
1456 |
< |
|
1457 |
< |
uint32_t curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
1458 |
< |
uint32_t curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
1459 |
< |
|
1460 |
< |
// While we still have unreported hits... |
1461 |
< |
while(curr_left_obs_order != VMAXINT32 || |
1462 |
< |
curr_right_obs_order != VMAXINT32) |
1463 |
< |
{ |
1464 |
< |
// Chew up left singletons |
1465 |
< |
while (curr_left_obs_order < curr_right_obs_order && |
1466 |
< |
curr_left_obs_order != VMAXINT32) |
1467 |
< |
{ |
1468 |
< |
HitsForRead best_hits; |
1469 |
< |
best_hits.insert_id = curr_left_obs_order; |
1470 |
< |
FragmentAlignmentGrade grade; |
1471 |
< |
|
1472 |
< |
// Process hits for left singleton, select best alignments |
1473 |
< |
read_best_alignments(curr_left_hit_group, grade, best_hits, gtf_junctions); |
1474 |
< |
update_junctions(best_hits, junctions); |
1445 |
> |
|
1446 |
> |
// events include splice junction, indels, and fusion points. |
1447 |
> |
struct ConsensusEventsWorker |
1448 |
> |
{ |
1449 |
> |
void operator()() |
1450 |
> |
{ |
1451 |
> |
ReadTable it; |
1452 |
> |
vector<BAMHitFactory*> hit_factories; |
1453 |
> |
hit_factories.push_back(new BAMHitFactory(it, *rt)); |
1454 |
> |
HitStream l_hs(left_map_fname, hit_factories.back(), false, true, true, true); |
1455 |
> |
if (left_map_offset > 0) |
1456 |
> |
l_hs.seek(left_map_offset); |
1457 |
> |
|
1458 |
> |
hit_factories.push_back(new BAMHitFactory(it, *rt)); |
1459 |
> |
HitStream r_hs(right_map_fname, hit_factories.back(), false, true, true, true); |
1460 |
> |
if (right_map_offset > 0) |
1461 |
> |
r_hs.seek(right_map_offset); |
1462 |
> |
|
1463 |
> |
HitsForRead curr_left_hit_group; |
1464 |
> |
HitsForRead curr_right_hit_group; |
1465 |
> |
|
1466 |
> |
l_hs.next_read_hits(curr_left_hit_group); |
1467 |
> |
r_hs.next_read_hits(curr_right_hit_group); |
1468 |
> |
|
1469 |
> |
uint32_t curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
1470 |
> |
uint32_t curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
1471 |
> |
|
1472 |
> |
// While we still have unreported hits... |
1473 |
> |
while((curr_left_obs_order != VMAXINT32 || curr_right_obs_order != VMAXINT32) && |
1474 |
> |
(curr_left_obs_order < end_id || curr_right_obs_order < end_id)) |
1475 |
> |
{ |
1476 |
> |
// Chew up left singletons |
1477 |
> |
while (curr_left_obs_order < curr_right_obs_order && |
1478 |
> |
curr_left_obs_order < end_id && |
1479 |
> |
curr_left_obs_order != VMAXINT32) |
1480 |
> |
{ |
1481 |
> |
HitsForRead best_hits; |
1482 |
> |
best_hits.insert_id = curr_left_obs_order; |
1483 |
|
|
1484 |
< |
// Get next hit group |
1485 |
< |
left_hs.next_read_hits(curr_left_hit_group); |
1486 |
< |
curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
1487 |
< |
} |
1484 |
> |
// Process hits for left singleton, select best alignments |
1485 |
> |
read_best_alignments(curr_left_hit_group, best_hits, *gtf_junctions); |
1486 |
> |
update_coverage(best_hits, *coverage); |
1487 |
> |
update_junctions(best_hits, *junctions); |
1488 |
> |
update_insertions_and_deletions(best_hits, *insertions, *deletions); |
1489 |
> |
update_fusions(best_hits, *rt, *fusions); |
1490 |
> |
|
1491 |
> |
// Get next hit group |
1492 |
> |
l_hs.next_read_hits(curr_left_hit_group); |
1493 |
> |
curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
1494 |
> |
} |
1495 |
|
|
1496 |
< |
// Chew up right singletons |
1497 |
< |
while (curr_left_obs_order > curr_right_obs_order && |
1498 |
< |
curr_right_obs_order != VMAXINT32) |
1499 |
< |
{ |
1500 |
< |
HitsForRead best_hits; |
1501 |
< |
best_hits.insert_id = curr_right_obs_order; |
1502 |
< |
FragmentAlignmentGrade grade; |
1503 |
< |
|
1504 |
< |
// Process hit for right singleton, select best alignments |
1505 |
< |
read_best_alignments(curr_right_hit_group,grade, best_hits, gtf_junctions); |
1506 |
< |
update_junctions(best_hits, junctions); |
1496 |
> |
// Chew up right singletons |
1497 |
> |
while (curr_left_obs_order > curr_right_obs_order && |
1498 |
> |
curr_right_obs_order < end_id && |
1499 |
> |
curr_right_obs_order != VMAXINT32) |
1500 |
> |
{ |
1501 |
> |
HitsForRead best_hits; |
1502 |
> |
best_hits.insert_id = curr_right_obs_order; |
1503 |
> |
if (curr_right_obs_order >= begin_id) |
1504 |
> |
{ |
1505 |
> |
// Process hit for right singleton, select best alignments |
1506 |
> |
read_best_alignments(curr_right_hit_group, best_hits, *gtf_junctions); |
1507 |
> |
update_coverage(best_hits, *coverage); |
1508 |
> |
update_junctions(best_hits, *junctions); |
1509 |
> |
update_insertions_and_deletions(best_hits, *insertions, *deletions); |
1510 |
> |
update_fusions(best_hits, *rt, *fusions); |
1511 |
> |
} |
1512 |
|
|
1513 |
< |
// Get next hit group |
1514 |
< |
right_hs.next_read_hits(curr_right_hit_group); |
1515 |
< |
curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
1516 |
< |
} |
1513 |
> |
// Get next hit group |
1514 |
> |
r_hs.next_read_hits(curr_right_hit_group); |
1515 |
> |
curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
1516 |
> |
} |
1517 |
|
|
1518 |
< |
// Since we have both left hits and right hits for this insert, |
1519 |
< |
// Find the best pairing and print both |
1520 |
< |
while (curr_left_obs_order == curr_right_obs_order && |
1521 |
< |
curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32) |
1522 |
< |
{ |
1523 |
< |
if (curr_left_hit_group.hits.empty()) |
1524 |
< |
{ |
1525 |
< |
HitsForRead right_best_hits; |
1526 |
< |
right_best_hits.insert_id = curr_right_obs_order; |
1527 |
< |
|
1528 |
< |
FragmentAlignmentGrade grade; |
1529 |
< |
read_best_alignments(curr_right_hit_group, grade, right_best_hits, gtf_junctions); |
1530 |
< |
update_junctions(right_best_hits, junctions); |
1531 |
< |
} |
1532 |
< |
else if (curr_right_hit_group.hits.empty()) |
1533 |
< |
{ |
1534 |
< |
HitsForRead left_best_hits; |
1535 |
< |
left_best_hits.insert_id = curr_left_obs_order; |
1536 |
< |
|
1537 |
< |
FragmentAlignmentGrade grade; |
1538 |
< |
// Process hits for left singleton, select best alignments |
1539 |
< |
read_best_alignments(curr_left_hit_group, grade, left_best_hits, gtf_junctions); |
1540 |
< |
update_junctions(left_best_hits, junctions); |
1541 |
< |
} |
1542 |
< |
else |
1543 |
< |
{ |
1544 |
< |
HitsForRead left_best_hits; |
1545 |
< |
HitsForRead right_best_hits; |
1546 |
< |
left_best_hits.insert_id = curr_left_obs_order; |
1547 |
< |
right_best_hits.insert_id = curr_right_obs_order; |
1548 |
< |
|
1549 |
< |
// daehwan - apply gtf_junctions here, too! |
1550 |
< |
|
1551 |
< |
InsertAlignmentGrade grade; |
1552 |
< |
pair_best_alignments(curr_left_hit_group, |
1553 |
< |
curr_right_hit_group, |
1554 |
< |
grade, |
1555 |
< |
left_best_hits, |
1556 |
< |
right_best_hits); |
1557 |
< |
|
1558 |
< |
update_junctions(left_best_hits, junctions); |
1559 |
< |
update_junctions(right_best_hits, junctions); |
1560 |
< |
} |
1561 |
< |
|
705 |
< |
left_hs.next_read_hits(curr_left_hit_group); |
706 |
< |
curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
1518 |
> |
// Since we have both left hits and right hits for this insert, |
1519 |
> |
// Find the best pairing and print both |
1520 |
> |
while (curr_left_obs_order == curr_right_obs_order && |
1521 |
> |
curr_left_obs_order < end_id && |
1522 |
> |
curr_left_obs_order != VMAXINT32) |
1523 |
> |
{ |
1524 |
> |
vector<pair<BowtieHit, BowtieHit> > best_hits; |
1525 |
> |
|
1526 |
> |
InsertAlignmentGrade grade; |
1527 |
> |
pair_best_alignments(curr_left_hit_group, |
1528 |
> |
curr_right_hit_group, |
1529 |
> |
grade, |
1530 |
> |
best_hits, |
1531 |
> |
*gtf_junctions); |
1532 |
> |
|
1533 |
> |
HitsForRead best_left_hit_group; best_left_hit_group.insert_id = curr_left_obs_order; |
1534 |
> |
HitsForRead best_right_hit_group; best_right_hit_group.insert_id = curr_left_obs_order; |
1535 |
> |
|
1536 |
> |
if (best_hits.size() > 0) |
1537 |
> |
{ |
1538 |
> |
for (size_t i = 0; i < best_hits.size(); ++i) |
1539 |
> |
{ |
1540 |
> |
best_left_hit_group.hits.push_back(best_hits[i].first); |
1541 |
> |
best_right_hit_group.hits.push_back(best_hits[i].second); |
1542 |
> |
} |
1543 |
> |
} |
1544 |
> |
else |
1545 |
> |
{ |
1546 |
> |
best_left_hit_group.hits = curr_left_hit_group.hits; |
1547 |
> |
best_right_hit_group.hits = curr_right_hit_group.hits; |
1548 |
> |
} |
1549 |
> |
|
1550 |
> |
update_coverage(best_left_hit_group, *coverage); |
1551 |
> |
update_junctions(best_left_hit_group, *junctions); |
1552 |
> |
update_insertions_and_deletions(best_left_hit_group, *insertions, *deletions); |
1553 |
> |
update_fusions(best_left_hit_group, *rt, *fusions); |
1554 |
> |
|
1555 |
> |
update_coverage(best_right_hit_group, *coverage); |
1556 |
> |
update_junctions(best_right_hit_group, *junctions); |
1557 |
> |
update_insertions_and_deletions(best_right_hit_group, *insertions, *deletions); |
1558 |
> |
update_fusions(best_right_hit_group, *rt, *fusions); |
1559 |
> |
|
1560 |
> |
l_hs.next_read_hits(curr_left_hit_group); |
1561 |
> |
curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
1562 |
|
|
1563 |
< |
right_hs.next_read_hits(curr_right_hit_group); |
1564 |
< |
curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
1565 |
< |
} |
1566 |
< |
} |
712 |
< |
left_hs.reset(); |
713 |
< |
right_hs.reset(); |
714 |
< |
} |
1563 |
> |
r_hs.next_read_hits(curr_right_hit_group); |
1564 |
> |
curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
1565 |
> |
} |
1566 |
> |
} |
1567 |
|
|
1568 |
+ |
for (size_t i = 0; i < hit_factories.size(); ++i) |
1569 |
+ |
delete hit_factories[i]; |
1570 |
+ |
|
1571 |
+ |
hit_factories.clear(); |
1572 |
+ |
} |
1573 |
+ |
|
1574 |
+ |
string left_map_fname; |
1575 |
+ |
string right_map_fname; |
1576 |
+ |
|
1577 |
+ |
RefSequenceTable* rt; |
1578 |
+ |
|
1579 |
+ |
JunctionSet* gtf_junctions; |
1580 |
+ |
|
1581 |
+ |
uint64_t begin_id; |
1582 |
+ |
uint64_t end_id; |
1583 |
+ |
int64_t left_map_offset; |
1584 |
+ |
int64_t right_map_offset; |
1585 |
+ |
|
1586 |
+ |
JunctionSet* junctions; |
1587 |
+ |
InsertionSet* insertions; |
1588 |
+ |
DeletionSet* deletions; |
1589 |
+ |
FusionSet* fusions; |
1590 |
+ |
|
1591 |
+ |
Coverage* coverage; |
1592 |
+ |
}; |
1593 |
|
|
1594 |
< |
void driver(GBamWriter& bam_writer, |
1595 |
< |
string& left_map_fname, |
1596 |
< |
FILE* left_reads, |
1597 |
< |
string& right_map_fname, |
1598 |
< |
FILE* right_reads, |
1594 |
> |
struct ReportWorker |
1595 |
> |
{ |
1596 |
> |
void write_singleton_alignments(uint64_t curr_obs_order, |
1597 |
> |
HitsForRead& curr_hit_group, |
1598 |
> |
ReadStream& reads_file, |
1599 |
> |
GBamWriter& bam_writer, |
1600 |
> |
GBamWriter& um_out, |
1601 |
> |
FragmentType fragment_type) |
1602 |
> |
{ |
1603 |
> |
HitsForRead best_hits; |
1604 |
> |
best_hits.insert_id = curr_obs_order; |
1605 |
> |
|
1606 |
> |
realign_reads(curr_hit_group, *rt, *junctions, *rev_junctions, |
1607 |
> |
*insertions, *deletions, *rev_deletions, *fusions); |
1608 |
> |
|
1609 |
> |
exclude_hits_on_filtered_junctions(*junctions, curr_hit_group); |
1610 |
> |
|
1611 |
> |
// Process hits for singleton, select best alignments |
1612 |
> |
const bool final_report = true; |
1613 |
> |
read_best_alignments(curr_hit_group, best_hits, *gtf_junctions, |
1614 |
> |
*junctions, *insertions, *deletions, *fusions, *coverage, |
1615 |
> |
final_report); |
1616 |
> |
|
1617 |
> |
if (best_hits.hits.size() > 0) |
1618 |
> |
{ |
1619 |
> |
Read read; |
1620 |
> |
bool got_read = reads_file.getRead(curr_obs_order, read, |
1621 |
> |
reads_format, false, begin_id, end_id, |
1622 |
> |
&um_out, false); |
1623 |
> |
if (got_read) |
1624 |
> |
{ |
1625 |
> |
update_junctions(best_hits, *final_junctions); |
1626 |
> |
update_insertions_and_deletions(best_hits, *final_insertions, *final_deletions); |
1627 |
> |
update_fusions(best_hits, *rt, *final_fusions, *fusions); |
1628 |
> |
|
1629 |
> |
print_sam_for_single(*rt, |
1630 |
> |
best_hits, |
1631 |
> |
fragment_type, |
1632 |
> |
read.alt_name, |
1633 |
> |
bam_writer, |
1634 |
> |
rng); |
1635 |
> |
} |
1636 |
> |
} |
1637 |
> |
} |
1638 |
> |
|
1639 |
> |
void operator()() |
1640 |
> |
{ |
1641 |
> |
rng.seed(1); |
1642 |
> |
|
1643 |
> |
ReadTable it; |
1644 |
> |
GBamWriter bam_writer(bam_output_fname.c_str(), sam_header.c_str()); |
1645 |
> |
|
1646 |
> |
ReadStream left_reads_file(left_reads_fname); |
1647 |
> |
if (left_reads_file.file() == NULL) |
1648 |
> |
err_die("Error: cannot open %s for reading\n", left_reads_fname.c_str()); |
1649 |
> |
if (left_reads_file.isBam()) { |
1650 |
> |
left_reads_file.use_alt_name(); |
1651 |
> |
left_reads_file.ignoreQC(); |
1652 |
> |
} |
1653 |
> |
if (left_reads_offset > 0) |
1654 |
> |
left_reads_file.seek(left_reads_offset); |
1655 |
> |
|
1656 |
> |
//if (!zpacker.empty()) left_um_fname+=".z"; |
1657 |
> |
GBamWriter* left_um_out=new GBamWriter(left_um_fname.c_str(), sam_header.c_str()); |
1658 |
> |
GBamWriter* right_um_out=NULL; |
1659 |
> |
|
1660 |
> |
ReadStream right_reads_file(right_reads_fname); |
1661 |
> |
if (right_reads_offset > 0) |
1662 |
> |
right_reads_file.seek(right_reads_offset); |
1663 |
> |
|
1664 |
> |
//FZPipe right_um_out; |
1665 |
> |
if (!right_reads_fname.empty()) |
1666 |
> |
{ |
1667 |
> |
if (right_reads_file.isBam()) { |
1668 |
> |
right_reads_file.use_alt_name(); |
1669 |
> |
right_reads_file.ignoreQC(); |
1670 |
> |
right_um_out=new GBamWriter(right_um_fname.c_str(), sam_header.c_str()); |
1671 |
> |
} |
1672 |
> |
//if (!zpacker.empty()) right_um_fname+=".z"; |
1673 |
> |
//if (right_um_out.openWrite(right_um_fname.c_str(), zpacker)==NULL) |
1674 |
> |
// err_die("Error: cannot open file %s for writing!\n",right_um_fname.c_str()); |
1675 |
> |
} |
1676 |
> |
|
1677 |
> |
vector<BAMHitFactory*> hit_factories; |
1678 |
> |
hit_factories.push_back(new BAMHitFactory(it, *rt)); |
1679 |
> |
HitStream left_hs(left_map_fname, hit_factories.back(), false, true, true, true); |
1680 |
> |
if (left_map_offset > 0) |
1681 |
> |
left_hs.seek(left_map_offset); |
1682 |
> |
|
1683 |
> |
hit_factories.push_back(new BAMHitFactory(it, *rt)); |
1684 |
> |
HitStream right_hs(right_map_fname, hit_factories.back(), false, true, true, true); |
1685 |
> |
if (right_map_offset > 0) |
1686 |
> |
right_hs.seek(right_map_offset); |
1687 |
> |
|
1688 |
> |
HitsForRead curr_left_hit_group; |
1689 |
> |
HitsForRead curr_right_hit_group; |
1690 |
> |
|
1691 |
> |
left_hs.next_read_hits(curr_left_hit_group); |
1692 |
> |
right_hs.next_read_hits(curr_right_hit_group); |
1693 |
> |
|
1694 |
> |
uint64_t curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
1695 |
> |
uint64_t curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
1696 |
> |
|
1697 |
> |
const bool final_report = true; |
1698 |
> |
|
1699 |
> |
// While we still have unreported hits... |
1700 |
> |
Read l_read; |
1701 |
> |
Read r_read; |
1702 |
> |
while((curr_left_obs_order != VMAXINT32 || curr_right_obs_order != VMAXINT32) && |
1703 |
> |
(curr_left_obs_order < end_id || curr_right_obs_order < end_id)) |
1704 |
> |
{ |
1705 |
> |
// Chew up left singletons (pairs with right reads unmapped) |
1706 |
> |
while (curr_left_obs_order < curr_right_obs_order && |
1707 |
> |
curr_left_obs_order < end_id && |
1708 |
> |
curr_left_obs_order != VMAXINT32) |
1709 |
> |
{ |
1710 |
> |
write_singleton_alignments(curr_left_obs_order, |
1711 |
> |
curr_left_hit_group, |
1712 |
> |
left_reads_file, |
1713 |
> |
bam_writer, |
1714 |
> |
*left_um_out, |
1715 |
> |
right_map_fname.empty() ? FRAG_UNPAIRED : FRAG_LEFT); |
1716 |
> |
|
1717 |
> |
// Get next hit group |
1718 |
> |
left_hs.next_read_hits(curr_left_hit_group); |
1719 |
> |
curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
1720 |
> |
} //left singletons |
1721 |
> |
|
1722 |
> |
// Chew up right singletons |
1723 |
> |
while (curr_left_obs_order > curr_right_obs_order && |
1724 |
> |
curr_right_obs_order < end_id && |
1725 |
> |
curr_right_obs_order != VMAXINT32) |
1726 |
> |
{ |
1727 |
> |
write_singleton_alignments(curr_right_obs_order, |
1728 |
> |
curr_right_hit_group, |
1729 |
> |
right_reads_file, |
1730 |
> |
bam_writer, |
1731 |
> |
*right_um_out, |
1732 |
> |
FRAG_RIGHT); |
1733 |
> |
|
1734 |
> |
// Get next hit group |
1735 |
> |
right_hs.next_read_hits(curr_right_hit_group); |
1736 |
> |
curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
1737 |
> |
} |
1738 |
> |
|
1739 |
> |
// Since we have both left hits and right hits for this insert, |
1740 |
> |
// Find the best pairing and print both |
1741 |
> |
while (curr_left_obs_order == curr_right_obs_order && |
1742 |
> |
curr_left_obs_order < end_id && |
1743 |
> |
curr_left_obs_order != VMAXINT32) |
1744 |
> |
{ |
1745 |
> |
realign_reads(curr_left_hit_group, *rt, *junctions, *rev_junctions, |
1746 |
> |
*insertions, *deletions, *rev_deletions, *fusions); |
1747 |
> |
exclude_hits_on_filtered_junctions(*junctions, curr_left_hit_group); |
1748 |
> |
|
1749 |
> |
realign_reads(curr_right_hit_group, *rt, *junctions, *rev_junctions, |
1750 |
> |
*insertions, *deletions, *rev_deletions, *fusions); |
1751 |
> |
exclude_hits_on_filtered_junctions(*junctions, curr_right_hit_group); |
1752 |
> |
|
1753 |
> |
vector<pair<BowtieHit, BowtieHit> > best_hits; |
1754 |
> |
|
1755 |
> |
bool paired_alignments = curr_left_hit_group.hits.size() > 0 && curr_right_hit_group.hits.size() > 0; |
1756 |
> |
InsertAlignmentGrade grade; |
1757 |
> |
|
1758 |
> |
if (paired_alignments) |
1759 |
> |
{ |
1760 |
> |
pair_best_alignments(curr_left_hit_group, |
1761 |
> |
curr_right_hit_group, |
1762 |
> |
grade, best_hits, *gtf_junctions, |
1763 |
> |
*junctions, *insertions, *deletions, *fusions, *coverage, |
1764 |
> |
final_report); |
1765 |
> |
|
1766 |
> |
if (best_hits.size() <= 0 || |
1767 |
> |
(grade.fusion && !fusion_search && !report_discordant_pair_alignments)) |
1768 |
> |
paired_alignments = false; |
1769 |
> |
} |
1770 |
> |
|
1771 |
> |
if (paired_alignments) |
1772 |
> |
{ |
1773 |
> |
HitsForRead best_left_hit_group; best_left_hit_group.insert_id = curr_left_obs_order; |
1774 |
> |
HitsForRead best_right_hit_group; best_right_hit_group.insert_id = curr_left_obs_order; |
1775 |
> |
|
1776 |
> |
for (size_t i = 0; i < best_hits.size(); ++i) |
1777 |
> |
{ |
1778 |
> |
best_left_hit_group.hits.push_back(best_hits[i].first); |
1779 |
> |
best_right_hit_group.hits.push_back(best_hits[i].second); |
1780 |
> |
} |
1781 |
> |
|
1782 |
> |
if (best_hits.size() > 0) |
1783 |
> |
{ |
1784 |
> |
bool got_left_read = left_reads_file.getRead(best_hits[0].first.insert_id(), l_read, |
1785 |
> |
reads_format, false, begin_id, end_id, |
1786 |
> |
left_um_out, false); |
1787 |
> |
|
1788 |
> |
bool got_right_read = right_reads_file.getRead(best_hits[0].first.insert_id(), r_read, |
1789 |
> |
reads_format, false, begin_id, end_id, |
1790 |
> |
right_um_out, false); |
1791 |
> |
|
1792 |
> |
if (got_left_read && got_right_read) |
1793 |
> |
{ |
1794 |
> |
update_junctions(best_left_hit_group, *final_junctions); |
1795 |
> |
update_insertions_and_deletions(best_left_hit_group, *final_insertions, *final_deletions); |
1796 |
> |
update_fusions(best_left_hit_group, *rt, *final_fusions, *fusions); |
1797 |
> |
|
1798 |
> |
update_junctions(best_right_hit_group, *final_junctions); |
1799 |
> |
update_insertions_and_deletions(best_right_hit_group, *final_insertions, *final_deletions); |
1800 |
> |
update_fusions(best_right_hit_group, *rt, *final_fusions, *fusions); |
1801 |
> |
|
1802 |
> |
pair_support(best_hits, *final_fusions, *fusions); |
1803 |
> |
|
1804 |
> |
print_sam_for_pair(*rt, |
1805 |
> |
best_hits, |
1806 |
> |
grade, |
1807 |
> |
bam_writer, |
1808 |
> |
l_read.alt_name, |
1809 |
> |
r_read.alt_name, |
1810 |
> |
rng, |
1811 |
> |
begin_id, |
1812 |
> |
end_id); |
1813 |
> |
} |
1814 |
> |
} |
1815 |
> |
} |
1816 |
> |
else |
1817 |
> |
{ |
1818 |
> |
if (curr_left_hit_group.hits.size() > 0) |
1819 |
> |
{ |
1820 |
> |
write_singleton_alignments(curr_left_obs_order, |
1821 |
> |
curr_left_hit_group, |
1822 |
> |
left_reads_file, |
1823 |
> |
bam_writer, |
1824 |
> |
*left_um_out, |
1825 |
> |
(right_map_fname.empty() ? FRAG_UNPAIRED : FRAG_LEFT)); |
1826 |
> |
} |
1827 |
> |
|
1828 |
> |
if (curr_right_hit_group.hits.size() > 0) |
1829 |
> |
{ //only right read mapped |
1830 |
> |
//write it in the mapped file with the #MAPPED# flag |
1831 |
> |
write_singleton_alignments(curr_right_obs_order, |
1832 |
> |
curr_right_hit_group, |
1833 |
> |
right_reads_file, |
1834 |
> |
bam_writer, |
1835 |
> |
*right_um_out, |
1836 |
> |
FRAG_RIGHT); |
1837 |
> |
} |
1838 |
> |
} |
1839 |
> |
|
1840 |
> |
left_hs.next_read_hits(curr_left_hit_group); |
1841 |
> |
curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
1842 |
> |
|
1843 |
> |
right_hs.next_read_hits(curr_right_hit_group); |
1844 |
> |
curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
1845 |
> |
} |
1846 |
> |
} //while we still have unreported hits.. |
1847 |
> |
//print the remaining unmapped reads at the end of each reads' stream |
1848 |
> |
|
1849 |
> |
left_reads_file.getRead(VMAXINT32, l_read, |
1850 |
> |
reads_format, false, begin_id, end_id, |
1851 |
> |
left_um_out, false); |
1852 |
> |
if (right_reads_file.file()) |
1853 |
> |
right_reads_file.getRead(VMAXINT32, r_read, |
1854 |
> |
reads_format, false, begin_id, end_id, |
1855 |
> |
right_um_out, false); |
1856 |
> |
|
1857 |
> |
|
1858 |
> |
// pclose (pipe close), which waits for a process to end, seems to conflict with boost::thread::join somehow, |
1859 |
> |
// resulting in deadlock like behavior. |
1860 |
> |
delete left_um_out; |
1861 |
> |
delete right_um_out; |
1862 |
> |
|
1863 |
> |
|
1864 |
> |
for (size_t i = 0; i < hit_factories.size(); ++i) |
1865 |
> |
delete hit_factories[i]; |
1866 |
> |
|
1867 |
> |
hit_factories.clear(); |
1868 |
> |
} |
1869 |
> |
|
1870 |
> |
string bam_output_fname; |
1871 |
> |
string sam_header_fname; |
1872 |
> |
|
1873 |
> |
string left_reads_fname; |
1874 |
> |
string left_map_fname; |
1875 |
> |
string right_reads_fname; |
1876 |
> |
string right_map_fname; |
1877 |
> |
|
1878 |
> |
string left_um_fname; |
1879 |
> |
string right_um_fname; |
1880 |
> |
|
1881 |
> |
JunctionSet* gtf_junctions; |
1882 |
> |
|
1883 |
> |
JunctionSet* junctions; |
1884 |
> |
JunctionSet* rev_junctions; |
1885 |
> |
InsertionSet* insertions; |
1886 |
> |
DeletionSet* deletions; |
1887 |
> |
DeletionSet* rev_deletions; |
1888 |
> |
FusionSet* fusions; |
1889 |
> |
Coverage* coverage; |
1890 |
> |
|
1891 |
> |
JunctionSet* final_junctions; |
1892 |
> |
InsertionSet* final_insertions; |
1893 |
> |
DeletionSet* final_deletions; |
1894 |
> |
FusionSet* final_fusions; |
1895 |
> |
|
1896 |
> |
RefSequenceTable* rt; |
1897 |
> |
|
1898 |
> |
uint64_t begin_id; |
1899 |
> |
uint64_t end_id; |
1900 |
> |
int64_t left_reads_offset; |
1901 |
> |
int64_t left_map_offset; |
1902 |
> |
int64_t right_reads_offset; |
1903 |
> |
int64_t right_map_offset; |
1904 |
> |
|
1905 |
> |
boost::random::mt19937 rng; |
1906 |
> |
}; |
1907 |
> |
|
1908 |
> |
void driver(const string& bam_output_fname, |
1909 |
> |
istream& ref_stream, |
1910 |
> |
const string& left_map_fname, |
1911 |
> |
const string& left_reads_fname, |
1912 |
> |
const string& right_map_fname, |
1913 |
> |
const string& right_reads_fname, |
1914 |
|
FILE* junctions_out, |
1915 |
|
FILE* insertions_out, |
1916 |
|
FILE* deletions_out, |
1917 |
< |
FILE* left_um_out, |
726 |
< |
FILE* right_um_out |
727 |
< |
) |
1917 |
> |
FILE* fusions_out) |
1918 |
|
{ |
1919 |
< |
ReadTable it; |
1919 |
> |
if (!parallel) |
1920 |
> |
num_threads = 1; |
1921 |
> |
|
1922 |
|
RefSequenceTable rt(sam_header, true); |
1923 |
< |
srandom(1); |
1923 |
> |
get_seqs(ref_stream, rt, true); |
1924 |
> |
|
1925 |
> |
srandom(1); |
1926 |
> |
|
1927 |
|
JunctionSet gtf_junctions; |
1928 |
|
if (!gtf_juncs.empty()) |
1929 |
|
{ |
1930 |
< |
char splice_buf[2048]; |
1930 |
> |
char splice_buf[4096]; |
1931 |
|
FILE* splice_coords = fopen(gtf_juncs.c_str(), "r"); |
1932 |
|
if (splice_coords) |
1933 |
|
{ |
1954 |
|
uint32_t right_coord = atoi(scan_right_coord); |
1955 |
|
bool antisense = *orientation == '-'; |
1956 |
|
|
1957 |
< |
// add 1 to left_coord to meet BED format |
1958 |
< |
gtf_junctions.insert(make_pair<Junction, JunctionStats>(Junction(ref_id, left_coord + 1, right_coord, antisense), JunctionStats())); |
1957 |
> |
JunctionStats junction_stat; |
1958 |
> |
junction_stat.gtf_match = true; |
1959 |
> |
junction_stat.accepted = true; |
1960 |
> |
|
1961 |
> |
gtf_junctions.insert(make_pair<Junction, JunctionStats>(Junction(ref_id, left_coord, right_coord, antisense), junction_stat)); |
1962 |
|
} |
1963 |
|
} |
1964 |
|
fprintf(stderr, "Loaded %d GFF junctions from %s.\n", (int)(gtf_junctions.size()), gtf_juncs.c_str()); |
1965 |
|
} |
1966 |
|
|
1967 |
< |
BAMHitFactory hit_factory(it,rt); |
1968 |
< |
JunctionSet junctions; |
1969 |
< |
{ |
772 |
< |
HitStream l_hs(left_map_fname, &hit_factory, false, true, true, true); |
773 |
< |
HitStream r_hs(right_map_fname, &hit_factory, false, true, true, true); |
774 |
< |
get_junctions_from_best_hits(l_hs, r_hs, it, junctions, gtf_junctions); |
775 |
< |
//this resets the streams |
776 |
< |
} |
777 |
< |
|
778 |
< |
HitStream left_hs(left_map_fname, &hit_factory, false, true, true, true); |
779 |
< |
HitStream right_hs(right_map_fname, &hit_factory, false, true, true, true); |
780 |
< |
|
781 |
< |
size_t num_unfiltered_juncs = junctions.size(); |
782 |
< |
fprintf(stderr, "Loaded %lu junctions\n", (long unsigned int) num_unfiltered_juncs); |
783 |
< |
|
784 |
< |
HitsForRead curr_left_hit_group; |
785 |
< |
HitsForRead curr_right_hit_group; |
786 |
< |
|
787 |
< |
left_hs.next_read_hits(curr_left_hit_group); |
788 |
< |
right_hs.next_read_hits(curr_right_hit_group); |
789 |
< |
|
790 |
< |
uint32_t curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
791 |
< |
uint32_t curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
792 |
< |
|
793 |
< |
// Read hits, extract junctions, and toss the ones that arent strongly enough supported. |
794 |
< |
filter_junctions(junctions, gtf_junctions); |
795 |
< |
//size_t num_juncs_after_filter = junctions.size(); |
796 |
< |
//fprintf(stderr, "Filtered %lu junctions\n", |
797 |
< |
// num_unfiltered_juncs - num_juncs_after_filter); |
798 |
< |
|
799 |
< |
/* |
800 |
< |
size_t small_overhangs = 0; |
801 |
< |
for (JunctionSet::iterator i = junctions.begin(); i != junctions.end(); ++i) |
1967 |
> |
vector<uint64_t> read_ids; |
1968 |
> |
vector<vector<int64_t> > offsets; |
1969 |
> |
if (num_threads > 1) |
1970 |
|
{ |
1971 |
< |
if (i->second.accepted && |
1972 |
< |
(i->second.left_extent < min_anchor_len || i->second.right_extent < min_anchor_len)) |
1973 |
< |
{ |
1974 |
< |
small_overhangs++; |
1975 |
< |
} |
1971 |
> |
vector<string> fnames; |
1972 |
> |
if (right_map_fname != "") |
1973 |
> |
{ |
1974 |
> |
fnames.push_back(right_reads_fname); |
1975 |
> |
fnames.push_back(right_map_fname); |
1976 |
> |
} |
1977 |
> |
fnames.push_back(left_reads_fname); |
1978 |
> |
fnames.push_back(left_map_fname); |
1979 |
> |
bool enough_data = calculate_offsets(fnames, read_ids, offsets); |
1980 |
> |
if (!enough_data) |
1981 |
> |
num_threads = 1; |
1982 |
|
} |
809 |
– |
|
810 |
– |
if (small_overhangs >0) |
811 |
– |
fprintf(stderr, "Warning: %lu small overhang junctions!\n", (long unsigned int)small_overhangs); |
812 |
– |
*/ |
813 |
– |
JunctionSet final_junctions; // the junctions formed from best hits |
814 |
– |
InsertionSet final_insertions; |
815 |
– |
DeletionSet final_deletions; |
816 |
– |
|
817 |
– |
fprintf (stderr, "Reporting final accepted alignments..."); |
818 |
– |
// While we still have unreported hits... |
819 |
– |
Read l_read; |
820 |
– |
Read r_read; |
821 |
– |
while(curr_left_obs_order != VMAXINT32 || |
822 |
– |
curr_right_obs_order != VMAXINT32) |
823 |
– |
{ |
824 |
– |
// Chew up left singletons (pairs with right reads unmapped) |
825 |
– |
while (curr_left_obs_order < curr_right_obs_order && |
826 |
– |
curr_left_obs_order != VMAXINT32) |
827 |
– |
{ |
828 |
– |
HitsForRead best_hits; |
829 |
– |
best_hits.insert_id = curr_left_obs_order; |
830 |
– |
FragmentAlignmentGrade grade; |
831 |
– |
bool got_read=get_read_from_stream(curr_left_obs_order, |
832 |
– |
left_reads,reads_format, false, l_read, left_um_out); |
833 |
– |
assert(got_read); |
834 |
– |
if (right_reads) { |
835 |
– |
fprintf(left_um_out, "@%s #MAPPED#\n%s\n+\n%s\n", l_read.alt_name.c_str(), |
836 |
– |
l_read.seq.c_str(), l_read.qual.c_str()); |
837 |
– |
got_read=get_read_from_stream(curr_left_obs_order, |
838 |
– |
right_reads,reads_format, false, |
839 |
– |
r_read, right_um_out, true); |
840 |
– |
assert(got_read); |
841 |
– |
} |
842 |
– |
exclude_hits_on_filtered_junctions(junctions, curr_left_hit_group); |
843 |
– |
|
844 |
– |
// Process hits for left singleton, select best alignments |
845 |
– |
read_best_alignments(curr_left_hit_group, grade, best_hits, gtf_junctions); |
846 |
– |
if (best_hits.hits.size()>0 && best_hits.hits.size() <= max_multihits) |
847 |
– |
{ |
848 |
– |
update_junctions(best_hits, final_junctions); |
849 |
– |
update_insertions_and_deletions(best_hits, final_insertions, final_deletions); |
850 |
– |
|
851 |
– |
print_sam_for_single(rt, |
852 |
– |
best_hits, |
853 |
– |
grade, |
854 |
– |
(right_map_fname.empty() ? FRAG_UNPAIRED : FRAG_LEFT), |
855 |
– |
l_read, |
856 |
– |
bam_writer, |
857 |
– |
left_um_out); |
858 |
– |
} |
859 |
– |
|
860 |
– |
// Get next hit group |
861 |
– |
left_hs.next_read_hits(curr_left_hit_group); |
862 |
– |
curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
863 |
– |
} //left singletons |
864 |
– |
|
865 |
– |
// Chew up right singletons |
866 |
– |
while (curr_left_obs_order > curr_right_obs_order && |
867 |
– |
curr_right_obs_order != VMAXINT32) |
868 |
– |
{ |
869 |
– |
HitsForRead best_hits; |
870 |
– |
best_hits.insert_id = curr_right_obs_order; |
871 |
– |
FragmentAlignmentGrade grade; |
872 |
– |
|
873 |
– |
bool got_read=get_read_from_stream(curr_right_obs_order, |
874 |
– |
right_reads,reads_format, false, r_read, right_um_out); |
875 |
– |
assert(got_read); |
876 |
– |
fprintf(right_um_out, "@%s #MAPPED#\n%s\n+\n%s\n", r_read.alt_name.c_str(), |
877 |
– |
r_read.seq.c_str(), r_read.qual.c_str()); |
878 |
– |
got_read=get_read_from_stream(curr_right_obs_order, |
879 |
– |
left_reads,reads_format, false, |
880 |
– |
l_read, left_um_out, true); |
881 |
– |
assert(got_read); |
1983 |
|
|
1984 |
< |
exclude_hits_on_filtered_junctions(junctions, curr_right_hit_group); |
1985 |
< |
|
1986 |
< |
// Process hit for right singleton, select best alignments |
1987 |
< |
read_best_alignments(curr_right_hit_group, grade, best_hits, gtf_junctions); |
1988 |
< |
|
1989 |
< |
if (best_hits.hits.size()>0 && best_hits.hits.size() <= max_multihits) |
1990 |
< |
{ |
1991 |
< |
update_junctions(best_hits, final_junctions); |
1992 |
< |
update_insertions_and_deletions(best_hits, final_insertions, final_deletions); |
1993 |
< |
|
893 |
< |
print_sam_for_single(rt, |
894 |
< |
best_hits, |
895 |
< |
grade, |
896 |
< |
FRAG_RIGHT, |
897 |
< |
r_read, |
898 |
< |
bam_writer, |
899 |
< |
right_um_out); |
900 |
< |
} |
901 |
< |
|
902 |
< |
// Get next hit group |
903 |
< |
right_hs.next_read_hits(curr_right_hit_group); |
904 |
< |
curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
905 |
< |
} |
906 |
< |
|
907 |
< |
// Since we have both left hits and right hits for this insert, |
908 |
< |
// Find the best pairing and print both |
909 |
< |
while (curr_left_obs_order == curr_right_obs_order && |
910 |
< |
curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32) |
911 |
< |
{ |
912 |
< |
exclude_hits_on_filtered_junctions(junctions, curr_left_hit_group); |
913 |
< |
exclude_hits_on_filtered_junctions(junctions, curr_right_hit_group); |
914 |
< |
|
915 |
< |
if (curr_left_hit_group.hits.empty()) |
916 |
< |
{ //only right read mapped |
917 |
< |
//write it in the mapped file with the #MAPPED# flag |
918 |
< |
|
919 |
< |
bool got_read=get_read_from_stream(curr_left_obs_order, |
920 |
< |
right_reads,reads_format, false, r_read, right_um_out); |
921 |
< |
assert(got_read); |
922 |
< |
fprintf(right_um_out, "@%s #MAPPED#\n%s\n+\n%s\n", r_read.alt_name.c_str(), |
923 |
< |
r_read.seq.c_str(), r_read.qual.c_str()); |
924 |
< |
HitsForRead right_best_hits; |
925 |
< |
right_best_hits.insert_id = curr_right_obs_order; |
926 |
< |
|
927 |
< |
FragmentAlignmentGrade grade; |
928 |
< |
read_best_alignments(curr_right_hit_group, grade, right_best_hits, gtf_junctions); |
929 |
< |
|
930 |
< |
if (right_best_hits.hits.size()>0 && right_best_hits.hits.size() <= max_multihits) |
931 |
< |
{ |
932 |
< |
update_junctions(right_best_hits, final_junctions); |
933 |
< |
update_insertions_and_deletions(right_best_hits, final_insertions, final_deletions); |
934 |
< |
|
935 |
< |
print_sam_for_single(rt, |
936 |
< |
right_best_hits, |
937 |
< |
grade, |
938 |
< |
FRAG_RIGHT, |
939 |
< |
r_read, |
940 |
< |
bam_writer, |
941 |
< |
right_um_out); |
942 |
< |
} |
943 |
< |
} |
944 |
< |
else if (curr_right_hit_group.hits.empty()) |
945 |
< |
{ |
946 |
< |
HitsForRead left_best_hits; |
947 |
< |
left_best_hits.insert_id = curr_left_obs_order; |
948 |
< |
//only left read mapped |
949 |
< |
bool got_read=get_read_from_stream(curr_left_obs_order, |
950 |
< |
left_reads,reads_format, false, l_read, left_um_out); |
951 |
< |
assert(got_read); |
952 |
< |
fprintf(left_um_out, "@%s #MAPPED#\n%s\n+\n%s\n", l_read.alt_name.c_str(), |
953 |
< |
l_read.seq.c_str(), l_read.qual.c_str()); |
954 |
< |
FragmentAlignmentGrade grade; |
955 |
< |
// Process hits for left singleton, select best alignments |
956 |
< |
read_best_alignments(curr_left_hit_group, grade, left_best_hits, gtf_junctions); |
957 |
< |
|
958 |
< |
if (left_best_hits.hits.size()>0 && left_best_hits.hits.size() <= max_multihits) |
959 |
< |
{ |
960 |
< |
update_junctions(left_best_hits, final_junctions); |
961 |
< |
update_insertions_and_deletions(left_best_hits, final_insertions, final_deletions); |
962 |
< |
|
963 |
< |
print_sam_for_single(rt, |
964 |
< |
left_best_hits, |
965 |
< |
grade, |
966 |
< |
FRAG_LEFT, |
967 |
< |
l_read, |
968 |
< |
bam_writer, |
969 |
< |
left_um_out); |
970 |
< |
} |
971 |
< |
} |
972 |
< |
else |
973 |
< |
{ //hits for both left and right reads |
974 |
< |
HitsForRead left_best_hits; |
975 |
< |
HitsForRead right_best_hits; |
976 |
< |
left_best_hits.insert_id = curr_left_obs_order; |
977 |
< |
right_best_hits.insert_id = curr_right_obs_order; |
978 |
< |
|
979 |
< |
InsertAlignmentGrade grade; |
980 |
< |
pair_best_alignments(curr_left_hit_group, |
981 |
< |
curr_right_hit_group, |
982 |
< |
grade, |
983 |
< |
left_best_hits, |
984 |
< |
right_best_hits); |
985 |
< |
|
986 |
< |
if (left_best_hits.hits.size()>0 && left_best_hits.hits.size() <= max_multihits && |
987 |
< |
right_best_hits.hits.size()>0 && right_best_hits.hits.size() <= max_multihits) |
988 |
< |
{ |
989 |
< |
update_junctions(left_best_hits, final_junctions); |
990 |
< |
update_junctions(right_best_hits, final_junctions); |
991 |
< |
update_insertions_and_deletions(left_best_hits, final_insertions, final_deletions); |
992 |
< |
update_insertions_and_deletions(right_best_hits, final_insertions, final_deletions); |
993 |
< |
|
994 |
< |
print_sam_for_pair(rt, |
995 |
< |
left_best_hits, |
996 |
< |
right_best_hits, |
997 |
< |
grade, |
998 |
< |
left_reads, |
999 |
< |
right_reads, |
1000 |
< |
bam_writer, |
1001 |
< |
left_um_out, |
1002 |
< |
right_um_out); |
1003 |
< |
} |
1004 |
< |
} |
1005 |
< |
|
1006 |
< |
left_hs.next_read_hits(curr_left_hit_group); |
1007 |
< |
curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
1008 |
< |
|
1009 |
< |
right_hs.next_read_hits(curr_right_hit_group); |
1010 |
< |
curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
1011 |
< |
} |
1012 |
< |
|
1013 |
< |
} //while we still have unreported hits.. |
1014 |
< |
//print the remaining unmapped reads at the end of each reads' stream |
1015 |
< |
get_read_from_stream(VMAXINT32, |
1016 |
< |
left_reads, |
1017 |
< |
reads_format, |
1018 |
< |
false, |
1019 |
< |
l_read, |
1020 |
< |
left_um_out); |
1021 |
< |
if (right_reads) |
1022 |
< |
get_read_from_stream(VMAXINT32, |
1023 |
< |
right_reads, |
1024 |
< |
reads_format, |
1025 |
< |
false, |
1026 |
< |
r_read, |
1027 |
< |
right_um_out); |
1028 |
< |
fprintf (stderr, "done.\n"); |
1029 |
< |
|
1030 |
< |
//small_overhangs = 0; |
1031 |
< |
for (JunctionSet::iterator i = final_junctions.begin(); i != final_junctions.end();) |
1032 |
< |
{ |
1033 |
< |
if (i->second.supporting_hits == 0 || i->second.left_extent < 8 || i->second.right_extent < 8) |
1034 |
< |
{ |
1035 |
< |
final_junctions.erase(i++); |
1036 |
< |
} |
1037 |
< |
else |
1038 |
< |
{ |
1039 |
< |
++i; |
1040 |
< |
} |
1041 |
< |
} |
1042 |
< |
|
1043 |
< |
// if (small_overhangs > 0) |
1044 |
< |
// fprintf(stderr, "Warning: %lu small overhang junctions!\n", small_overhangs); |
1045 |
< |
|
1046 |
< |
fprintf (stderr, "Printing junction BED track..."); |
1047 |
< |
print_junctions(junctions_out, final_junctions, rt); |
1048 |
< |
fprintf (stderr, "done\n"); |
1049 |
< |
|
1050 |
< |
fprintf (stderr, "Printing insertions..."); |
1051 |
< |
print_insertions(insertions_out, final_insertions,rt); |
1052 |
< |
fclose(insertions_out); |
1053 |
< |
fprintf (stderr, "done\n"); |
1054 |
< |
|
1055 |
< |
fprintf (stderr, "Printing deletions..."); |
1056 |
< |
print_deletions(deletions_out, final_deletions, rt); |
1057 |
< |
fclose(deletions_out); |
1058 |
< |
fprintf (stderr, "done\n"); |
1059 |
< |
|
1060 |
< |
fprintf(stderr, "Found %lu junctions from happy spliced reads\n", (long unsigned int)final_junctions.size()); |
1061 |
< |
} |
1062 |
< |
|
1063 |
< |
void print_usage() |
1064 |
< |
{ |
1065 |
< |
fprintf(stderr, "Usage: tophat_reports <junctions.bed> <insertions.vcf> <deletions.vcf> <accepted_hits.sam> <left_map1,...,left_mapN> <left_reads.fq> [right_map1,...,right_mapN] [right_reads.fq]\n"); |
1066 |
< |
|
1067 |
< |
// fprintf(stderr, "Usage: tophat_reports <coverage.wig> <junctions.bed> <accepted_hits.sam> <map1.bwtout> [splice_map1.sbwtout]\n"); |
1068 |
< |
} |
1984 |
> |
JunctionSet vjunctions[num_threads]; |
1985 |
> |
InsertionSet vinsertions[num_threads]; |
1986 |
> |
DeletionSet vdeletions[num_threads]; |
1987 |
> |
FusionSet vfusions[num_threads]; |
1988 |
> |
Coverage vcoverages[num_threads]; |
1989 |
> |
|
1990 |
> |
vector<boost::thread*> threads; |
1991 |
> |
for (int i = 0; i < num_threads; ++i) |
1992 |
> |
{ |
1993 |
> |
ConsensusEventsWorker worker; |
1994 |
|
|
1995 |
< |
int main(int argc, char** argv) |
1996 |
< |
{ |
1997 |
< |
fprintf(stderr, "tophat_reports v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION); |
1998 |
< |
fprintf(stderr, "---------------------------------------\n"); |
1999 |
< |
|
2000 |
< |
reads_format = FASTQ; |
2001 |
< |
|
2002 |
< |
int parse_ret = parse_options(argc, argv, print_usage); |
2003 |
< |
if (parse_ret) |
2004 |
< |
return parse_ret; |
2005 |
< |
|
2006 |
< |
if(optind >= argc) |
1995 |
> |
worker.left_map_fname = left_map_fname; |
1996 |
> |
worker.right_map_fname = right_map_fname; |
1997 |
> |
worker.rt = &rt; |
1998 |
> |
worker.gtf_junctions = >f_junctions; |
1999 |
> |
|
2000 |
> |
worker.junctions = &vjunctions[i]; |
2001 |
> |
worker.insertions = &vinsertions[i]; |
2002 |
> |
worker.deletions = &vdeletions[i]; |
2003 |
> |
worker.fusions = &vfusions[i]; |
2004 |
> |
worker.coverage = &vcoverages[i]; |
2005 |
> |
|
2006 |
> |
worker.right_map_offset = 0; |
2007 |
> |
|
2008 |
> |
if (i == 0) |
2009 |
|
{ |
2010 |
< |
print_usage(); |
2011 |
< |
return 1; |
2010 |
> |
worker.begin_id = 0; |
2011 |
> |
worker.left_map_offset = 0; |
2012 |
|
} |
2013 |
< |
|
1087 |
< |
string junctions_file_name = argv[optind++]; |
1088 |
< |
|
1089 |
< |
if(optind >= argc) |
2013 |
> |
else |
2014 |
|
{ |
2015 |
< |
print_usage(); |
2016 |
< |
return 1; |
2017 |
< |
} |
2018 |
< |
|
2019 |
< |
string insertions_file_name = argv[optind++]; |
2020 |
< |
if(optind >= argc) |
1097 |
< |
{ |
1098 |
< |
print_usage(); |
1099 |
< |
return 1; |
2015 |
> |
size_t offsets_size = offsets[i-1].size(); |
2016 |
> |
|
2017 |
> |
worker.begin_id = read_ids[i-1]; |
2018 |
> |
worker.left_map_offset = offsets[i-1].back(); |
2019 |
> |
if (offsets_size == 4) |
2020 |
> |
worker.right_map_offset = offsets[i-1][1]; |
2021 |
|
} |
2022 |
+ |
|
2023 |
+ |
worker.end_id = (i+1 < num_threads) ? read_ids[i] : std::numeric_limits<uint64_t>::max(); |
2024 |
+ |
|
2025 |
+ |
if (num_threads > 1 && i + 1 < num_threads) |
2026 |
+ |
threads.push_back(new boost::thread(worker)); |
2027 |
+ |
else |
2028 |
+ |
worker(); |
2029 |
+ |
} |
2030 |
+ |
|
2031 |
+ |
for (size_t i = 0; i < threads.size(); ++i) |
2032 |
+ |
{ |
2033 |
+ |
threads[i]->join(); |
2034 |
+ |
delete threads[i]; |
2035 |
+ |
threads[i] = NULL; |
2036 |
+ |
} |
2037 |
+ |
threads.clear(); |
2038 |
+ |
|
2039 |
+ |
JunctionSet& junctions = vjunctions[0]; |
2040 |
+ |
InsertionSet& insertions = vinsertions[0]; |
2041 |
+ |
DeletionSet& deletions = vdeletions[0]; |
2042 |
+ |
FusionSet& fusions = vfusions[0]; |
2043 |
+ |
Coverage& coverage = vcoverages[0]; |
2044 |
+ |
for (int i = 1; i < num_threads; ++i) |
2045 |
+ |
{ |
2046 |
+ |
merge_with(junctions, vjunctions[i]); |
2047 |
+ |
vjunctions[i].clear(); |
2048 |
+ |
|
2049 |
+ |
merge_with(insertions, vinsertions[i]); |
2050 |
+ |
vinsertions[i].clear(); |
2051 |
+ |
|
2052 |
+ |
merge_with(deletions, vdeletions[i]); |
2053 |
+ |
vdeletions[i].clear(); |
2054 |
+ |
|
2055 |
+ |
merge_with(fusions, vfusions[i]); |
2056 |
+ |
vfusions[i].clear(); |
2057 |
+ |
|
2058 |
+ |
coverage.merge_with(vcoverages[i]); |
2059 |
+ |
vcoverages[i].clear(); |
2060 |
+ |
} |
2061 |
+ |
|
2062 |
+ |
merge_with(junctions, gtf_junctions); |
2063 |
+ |
coverage.calculate_coverage(); |
2064 |
+ |
|
2065 |
+ |
JunctionSet rev_junctions; |
2066 |
+ |
JunctionSet::const_iterator junction_iter = junctions.begin(); |
2067 |
+ |
for (; junction_iter != junctions.end(); ++junction_iter) |
2068 |
+ |
{ |
2069 |
+ |
const Junction& junction = junction_iter->first; |
2070 |
+ |
Junction rev_junction = junction; |
2071 |
+ |
rev_junction.left = junction.right; |
2072 |
+ |
rev_junction.right = junction.left; |
2073 |
+ |
rev_junctions[rev_junction] = junction_iter->second; |
2074 |
+ |
} |
2075 |
+ |
|
2076 |
+ |
DeletionSet rev_deletions; |
2077 |
+ |
#if 0 |
2078 |
+ |
DeletionSet::const_iterator deletion_iter = deletions.begin(); |
2079 |
+ |
for (; deletion_iter != deletions.end(); ++deletion_iter) |
2080 |
+ |
{ |
2081 |
+ |
const Deletion& deletion = deletion_iter->first; |
2082 |
+ |
Deletion rev_deletion = deletion; |
2083 |
+ |
rev_deletion.left = deletion.right; |
2084 |
+ |
rev_deletion.right = deletion.left; |
2085 |
+ |
rev_deletions[rev_deletion] = deletion_iter->second; |
2086 |
+ |
} |
2087 |
+ |
#endif |
2088 |
+ |
|
2089 |
+ |
size_t num_unfiltered_juncs = junctions.size(); |
2090 |
+ |
fprintf(stderr, "Loaded %lu junctions\n", (long unsigned int) num_unfiltered_juncs); |
2091 |
+ |
|
2092 |
+ |
// Read hits, extract junctions, and toss the ones that arent strongly enough supported. |
2093 |
+ |
filter_junctions(junctions, gtf_junctions); |
2094 |
+ |
|
2095 |
+ |
//size_t num_juncs_after_filter = junctions.size(); |
2096 |
+ |
//fprintf(stderr, "Filtered %lu junctions\n", |
2097 |
+ |
// num_unfiltered_juncs - num_juncs_after_filter); |
2098 |
+ |
|
2099 |
+ |
/* |
2100 |
+ |
size_t small_overhangs = 0; |
2101 |
+ |
for (JunctionSet::iterator i = junctions.begin(); i != junctions.end(); ++i) |
2102 |
+ |
{ |
2103 |
+ |
if (i->second.accepted && |
2104 |
+ |
(i->second.left_extent < min_anchor_len || i->second.right_extent < min_anchor_len)) |
2105 |
+ |
{ |
2106 |
+ |
small_overhangs++; |
2107 |
+ |
} |
2108 |
+ |
} |
2109 |
|
|
2110 |
< |
string deletions_file_name = argv[optind++]; |
2111 |
< |
if(optind >= argc) |
2110 |
> |
if (small_overhangs >0) |
2111 |
> |
fprintf(stderr, "Warning: %lu small overhang junctions!\n", (long unsigned int)small_overhangs); |
2112 |
> |
*/ |
2113 |
> |
|
2114 |
> |
JunctionSet vfinal_junctions[num_threads]; |
2115 |
> |
InsertionSet vfinal_insertions[num_threads]; |
2116 |
> |
DeletionSet vfinal_deletions[num_threads]; |
2117 |
> |
FusionSet vfinal_fusions[num_threads]; |
2118 |
> |
|
2119 |
> |
for (int i = 0; i < num_threads; ++i) |
2120 |
> |
{ |
2121 |
> |
ReportWorker worker; |
2122 |
> |
|
2123 |
> |
worker.sam_header_fname = sam_header; |
2124 |
> |
char filename[1024] = {0}; |
2125 |
> |
sprintf(filename, "%s%d.bam", bam_output_fname.c_str(), i); |
2126 |
> |
worker.bam_output_fname = filename; |
2127 |
> |
string tmpoutdir=getFdir(worker.bam_output_fname); |
2128 |
> |
worker.left_um_fname = tmpoutdir; |
2129 |
> |
sprintf(filename, "unmapped_left_%d.bam", i); |
2130 |
> |
worker.left_um_fname+=filename; |
2131 |
> |
|
2132 |
> |
if (right_reads_fname != "") |
2133 |
|
{ |
2134 |
< |
print_usage(); |
2135 |
< |
return 1; |
2134 |
> |
sprintf(filename, "unmapped_right_%d.bam", i); |
2135 |
> |
worker.right_um_fname = tmpoutdir; |
2136 |
> |
worker.right_um_fname += filename; |
2137 |
|
} |
2138 |
< |
|
2139 |
< |
string accepted_hits_file_name = argv[optind++]; |
2140 |
< |
|
2141 |
< |
if(optind >= argc) |
2138 |
> |
|
2139 |
> |
worker.left_reads_fname = left_reads_fname; |
2140 |
> |
worker.left_map_fname = left_map_fname; |
2141 |
> |
worker.right_reads_fname = right_reads_fname; |
2142 |
> |
worker.right_map_fname = right_map_fname; |
2143 |
> |
|
2144 |
> |
worker.gtf_junctions = >f_junctions; |
2145 |
> |
worker.junctions = &junctions; |
2146 |
> |
worker.rev_junctions = &rev_junctions; |
2147 |
> |
worker.insertions = &insertions; |
2148 |
> |
worker.deletions = &deletions; |
2149 |
> |
worker.rev_deletions = &rev_deletions; |
2150 |
> |
worker.fusions = &fusions; |
2151 |
> |
worker.coverage = &coverage; |
2152 |
> |
|
2153 |
> |
worker.final_junctions = &vfinal_junctions[i]; |
2154 |
> |
worker.final_insertions = &vfinal_insertions[i]; |
2155 |
> |
worker.final_deletions = &vfinal_deletions[i]; |
2156 |
> |
worker.final_fusions = &vfinal_fusions[i]; |
2157 |
> |
worker.rt = &rt; |
2158 |
> |
|
2159 |
> |
worker.right_reads_offset = 0; |
2160 |
> |
worker.right_map_offset = 0; |
2161 |
> |
|
2162 |
> |
if (i == 0) |
2163 |
|
{ |
2164 |
< |
print_usage(); |
2165 |
< |
return 1; |
2164 |
> |
worker.begin_id = 0; |
2165 |
> |
worker.left_reads_offset = 0; |
2166 |
> |
worker.left_map_offset = 0; |
2167 |
|
} |
2168 |
< |
|
1117 |
< |
string left_map_filename = argv[optind++]; |
1118 |
< |
|
1119 |
< |
if(optind >= argc) |
2168 |
> |
else |
2169 |
|
{ |
2170 |
< |
print_usage(); |
2171 |
< |
return 1; |
2170 |
> |
size_t offsets_size = offsets[i-1].size(); |
2171 |
> |
|
2172 |
> |
worker.begin_id = read_ids[i-1]; |
2173 |
> |
worker.left_reads_offset = offsets[i-1][offsets_size - 2]; |
2174 |
> |
worker.left_map_offset = offsets[i-1].back(); |
2175 |
> |
if (offsets_size == 4) |
2176 |
> |
{ |
2177 |
> |
worker.right_reads_offset = offsets[i-1][0]; |
2178 |
> |
worker.right_map_offset = offsets[i-1][1]; |
2179 |
> |
} |
2180 |
|
} |
2181 |
< |
//FZPipe left_map_file; |
2182 |
< |
//string unbamcmd=getBam2SamCmd(left_map_filename); |
1126 |
< |
//left_map_file.openRead(left_map_filename, unbamcmd); |
1127 |
< |
string left_reads_filename = argv[optind++]; |
1128 |
< |
string unzcmd=getUnpackCmd(left_reads_filename, false); |
1129 |
< |
|
1130 |
< |
string right_map_filename; |
1131 |
< |
string right_reads_filename; |
1132 |
< |
FZPipe right_reads_file; |
1133 |
< |
string right_um_filename; |
1134 |
< |
FZPipe right_um_file; |
2181 |
> |
|
2182 |
> |
worker.end_id = (i+1 < num_threads) ? read_ids[i] : std::numeric_limits<uint64_t>::max(); |
2183 |
|
|
2184 |
< |
if (optind < argc) |
2185 |
< |
{ |
2186 |
< |
right_map_filename = argv[optind++]; |
2187 |
< |
if(optind >= argc) { |
2188 |
< |
print_usage(); |
1141 |
< |
return 1; |
1142 |
< |
} |
1143 |
< |
//right_map_file.openRead(right_map_filename, unbamcmd); |
1144 |
< |
//if (optind<argc) { |
1145 |
< |
right_reads_filename=argv[optind++]; |
1146 |
< |
right_reads_file.openRead(right_reads_filename,unzcmd); |
1147 |
< |
right_um_filename=output_dir+"/unmapped_right.fq"; |
1148 |
< |
if (!zpacker.empty()) right_um_filename+=".z"; |
1149 |
< |
if (right_um_file.openWrite(right_um_filename.c_str(), zpacker)==NULL) |
1150 |
< |
err_die("Error: cannot open file %s for writing!\n",right_um_filename.c_str()); |
2184 |
> |
if (num_threads > 1 && i + 1 < num_threads) |
2185 |
> |
threads.push_back(new boost::thread(worker)); |
2186 |
> |
else |
2187 |
> |
worker(); |
2188 |
> |
} |
2189 |
|
|
2190 |
< |
// } |
2191 |
< |
} |
2192 |
< |
FILE* junctions_file = fopen(junctions_file_name.c_str(), "w"); |
2193 |
< |
if (junctions_file == NULL) |
2194 |
< |
{ |
2195 |
< |
fprintf(stderr, "Error: cannot open BED file %s for writing\n", |
2196 |
< |
junctions_file_name.c_str()); |
2197 |
< |
exit(1); |
2198 |
< |
} |
2199 |
< |
|
2200 |
< |
FILE* insertions_file = fopen(insertions_file_name.c_str(), "w"); |
2201 |
< |
if (insertions_file == NULL) |
2190 |
> |
for (size_t i = 0; i < threads.size(); ++i) |
2191 |
> |
{ |
2192 |
> |
threads[i]->join(); |
2193 |
> |
delete threads[i]; |
2194 |
> |
threads[i] = NULL; |
2195 |
> |
} |
2196 |
> |
threads.clear(); |
2197 |
> |
|
2198 |
> |
JunctionSet& final_junctions = vfinal_junctions[0]; |
2199 |
> |
InsertionSet& final_insertions = vfinal_insertions[0]; |
2200 |
> |
DeletionSet& final_deletions = vfinal_deletions[0]; |
2201 |
> |
FusionSet& final_fusions = vfinal_fusions[0]; |
2202 |
> |
for (int i = 1; i < num_threads; ++i) |
2203 |
> |
{ |
2204 |
> |
merge_with(final_junctions, vfinal_junctions[i]); |
2205 |
> |
vfinal_junctions[i].clear(); |
2206 |
> |
|
2207 |
> |
merge_with(final_insertions, vfinal_insertions[i]); |
2208 |
> |
vfinal_insertions[i].clear(); |
2209 |
> |
|
2210 |
> |
merge_with(final_deletions, vfinal_deletions[i]); |
2211 |
> |
vfinal_deletions[i].clear(); |
2212 |
> |
|
2213 |
> |
merge_with(final_fusions, vfinal_fusions[i]); |
2214 |
> |
vfinal_fusions[i].clear(); |
2215 |
> |
} |
2216 |
> |
|
2217 |
> |
fprintf (stderr, "Reporting final accepted alignments..."); |
2218 |
> |
fprintf (stderr, "done.\n"); |
2219 |
> |
|
2220 |
> |
//small_overhangs = 0; |
2221 |
> |
for (JunctionSet::iterator i = final_junctions.begin(); i != final_junctions.end();) |
2222 |
> |
{ |
2223 |
> |
if (i->second.supporting_hits == 0 || i->second.left_extent < 8 || i->second.right_extent < 8) |
2224 |
|
{ |
2225 |
< |
fprintf(stderr, "Error: cannot open VCF file %s for writing\n", |
1166 |
< |
insertions_file_name.c_str()); |
1167 |
< |
exit(1); |
2225 |
> |
final_junctions.erase(i++); |
2226 |
|
} |
2227 |
< |
|
1170 |
< |
|
1171 |
< |
FILE* deletions_file = fopen(deletions_file_name.c_str(), "w"); |
1172 |
< |
if (deletions_file == NULL) |
2227 |
> |
else |
2228 |
|
{ |
2229 |
< |
fprintf(stderr, "Error: cannot open VCF file %s for writing\n", |
1175 |
< |
deletions_file_name.c_str()); |
1176 |
< |
exit(1); |
2229 |
> |
++i; |
2230 |
|
} |
2231 |
< |
bool uncompressed_bam=(accepted_hits_file_name=="-"); |
2232 |
< |
GBamWriter bam_writer(accepted_hits_file_name.c_str(), sam_header.c_str(), uncompressed_bam); |
2231 |
> |
} |
2232 |
> |
|
2233 |
> |
// if (small_overhangs > 0) |
2234 |
> |
// fprintf(stderr, "Warning: %lu small overhang junctions!\n", small_overhangs); |
2235 |
> |
|
2236 |
> |
fprintf (stderr, "Printing junction BED track..."); |
2237 |
> |
print_junctions(junctions_out, final_junctions, rt); |
2238 |
> |
fprintf (stderr, "done\n"); |
2239 |
> |
|
2240 |
> |
fprintf (stderr, "Printing insertions..."); |
2241 |
> |
print_insertions(insertions_out, final_insertions,rt); |
2242 |
> |
fclose(insertions_out); |
2243 |
> |
fprintf (stderr, "done\n"); |
2244 |
> |
|
2245 |
> |
fprintf (stderr, "Printing deletions..."); |
2246 |
> |
print_deletions(deletions_out, final_deletions, rt); |
2247 |
> |
fclose(deletions_out); |
2248 |
> |
fprintf (stderr, "done\n"); |
2249 |
|
|
2250 |
< |
FZPipe left_reads_file(left_reads_filename, unzcmd); |
2251 |
< |
if (left_reads_file.file==NULL) |
2252 |
< |
{ |
2253 |
< |
fprintf(stderr, "Error: cannot open reads file %s for reading\n", |
2254 |
< |
left_reads_filename.c_str()); |
2255 |
< |
exit(1); |
2256 |
< |
} |
2257 |
< |
string left_um_filename(output_dir+"/unmapped_left.fq"); |
2258 |
< |
if (!zpacker.empty()) left_um_filename+=".z"; |
1190 |
< |
FZPipe left_um_file; |
1191 |
< |
if (left_um_file.openWrite(left_um_filename.c_str(), zpacker)==NULL) |
1192 |
< |
err_die("Error: cannot open file %s for writing!\n",left_um_filename.c_str()); |
1193 |
< |
|
1194 |
< |
driver(bam_writer, left_map_filename, |
1195 |
< |
left_reads_file.file, |
1196 |
< |
right_map_filename, |
1197 |
< |
right_reads_file.file, |
1198 |
< |
junctions_file, |
1199 |
< |
insertions_file, |
1200 |
< |
deletions_file, |
1201 |
< |
left_um_file.file, |
1202 |
< |
right_um_file.file); |
1203 |
< |
left_um_file.close(); |
1204 |
< |
right_um_file.close(); |
1205 |
< |
return 0; |
2250 |
> |
if (fusion_search) |
2251 |
> |
{ |
2252 |
> |
fprintf (stderr, "Printing fusions..."); |
2253 |
> |
print_fusions(fusions_out, final_fusions, rt); |
2254 |
> |
fclose(fusions_out); |
2255 |
> |
fprintf (stderr, "done\n"); |
2256 |
> |
} |
2257 |
> |
|
2258 |
> |
fprintf(stderr, "Found %lu junctions from happy spliced reads\n", (long unsigned int)final_junctions.size()); |
2259 |
|
} |
2260 |
|
|
2261 |
+ |
void print_usage() |
2262 |
+ |
{ |
2263 |
+ |
fprintf(stderr, "Usage: tophat_reports <junctions.bed> <insertions.vcf> <deletions.vcf> <accepted_hits.sam> <left_map1,...,left_mapN> <left_reads.fq> [right_map1,...,right_mapN] [right_reads.fq]\n"); |
2264 |
+ |
|
2265 |
+ |
// fprintf(stderr, "Usage: tophat_reports <coverage.wig> <junctions.bed> <accepted_hits.sam> <map1.bwtout> [splice_map1.sbwtout]\n"); |
2266 |
+ |
} |
2267 |
|
|
2268 |
+ |
int main(int argc, char** argv) |
2269 |
+ |
{ |
2270 |
+ |
fprintf(stderr, "tophat_reports v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION); |
2271 |
+ |
fprintf(stderr, "---------------------------------------\n"); |
2272 |
+ |
|
2273 |
+ |
reads_format = FASTQ; |
2274 |
+ |
|
2275 |
+ |
int parse_ret = parse_options(argc, argv, print_usage); |
2276 |
+ |
if (parse_ret) |
2277 |
+ |
return parse_ret; |
2278 |
+ |
|
2279 |
+ |
if(optind >= argc) |
2280 |
+ |
{ |
2281 |
+ |
print_usage(); |
2282 |
+ |
return 1; |
2283 |
+ |
} |
2284 |
+ |
|
2285 |
+ |
string ref_file_name = argv[optind++]; |
2286 |
+ |
|
2287 |
+ |
if(optind >= argc) |
2288 |
+ |
{ |
2289 |
+ |
print_usage(); |
2290 |
+ |
return 1; |
2291 |
+ |
} |
2292 |
+ |
|
2293 |
+ |
string junctions_file_name = argv[optind++]; |
2294 |
+ |
|
2295 |
+ |
if(optind >= argc) |
2296 |
+ |
{ |
2297 |
+ |
print_usage(); |
2298 |
+ |
return 1; |
2299 |
+ |
} |
2300 |
+ |
|
2301 |
+ |
string insertions_file_name = argv[optind++]; |
2302 |
+ |
if(optind >= argc) |
2303 |
+ |
{ |
2304 |
+ |
print_usage(); |
2305 |
+ |
return 1; |
2306 |
+ |
} |
2307 |
+ |
|
2308 |
+ |
string deletions_file_name = argv[optind++]; |
2309 |
+ |
if(optind >= argc) |
2310 |
+ |
{ |
2311 |
+ |
print_usage(); |
2312 |
+ |
return 1; |
2313 |
+ |
} |
2314 |
+ |
|
2315 |
+ |
string fusions_file_name = argv[optind++]; |
2316 |
+ |
if(optind >= argc) |
2317 |
+ |
{ |
2318 |
+ |
print_usage(); |
2319 |
+ |
return 1; |
2320 |
+ |
} |
2321 |
+ |
|
2322 |
+ |
string accepted_hits_file_name = argv[optind++]; |
2323 |
+ |
if(optind >= argc) |
2324 |
+ |
{ |
2325 |
+ |
print_usage(); |
2326 |
+ |
return 1; |
2327 |
+ |
} |
2328 |
+ |
|
2329 |
+ |
string left_map_filename = argv[optind++]; |
2330 |
+ |
if(optind >= argc) |
2331 |
+ |
{ |
2332 |
+ |
print_usage(); |
2333 |
+ |
return 1; |
2334 |
+ |
} |
2335 |
+ |
|
2336 |
+ |
string left_reads_filename = argv[optind++]; |
2337 |
+ |
string unzcmd=getUnpackCmd(left_reads_filename, false); |
2338 |
+ |
string right_map_filename; |
2339 |
+ |
string right_reads_filename; |
2340 |
+ |
|
2341 |
+ |
if (optind < argc) |
2342 |
+ |
{ |
2343 |
+ |
right_map_filename = argv[optind++]; |
2344 |
+ |
if(optind >= argc) { |
2345 |
+ |
print_usage(); |
2346 |
+ |
return 1; |
2347 |
+ |
} |
2348 |
+ |
right_reads_filename=argv[optind++]; |
2349 |
+ |
} |
2350 |
+ |
|
2351 |
+ |
ifstream ref_stream(ref_file_name.c_str(), ifstream::in); |
2352 |
+ |
if (!ref_stream.good()) |
2353 |
+ |
{ |
2354 |
+ |
fprintf(stderr, "Error: cannot open %s for reading\n", |
2355 |
+ |
ref_file_name.c_str()); |
2356 |
+ |
exit(1); |
2357 |
+ |
} |
2358 |
+ |
|
2359 |
+ |
FILE* junctions_file = fopen(junctions_file_name.c_str(), "w"); |
2360 |
+ |
if (junctions_file == NULL) |
2361 |
+ |
{ |
2362 |
+ |
fprintf(stderr, "Error: cannot open BED file %s for writing\n", |
2363 |
+ |
junctions_file_name.c_str()); |
2364 |
+ |
exit(1); |
2365 |
+ |
} |
2366 |
+ |
|
2367 |
+ |
FILE* insertions_file = fopen(insertions_file_name.c_str(), "w"); |
2368 |
+ |
if (insertions_file == NULL) |
2369 |
+ |
{ |
2370 |
+ |
fprintf(stderr, "Error: cannot open VCF file %s for writing\n", |
2371 |
+ |
insertions_file_name.c_str()); |
2372 |
+ |
exit(1); |
2373 |
+ |
} |
2374 |
+ |
|
2375 |
+ |
FILE* deletions_file = fopen(deletions_file_name.c_str(), "w"); |
2376 |
+ |
if (deletions_file == NULL) |
2377 |
+ |
{ |
2378 |
+ |
fprintf(stderr, "Error: cannot open VCF file %s for writing\n", |
2379 |
+ |
deletions_file_name.c_str()); |
2380 |
+ |
exit(1); |
2381 |
+ |
} |
2382 |
+ |
|
2383 |
+ |
FILE* fusions_file = NULL; |
2384 |
+ |
if (fusion_search) |
2385 |
+ |
{ |
2386 |
+ |
fusions_file = fopen(fusions_file_name.c_str(), "w"); |
2387 |
+ |
if (fusions_file == NULL) |
2388 |
+ |
{ |
2389 |
+ |
fprintf(stderr, "Error: cannot open VCF file %s for writing\n", |
2390 |
+ |
fusions_file_name.c_str()); |
2391 |
+ |
exit(1); |
2392 |
+ |
} |
2393 |
+ |
} |
2394 |
+ |
|
2395 |
+ |
driver(accepted_hits_file_name, |
2396 |
+ |
ref_stream, |
2397 |
+ |
left_map_filename, |
2398 |
+ |
left_reads_filename, |
2399 |
+ |
right_map_filename, |
2400 |
+ |
right_reads_filename, |
2401 |
+ |
junctions_file, |
2402 |
+ |
insertions_file, |
2403 |
+ |
deletions_file, |
2404 |
+ |
fusions_file); |
2405 |
+ |
|
2406 |
+ |
return 0; |
2407 |
+ |
} |