1 |
/* |
2 |
* closures.cpp |
3 |
* TopHat |
4 |
* |
5 |
* Created by Cole Trapnell on 1/15/09. |
6 |
* Copyright 2009 Cole Trapnell. All rights reserved. |
7 |
* |
8 |
*/ |
9 |
|
10 |
#ifdef HAVE_CONFIG_H |
11 |
#include <config.h> |
12 |
#endif |
13 |
|
14 |
#include <iostream> |
15 |
#include <fstream> |
16 |
#include <getopt.h> |
17 |
#include "bwt_map.h" |
18 |
#include "inserts.h" |
19 |
#include "closures.h" |
20 |
#include "reads.h" |
21 |
#include "tokenize.h" |
22 |
#include <seqan/sequence.h> |
23 |
#include <seqan/file.h> |
24 |
|
25 |
using namespace seqan; |
26 |
using namespace std; |
27 |
|
28 |
|
29 |
bool possible_cotranscript(const BowtieHit& h1, const BowtieHit& h2, bool check_strand = true) |
30 |
{ |
31 |
if (h1.insert_id() != h2.insert_id()) |
32 |
return false; |
33 |
int min_mate_inner_dist = inner_dist_mean - inner_dist_std_dev; |
34 |
if (max_mate_inner_dist == -1) |
35 |
{ |
36 |
max_mate_inner_dist = inner_dist_mean + inner_dist_std_dev; |
37 |
} |
38 |
|
39 |
InsertAlignmentGrade grade(h1,h2, min_mate_inner_dist, max_mate_inner_dist); |
40 |
return (!grade.too_far && !grade.too_close && grade.opposite_strands); |
41 |
} |
42 |
|
43 |
|
44 |
void check_mates(const HitList& hits1_in_ref, |
45 |
const HitList& hits2_in_ref, |
46 |
vector<pair<size_t, size_t> >& happy_mates, |
47 |
vector<size_t>& map1_singletons, |
48 |
vector<size_t>& map2_singletons) |
49 |
{ |
50 |
std::set<size_t> marked; |
51 |
// TODO: if this shows up on the profile, replace it with a linear |
52 |
// time algorithm. This one is 2*n*lg(n). |
53 |
HitList::const_iterator last_good = hits2_in_ref.begin(); |
54 |
|
55 |
for (size_t i = 0; i < hits1_in_ref.size(); ++i) |
56 |
{ |
57 |
pair<HitList::const_iterator, HitList::const_iterator> range_pair; |
58 |
range_pair = equal_range(last_good, hits2_in_ref.end(), |
59 |
hits1_in_ref[i], hit_insert_id_lt); |
60 |
bool found_hit = false; |
61 |
if (range_pair.first != range_pair.second) |
62 |
last_good = range_pair.first; |
63 |
for (HitList::const_iterator f = range_pair.first; |
64 |
f != range_pair.second; |
65 |
++f) |
66 |
{ |
67 |
if (possible_cotranscript(hits1_in_ref[i], *f)) |
68 |
{ |
69 |
happy_mates.push_back(make_pair(i,f - hits2_in_ref.begin())); |
70 |
marked.insert(f - hits2_in_ref.begin()); |
71 |
found_hit = true; |
72 |
} |
73 |
} |
74 |
if (!found_hit) |
75 |
map1_singletons.push_back(i); |
76 |
} |
77 |
|
78 |
for (size_t i = 0; i < hits2_in_ref.size(); ++i) |
79 |
{ |
80 |
if (marked.find(i) == marked.end()) |
81 |
{ |
82 |
map2_singletons.push_back(i); |
83 |
} |
84 |
} |
85 |
} |
86 |
|
87 |
bool prefer_shorter_pairs = true; |
88 |
|
89 |
typedef pair<InsertAlignmentGrade, vector<InsertAlignment> > BestPairingHits; |
90 |
|
91 |
template<typename Visitor> |
92 |
void visit_best_pairing(HitsForRead& left_hit_group, |
93 |
HitsForRead& right_hit_group, |
94 |
Visitor& visitor) |
95 |
{ |
96 |
BestPairingHits insert_best; |
97 |
|
98 |
for (size_t i = 0; i < left_hit_group.hits.size(); ++i) |
99 |
{ |
100 |
BowtieHit& h1 = left_hit_group.hits[i]; |
101 |
|
102 |
for (size_t j = 0; j < right_hit_group.hits.size(); j++) |
103 |
{ |
104 |
BowtieHit& h2 = right_hit_group.hits[j]; |
105 |
if (h1.ref_id() != h2.ref_id()) |
106 |
continue; |
107 |
|
108 |
uint32_t refid = h1.ref_id(); |
109 |
|
110 |
int min_mate_inner_dist = inner_dist_mean - inner_dist_std_dev; |
111 |
if (max_mate_inner_dist == -1) |
112 |
{ |
113 |
max_mate_inner_dist = inner_dist_mean + inner_dist_std_dev; |
114 |
} |
115 |
InsertAlignmentGrade s(h1, h2, min_mate_inner_dist, max_mate_inner_dist); |
116 |
|
117 |
//pair<InsertAlignmentGrade, vector<InsertAlignment> >& insert_best |
118 |
// = best_status_for_inserts[curr_left_obs_order]; |
119 |
InsertAlignmentGrade& current = insert_best.first; |
120 |
// Is the new status better than the current best one? |
121 |
if (current < s) |
122 |
{ |
123 |
insert_best.second.clear(); |
124 |
current = s; |
125 |
insert_best.second.push_back(InsertAlignment(refid, &h1, &h2)); |
126 |
} |
127 |
else if (! (s < current)) |
128 |
{ |
129 |
if (prefer_shorter_pairs && current.num_mapped == 2) |
130 |
{ |
131 |
pair<int, int> dc = pair_distances(*(insert_best.second[0].left_alignment), *(insert_best.second[0].right_alignment)); |
132 |
pair<int, int> ds = pair_distances(h1,h2); |
133 |
if (ds.second < dc.second) |
134 |
{ |
135 |
//chucked_for_shorter_pair += insert_best.second.size(); |
136 |
insert_best.second.clear(); |
137 |
current = s; |
138 |
insert_best.second.push_back(InsertAlignment(refid, &h1, &h2)); |
139 |
|
140 |
} |
141 |
} |
142 |
else |
143 |
{ |
144 |
insert_best.second.push_back(InsertAlignment(refid, &h1, &h2)); |
145 |
} |
146 |
} |
147 |
} |
148 |
} |
149 |
|
150 |
visitor.visit(insert_best); |
151 |
} |
152 |
|
153 |
class CovMapIntronFinder |
154 |
{ |
155 |
std::set<SpliceMotif> _tmp_hits; |
156 |
vector<SpliceMotif> _hits; |
157 |
RefSequenceTable::Sequence* _ref_seq; |
158 |
public: |
159 |
CovMapIntronFinder() : _ref_seq(NULL) {} |
160 |
CovMapIntronFinder(RefSequenceTable::Sequence* ref_seq) : |
161 |
_ref_seq(ref_seq), |
162 |
_lms(vector<bool>(length(*ref_seq), false)), |
163 |
_rms(vector<bool>(length(*ref_seq), false)){} |
164 |
|
165 |
void add_motifs_in_window(int left, |
166 |
int right, |
167 |
const string& left_motif, |
168 |
const string& right_motif) |
169 |
{ |
170 |
size_t l_start = max(0,left); |
171 |
for (int i = l_start; |
172 |
i < min(right, (int)length(*_ref_seq) - 2); |
173 |
++i) |
174 |
{ |
175 |
seqan::Infix<RefSequenceTable::Sequence>::Type curr |
176 |
= seqan::infix(*_ref_seq,i, i + 2); |
177 |
if (curr == left_motif) |
178 |
_lms[i] = true; |
179 |
else if (curr == right_motif) |
180 |
_rms[i] = true; |
181 |
} |
182 |
} |
183 |
|
184 |
void finalize() |
185 |
{ |
186 |
int pos = 0; |
187 |
for (vector<bool>::iterator itr = _lms.begin(); |
188 |
itr != _lms.end(); |
189 |
++itr, ++pos) |
190 |
{ |
191 |
if (*itr) |
192 |
_hits.push_back(SpliceMotif(pos, true)); |
193 |
} |
194 |
|
195 |
pos = 0; |
196 |
for (vector<bool>::iterator itr = _rms.begin(); |
197 |
itr != _rms.end(); |
198 |
++itr, ++pos) |
199 |
{ |
200 |
if (*itr) |
201 |
_hits.push_back(SpliceMotif(pos, false)); |
202 |
} |
203 |
|
204 |
sort(_hits.begin(), _hits.end()); |
205 |
} |
206 |
|
207 |
size_t seq_len() const { return length(*_ref_seq); } |
208 |
const vector<SpliceMotif>& hits() const { return _hits; } |
209 |
vector<bool> _lms; |
210 |
vector<bool> _rms; |
211 |
}; |
212 |
|
213 |
typedef CovMapIntronFinder CIF; |
214 |
|
215 |
struct RefCIF |
216 |
{ |
217 |
RefCIF() : ref_seq(NULL) {} |
218 |
RefCIF(const CIF& f, const CIF& r, RefSequenceTable::Sequence* s) : |
219 |
fwd_cif(f), rev_cif(r), ref_seq(s) {} |
220 |
CIF fwd_cif; |
221 |
CIF rev_cif; |
222 |
RefSequenceTable::Sequence* ref_seq; |
223 |
}; |
224 |
|
225 |
class CoverageMapVisitor |
226 |
{ |
227 |
public: |
228 |
map<uint32_t, RefCIF> finders; |
229 |
|
230 |
CoverageMapVisitor(istream& ref_stream, |
231 |
RefSequenceTable& rt) |
232 |
{ |
233 |
|
234 |
while(ref_stream.good() && |
235 |
!ref_stream.eof()) |
236 |
{ |
237 |
RefSequenceTable::Sequence* ref_str = new RefSequenceTable::Sequence; |
238 |
string name; |
239 |
readMeta(ref_stream, name, Fasta()); |
240 |
string::size_type space_pos = name.find_first_of(" \t\r"); |
241 |
if (space_pos != string::npos) |
242 |
{ |
243 |
name.resize(space_pos); |
244 |
} |
245 |
read(ref_stream, *ref_str, Fasta()); |
246 |
|
247 |
uint32_t ref_id = rt.get_id(name, NULL, 0); |
248 |
finders[ref_id] = RefCIF(CIF(ref_str), CIF(ref_str), ref_str); |
249 |
} |
250 |
} |
251 |
|
252 |
void visit(BestPairingHits& pairings) |
253 |
{ |
254 |
if (!pairings.first.num_mapped == 2) |
255 |
return; |
256 |
|
257 |
static string fwd_lm("GT"); |
258 |
static string fwd_rm("AG"); |
259 |
static string rev_lm("CT"); |
260 |
static string rev_rm("AC"); |
261 |
|
262 |
for (size_t i = 0; |
263 |
i < pairings.second.size(); |
264 |
++i) |
265 |
{ |
266 |
|
267 |
InsertAlignment& al = pairings.second[i]; |
268 |
|
269 |
BowtieHit& bh_left = *(al.left_alignment); |
270 |
BowtieHit& bh_right = *(al.right_alignment); |
271 |
|
272 |
assert (bh_left.ref_id() == bh_right.ref_id()); |
273 |
|
274 |
map<uint32_t, RefCIF >::iterator if_itr = finders.find(bh_left.ref_id()); |
275 |
assert (if_itr != finders.end()); |
276 |
|
277 |
RefCIF& ref_finders = if_itr->second; |
278 |
|
279 |
pair<int, int> ds = pair_distances(bh_left, bh_right); |
280 |
|
281 |
int minor_hit_start, major_hit_start; |
282 |
int minor_hit_end, major_hit_end; |
283 |
if (bh_left.left() < bh_right.left()) |
284 |
{ |
285 |
minor_hit_start = (int)bh_left.left(); |
286 |
minor_hit_end = (int)bh_left.right(); |
287 |
major_hit_start = (int)bh_right.left(); |
288 |
major_hit_end = (int)bh_right.right(); |
289 |
} |
290 |
else |
291 |
{ |
292 |
minor_hit_start = (int)bh_right.left(); |
293 |
minor_hit_end = (int)bh_right.right(); |
294 |
major_hit_start = (int)bh_left.left(); |
295 |
major_hit_end = (int)bh_left.right(); |
296 |
} |
297 |
|
298 |
int inner_dist = major_hit_start - minor_hit_end; |
299 |
|
300 |
bool skip_fwd = false; |
301 |
bool skip_rev = false; |
302 |
|
303 |
if (library_type == FR_FIRSTSTRAND) |
304 |
{ |
305 |
if (bh_left.antisense_align()) skip_rev = true; |
306 |
else skip_fwd = true; |
307 |
} |
308 |
|
309 |
if (library_type == FR_SECONDSTRAND) |
310 |
{ |
311 |
if (bh_left.antisense_align()) skip_fwd = true; |
312 |
else skip_rev = true; |
313 |
} |
314 |
|
315 |
if (inner_dist > inner_dist_mean + 2 * inner_dist_std_dev) |
316 |
{ |
317 |
// Forward strand |
318 |
if (!skip_fwd) |
319 |
{ |
320 |
ref_finders.fwd_cif.add_motifs_in_window((int)bh_left.left() - 10, |
321 |
bh_left.right() + 10, |
322 |
fwd_lm, |
323 |
fwd_rm); |
324 |
|
325 |
ref_finders.fwd_cif.add_motifs_in_window((int)bh_right.left() - 10, |
326 |
bh_right.right() + 10, |
327 |
fwd_lm, |
328 |
fwd_rm); |
329 |
} |
330 |
|
331 |
// Reverse strand |
332 |
if (!skip_rev) |
333 |
{ |
334 |
ref_finders.rev_cif.add_motifs_in_window((int)bh_left.left() - 10, |
335 |
bh_left.right() + 10, |
336 |
rev_lm, |
337 |
rev_rm); |
338 |
|
339 |
ref_finders.rev_cif.add_motifs_in_window((int)bh_right.left() - 10, |
340 |
bh_right.right() + 10, |
341 |
rev_lm, |
342 |
rev_rm); |
343 |
} |
344 |
} |
345 |
} |
346 |
} |
347 |
|
348 |
void finalize() |
349 |
{ |
350 |
for (map<uint32_t, RefCIF >::iterator itr = finders.begin(); |
351 |
itr != finders.end(); |
352 |
++itr) |
353 |
{ |
354 |
itr->second.fwd_cif.finalize(); |
355 |
itr->second.rev_cif.finalize(); |
356 |
delete itr->second.ref_seq; |
357 |
itr->second.ref_seq = NULL; |
358 |
} |
359 |
} |
360 |
}; |
361 |
|
362 |
|
363 |
class JunctionMapVisitor |
364 |
{ |
365 |
public: |
366 |
typedef JunctionFinder<RefSequenceTable::Sequence, CIF> JF; |
367 |
|
368 |
struct JunctionTable |
369 |
{ |
370 |
JunctionTable() : jf(NULL), possible_splices(NULL) {} |
371 |
JunctionTable(ClosureJunctionSet* ps, JF* _jf, bool as) |
372 |
: jf(_jf), possible_splices(ps), antisense(as) {} |
373 |
|
374 |
JF* jf; |
375 |
ClosureJunctionSet* possible_splices; |
376 |
bool antisense; |
377 |
}; |
378 |
|
379 |
|
380 |
map<uint32_t, pair<JunctionTable, JunctionTable> > _finders; |
381 |
|
382 |
JunctionMapVisitor(ClosureJunctionSet& fwd_splices, |
383 |
ClosureJunctionSet& rev_splices, |
384 |
map<uint32_t, RefCIF >& finders) |
385 |
{ |
386 |
static const uint32_t bowtie_padding = 5; |
387 |
for (map<uint32_t, RefCIF >::iterator itr = finders.begin(); |
388 |
itr != finders.end(); |
389 |
++itr) |
390 |
{ |
391 |
JF* fwd_jf = new JF(itr->second.fwd_cif, |
392 |
inner_dist_mean, |
393 |
inner_dist_std_dev, |
394 |
min_closure_intron_length, |
395 |
min_closure_exon_length, |
396 |
bowtie_padding, |
397 |
island_extension); |
398 |
JF* rev_jf = new JF(itr->second.rev_cif, |
399 |
inner_dist_mean, |
400 |
inner_dist_std_dev, |
401 |
min_closure_intron_length, |
402 |
min_closure_exon_length, |
403 |
bowtie_padding, |
404 |
island_extension); |
405 |
|
406 |
_finders[itr->first] = make_pair(JunctionTable(&fwd_splices, fwd_jf,false), |
407 |
JunctionTable(&rev_splices, rev_jf, true)); |
408 |
} |
409 |
} |
410 |
|
411 |
|
412 |
void visit(BestPairingHits& pairings) |
413 |
{ |
414 |
if (!pairings.first.num_mapped == 2) |
415 |
return; |
416 |
for (size_t i = 0; |
417 |
i < pairings.second.size(); |
418 |
++i) |
419 |
{ |
420 |
|
421 |
InsertAlignment& al = pairings.second[i]; |
422 |
|
423 |
BowtieHit& bh_left = *(al.left_alignment); |
424 |
BowtieHit& bh_right = *(al.right_alignment); |
425 |
|
426 |
assert (bh_left.ref_id() == bh_right.ref_id()); |
427 |
|
428 |
map<uint32_t, pair<JunctionTable, JunctionTable> >::iterator if_itr = _finders.find(bh_left.ref_id()); |
429 |
assert (if_itr != _finders.end()); |
430 |
|
431 |
JF& fwd_jf = *(if_itr->second.first.jf); |
432 |
fwd_jf.possible_junctions(*(if_itr->second.first.possible_splices), bh_left, bh_right); |
433 |
|
434 |
JF& rev_jf = *(if_itr->second.second.jf); |
435 |
rev_jf.possible_junctions(*(if_itr->second.second.possible_splices), bh_left, bh_right); |
436 |
} |
437 |
} |
438 |
}; |
439 |
|
440 |
void closure_driver(vector<FZPipe>& map1, |
441 |
vector<FZPipe>& map2, |
442 |
ifstream& ref_stream, |
443 |
FILE* juncs_file) |
444 |
{ |
445 |
typedef RefSequenceTable::Sequence Reference; |
446 |
|
447 |
ReadTable it; |
448 |
RefSequenceTable rt(true); |
449 |
|
450 |
BowtieHitFactory hit_factory(it,rt); |
451 |
|
452 |
fprintf (stderr, "Finding near-covered motifs..."); |
453 |
CoverageMapVisitor cov_map_visitor(ref_stream, rt); |
454 |
uint32_t coverage_attempts = 0; |
455 |
|
456 |
assert(map1.size() == map2.size()); |
457 |
for (size_t num = 0; num < map1.size(); ++num) |
458 |
{ |
459 |
HitStream left_hs(map1[num].file, &hit_factory, false, true, false); |
460 |
HitStream right_hs(map2[num].file, &hit_factory, false, true, false); |
461 |
|
462 |
HitsForRead curr_left_hit_group; |
463 |
HitsForRead curr_right_hit_group; |
464 |
|
465 |
left_hs.next_read_hits(curr_left_hit_group); |
466 |
right_hs.next_read_hits(curr_right_hit_group); |
467 |
|
468 |
uint32_t curr_right_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
469 |
uint32_t curr_left_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
470 |
|
471 |
while(curr_left_obs_order != 0xFFFFFFFF && |
472 |
curr_right_obs_order != 0xFFFFFFFF) |
473 |
{ |
474 |
while (curr_left_obs_order < curr_right_obs_order&& |
475 |
curr_left_obs_order != 0xFFFFFFFF && curr_right_obs_order != 0xFFFFFFFF) |
476 |
{ |
477 |
// Get hit group |
478 |
|
479 |
left_hs.next_read_hits(curr_left_hit_group); |
480 |
curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
481 |
} |
482 |
|
483 |
while (curr_left_obs_order > curr_right_obs_order && |
484 |
curr_left_obs_order != 0xFFFFFFFF && curr_right_obs_order != 0xFFFFFFFF) |
485 |
{ |
486 |
// Get hit group |
487 |
|
488 |
right_hs.next_read_hits(curr_right_hit_group); |
489 |
curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
490 |
} |
491 |
|
492 |
while (curr_left_obs_order == curr_right_obs_order && |
493 |
curr_left_obs_order != 0xFFFFFFFF && curr_right_obs_order != 0xFFFFFFFF) |
494 |
{ |
495 |
if (coverage_attempts++ % 1000 == 0) |
496 |
fprintf (stderr, "Adding covered motifs from pair %d\n", coverage_attempts); |
497 |
visit_best_pairing(curr_left_hit_group, curr_right_hit_group, cov_map_visitor); |
498 |
|
499 |
left_hs.next_read_hits(curr_left_hit_group); |
500 |
curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
501 |
|
502 |
right_hs.next_read_hits(curr_right_hit_group); |
503 |
curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
504 |
} |
505 |
} |
506 |
} |
507 |
|
508 |
cov_map_visitor.finalize(); |
509 |
fprintf (stderr, "done\n"); |
510 |
|
511 |
ClosureJunctionSet fwd_splices; |
512 |
ClosureJunctionSet rev_splices; |
513 |
|
514 |
JunctionMapVisitor junc_map_visitor(fwd_splices, rev_splices, cov_map_visitor.finders); |
515 |
fprintf (stderr, "Searching for closures..."); |
516 |
uint32_t closure_attempts = 0; |
517 |
|
518 |
for (size_t num = 0; num < map1.size(); ++num) |
519 |
{ |
520 |
map1[num].rewind(); |
521 |
map2[num].rewind(); |
522 |
|
523 |
HitStream left_hs = HitStream(map1[num].file, &hit_factory, false, true, false); |
524 |
HitStream right_hs = HitStream(map2[num].file, &hit_factory, false, true, false); |
525 |
|
526 |
HitsForRead curr_left_hit_group; |
527 |
HitsForRead curr_right_hit_group; |
528 |
|
529 |
left_hs.next_read_hits(curr_left_hit_group); |
530 |
right_hs.next_read_hits(curr_right_hit_group); |
531 |
|
532 |
uint32_t curr_right_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
533 |
uint32_t curr_left_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
534 |
|
535 |
while(curr_left_obs_order != 0xFFFFFFFF && |
536 |
curr_right_obs_order != 0xFFFFFFFF) |
537 |
{ |
538 |
while (curr_left_obs_order < curr_right_obs_order && |
539 |
curr_left_obs_order != 0xFFFFFFFF && curr_right_obs_order != 0xFFFFFFFF) |
540 |
{ |
541 |
// Get hit group |
542 |
|
543 |
left_hs.next_read_hits(curr_left_hit_group); |
544 |
curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
545 |
} |
546 |
|
547 |
while (curr_left_obs_order > curr_right_obs_order && |
548 |
curr_left_obs_order != 0xFFFFFFFF && curr_right_obs_order != 0xFFFFFFFF) |
549 |
{ |
550 |
// Get hit group |
551 |
|
552 |
right_hs.next_read_hits(curr_right_hit_group); |
553 |
curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
554 |
} |
555 |
|
556 |
while (curr_left_obs_order == curr_right_obs_order && |
557 |
curr_left_obs_order != 0xFFFFFFFF && curr_right_obs_order != 0xFFFFFFFF) |
558 |
{ |
559 |
if (closure_attempts++ % 1000 == 0) |
560 |
fprintf (stderr, "Trying to close pair %d\n", closure_attempts); |
561 |
visit_best_pairing(curr_left_hit_group, curr_right_hit_group, junc_map_visitor); |
562 |
left_hs.next_read_hits(curr_left_hit_group); |
563 |
curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id); |
564 |
|
565 |
right_hs.next_read_hits(curr_right_hit_group); |
566 |
curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id); |
567 |
} |
568 |
} |
569 |
} |
570 |
|
571 |
for (size_t num = 0; num < map1.size(); ++num) |
572 |
{ |
573 |
map1[num].close(); |
574 |
map2[num].close(); |
575 |
} |
576 |
|
577 |
fprintf(stderr, "%lu Forward strand splices\n", fwd_splices.size()); |
578 |
fprintf(stderr, "%lu Reverse strand splices\n", rev_splices.size()); |
579 |
|
580 |
fprintf (stderr, "done\n"); |
581 |
uint32_t num_potential_splices = 0; |
582 |
fprintf (stderr, "Reporting possible junctions..."); |
583 |
map<uint32_t, pair<JunctionMapVisitor::JunctionTable, JunctionMapVisitor::JunctionTable> >::iterator f_itr; |
584 |
f_itr = junc_map_visitor._finders.begin(); |
585 |
|
586 |
ClosureJunctionSet::iterator j_itr; |
587 |
j_itr = fwd_splices.begin(); |
588 |
while (j_itr != fwd_splices.end()) |
589 |
{ |
590 |
fprintf (juncs_file,"%s\t%u\t%u\t%c\n", |
591 |
rt.get_name(j_itr->refid), |
592 |
j_itr->left,j_itr->right,'+'); |
593 |
++num_potential_splices; |
594 |
++j_itr; |
595 |
} |
596 |
|
597 |
j_itr = rev_splices.begin(); |
598 |
while (j_itr != rev_splices.end()) |
599 |
{ |
600 |
fprintf (juncs_file,"%s\t%u\t%u\t%c\n", |
601 |
rt.get_name(j_itr->refid), |
602 |
j_itr->left,j_itr->right,'-'); |
603 |
++num_potential_splices; |
604 |
++j_itr; |
605 |
} |
606 |
|
607 |
//accept_all_best_hits(best_status_for_inserts); |
608 |
fprintf(stderr, "done\n"); |
609 |
fprintf(stderr, "Searched for closures between %d pairs\n", searched); |
610 |
fprintf(stderr, "Successfully closed %d pairs\n", closed); |
611 |
|
612 |
fprintf(stderr, "Found %d total possible splices\n", num_potential_splices); |
613 |
} |
614 |
|
615 |
void print_usage() |
616 |
{ |
617 |
fprintf(stderr, "Usage: closure_juncs <closure.juncs> <ref.fa> <left_map.bwtout> <right_map.bwtout>\n"); |
618 |
} |
619 |
|
620 |
int main(int argc, char** argv) |
621 |
{ |
622 |
fprintf(stderr, "closure_juncs v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION); |
623 |
fprintf(stderr, "---------------------------\n"); |
624 |
|
625 |
int parse_ret = parse_options(argc, argv, print_usage); |
626 |
if (parse_ret) |
627 |
return parse_ret; |
628 |
|
629 |
if(optind >= argc) |
630 |
{ |
631 |
print_usage(); |
632 |
return 1; |
633 |
} |
634 |
|
635 |
string junctions_file_name = argv[optind++]; |
636 |
|
637 |
if(optind >= argc) |
638 |
{ |
639 |
print_usage(); |
640 |
return 1; |
641 |
} |
642 |
|
643 |
string ref_fasta = argv[optind++]; |
644 |
|
645 |
if(optind >= argc) |
646 |
{ |
647 |
print_usage(); |
648 |
return 1; |
649 |
} |
650 |
|
651 |
string left_file_list = argv[optind++]; |
652 |
vector<string> left_file_names; |
653 |
vector<FZPipe> left_files; |
654 |
tokenize(left_file_list, ",", left_file_names); |
655 |
|
656 |
string unzcmd = getUnpackCmd(left_file_names[0], false); |
657 |
for (size_t i = 0; i < left_file_names.size(); ++i) |
658 |
{ |
659 |
FZPipe seg_file(left_file_names[i], unzcmd); |
660 |
if (seg_file.file == NULL) |
661 |
{ |
662 |
fprintf(stderr, "Error: cannot open file %s for reading\n", |
663 |
left_file_names[i].c_str()); |
664 |
exit(1); |
665 |
} |
666 |
left_files.push_back(seg_file); |
667 |
} |
668 |
|
669 |
if(optind >= argc) |
670 |
{ |
671 |
print_usage(); |
672 |
return 1; |
673 |
} |
674 |
|
675 |
string right_file_list = argv[optind++]; |
676 |
vector<string> right_file_names; |
677 |
vector<FZPipe> right_files; |
678 |
tokenize(right_file_list, ",", right_file_names); |
679 |
for (size_t i = 0; i < right_file_names.size(); ++i) |
680 |
{ |
681 |
FZPipe seg_file(right_file_names[i], unzcmd); |
682 |
if (seg_file.file == NULL) |
683 |
{ |
684 |
fprintf(stderr, "Error: cannot open %s for reading\n", |
685 |
right_file_names[i].c_str()); |
686 |
exit(1); |
687 |
} |
688 |
right_files.push_back(seg_file); |
689 |
} |
690 |
|
691 |
ifstream ref_stream(ref_fasta.c_str(), ifstream::in); |
692 |
|
693 |
FILE* splice_db = fopen(junctions_file_name.c_str(), "w"); |
694 |
if (splice_db == NULL) |
695 |
{ |
696 |
fprintf(stderr, "Error: cannot open junctions file %s for writing\n", |
697 |
junctions_file_name.c_str()); |
698 |
exit(1); |
699 |
} |
700 |
|
701 |
closure_driver(left_files, |
702 |
right_files, |
703 |
ref_stream, |
704 |
splice_db); |
705 |
|
706 |
return 0; |
707 |
} |