ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/closures.cpp
Revision: 154
Committed: Tue Jan 24 02:29:21 2012 UTC (12 years, 7 months ago) by gpertea
File size: 22466 byte(s)
Log Message:
massive update with Daehwan's work

Line File contents
1 /*
2 * closures.cpp
3 * TopHat
4 *
5 * Created by Cole Trapnell on 1/15/09.
6 * Copyright 2009 Cole Trapnell. All rights reserved.
7 *
8 */
9
10 #ifdef HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #include <iostream>
15 #include <fstream>
16 #include <getopt.h>
17 #include <seqan/sequence.h>
18 #include <seqan/file.h>
19
20 #include "bwt_map.h"
21 #include "inserts.h"
22 #include "closures.h"
23 #include "reads.h"
24 #include "tokenize.h"
25 #include "fusions.h"
26
27 using namespace seqan;
28 using namespace std;
29
30
31 bool possible_cotranscript(const BowtieHit& h1, const BowtieHit& h2, bool check_strand = true)
32 {
33 if (h1.insert_id() != h2.insert_id())
34 return false;
35 int min_mate_inner_dist = inner_dist_mean - inner_dist_std_dev;
36 if (max_mate_inner_dist == -1)
37 {
38 max_mate_inner_dist = inner_dist_mean + inner_dist_std_dev;
39 }
40
41 InsertAlignmentGrade grade(h1,h2, min_mate_inner_dist, max_mate_inner_dist);
42 return (!grade.too_far && !grade.too_close && grade.opposite_strands);
43 }
44
45
46 void check_mates(const HitList& hits1_in_ref,
47 const HitList& hits2_in_ref,
48 vector<pair<size_t, size_t> >& happy_mates,
49 vector<size_t>& map1_singletons,
50 vector<size_t>& map2_singletons)
51 {
52 std::set<size_t> marked;
53 // TODO: if this shows up on the profile, replace it with a linear
54 // time algorithm. This one is 2*n*lg(n).
55 HitList::const_iterator last_good = hits2_in_ref.begin();
56
57 for (size_t i = 0; i < hits1_in_ref.size(); ++i)
58 {
59 pair<HitList::const_iterator, HitList::const_iterator> range_pair;
60 range_pair = equal_range(last_good, hits2_in_ref.end(),
61 hits1_in_ref[i], hit_insert_id_lt);
62 bool found_hit = false;
63 if (range_pair.first != range_pair.second)
64 last_good = range_pair.first;
65 for (HitList::const_iterator f = range_pair.first;
66 f != range_pair.second;
67 ++f)
68 {
69 if (possible_cotranscript(hits1_in_ref[i], *f))
70 {
71 happy_mates.push_back(make_pair(i,f - hits2_in_ref.begin()));
72 marked.insert(f - hits2_in_ref.begin());
73 found_hit = true;
74 }
75 }
76 if (!found_hit)
77 map1_singletons.push_back(i);
78 }
79
80 for (size_t i = 0; i < hits2_in_ref.size(); ++i)
81 {
82 if (marked.find(i) == marked.end())
83 {
84 map2_singletons.push_back(i);
85 }
86 }
87 }
88
89 void find_fusion_closure(HitsForRead& left_hits,
90 HitsForRead& right_hits,
91 std::set<Fusion>& fusions)
92 {
93 if (left_hits.hits.size() > 3 || right_hits.hits.size() > 3)
94 return;
95
96 for (size_t left_index = 0; left_index < left_hits.hits.size(); ++left_index)
97 {
98 for (size_t right_index = 0; right_index < right_hits.hits.size(); ++right_index)
99 {
100 BowtieHit* leftHit = &left_hits.hits[left_index];
101 BowtieHit* rightHit = &right_hits.hits[right_index];
102
103 uint32_t dir = FUSION_FF;
104 if (leftHit->antisense_align() != rightHit->antisense_align())
105 dir = FUSION_FF;
106 else if (!leftHit->antisense_align())
107 dir = FUSION_FR;
108 else
109 dir = FUSION_RF;
110
111 if (dir == FUSION_FF && leftHit->antisense_align())
112 {
113 BowtieHit * tmp = leftHit;
114 leftHit = rightHit;
115 rightHit = tmp;
116 }
117
118 uint32_t left, right;
119 if (dir == FUSION_FF || dir == FUSION_FR)
120 left = leftHit->right() - 4;
121 else
122 left = leftHit->left() + 4;
123
124 if (dir == FUSION_FF || dir == FUSION_RF)
125 right = rightHit->left() + 4;
126 else
127 right = rightHit->right() - 4;
128
129 if (leftHit->ref_id() == rightHit->ref_id() && dir == FUSION_FF)
130 {
131 int dist = 0;
132 dist = rightHit->left() - leftHit->right();
133
134 if (dist >= 0 && dist <= (int)fusion_min_dist)
135 continue;
136 }
137
138 uint32_t ref_id1 = leftHit->ref_id();
139 uint32_t ref_id2 = rightHit->ref_id();
140
141 if (dir == FUSION_FR || dir == FUSION_RF)
142 {
143 if ((ref_id2 < ref_id1) || (ref_id1 == ref_id2 && left > right))
144 {
145 uint32_t temp = ref_id1;
146 ref_id1 = ref_id2;
147 ref_id2 = temp;
148
149 temp = left;
150 left = right;
151 right = temp;
152 }
153 }
154
155 fusions.insert(Fusion(ref_id1, ref_id2, left, right, dir));
156 }
157 }
158 }
159
160 bool prefer_shorter_pairs = true;
161
162 typedef pair<InsertAlignmentGrade, vector<InsertAlignment> > BestPairingHits;
163
164 template<typename Visitor>
165 void visit_best_pairing(HitsForRead& left_hit_group,
166 HitsForRead& right_hit_group,
167 Visitor& visitor)
168 {
169 BestPairingHits insert_best;
170
171 for (size_t i = 0; i < left_hit_group.hits.size(); ++i)
172 {
173 BowtieHit& h1 = left_hit_group.hits[i];
174
175 for (size_t j = 0; j < right_hit_group.hits.size(); j++)
176 {
177 BowtieHit& h2 = right_hit_group.hits[j];
178 if (h1.ref_id() != h2.ref_id())
179 continue;
180
181 uint32_t refid = h1.ref_id();
182
183 int min_mate_inner_dist = inner_dist_mean - inner_dist_std_dev;
184 if (max_mate_inner_dist == -1)
185 {
186 max_mate_inner_dist = inner_dist_mean + inner_dist_std_dev;
187 }
188 InsertAlignmentGrade s(h1, h2, min_mate_inner_dist, max_mate_inner_dist);
189
190 //pair<InsertAlignmentGrade, vector<InsertAlignment> >& insert_best
191 // = best_status_for_inserts[curr_left_obs_order];
192 InsertAlignmentGrade& current = insert_best.first;
193 // Is the new status better than the current best one?
194 if (current < s)
195 {
196 insert_best.second.clear();
197 current = s;
198 insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
199 }
200 else if (! (s < current))
201 {
202 if (prefer_shorter_pairs && current.num_mapped == 2)
203 {
204 pair<int, int> dc = pair_distances(*(insert_best.second[0].left_alignment), *(insert_best.second[0].right_alignment));
205 pair<int, int> ds = pair_distances(h1,h2);
206 if (ds.second < dc.second)
207 {
208 //chucked_for_shorter_pair += insert_best.second.size();
209 insert_best.second.clear();
210 current = s;
211 insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
212
213 }
214 }
215 else
216 {
217 insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
218 }
219 }
220 }
221 }
222
223 visitor.visit(insert_best);
224 }
225
226 class CovMapIntronFinder
227 {
228 std::set<SpliceMotif> _tmp_hits;
229 vector<SpliceMotif> _hits;
230 RefSequenceTable::Sequence* _ref_seq;
231 public:
232 CovMapIntronFinder() : _ref_seq(NULL) {}
233 CovMapIntronFinder(RefSequenceTable::Sequence* ref_seq) :
234 _ref_seq(ref_seq),
235 _lms(vector<bool>(length(*ref_seq), false)),
236 _rms(vector<bool>(length(*ref_seq), false)){}
237
238 void add_motifs_in_window(int left,
239 int right,
240 const string& left_motif,
241 const string& right_motif)
242 {
243 size_t l_start = max(0,left);
244 for (int i = l_start;
245 i < min(right, (int)length(*_ref_seq) - 2);
246 ++i)
247 {
248 seqan::Infix<RefSequenceTable::Sequence>::Type curr
249 = seqan::infix(*_ref_seq,i, i + 2);
250 if (curr == left_motif)
251 _lms[i] = true;
252 else if (curr == right_motif)
253 _rms[i] = true;
254 }
255 }
256
257 void finalize()
258 {
259 int pos = 0;
260 for (vector<bool>::iterator itr = _lms.begin();
261 itr != _lms.end();
262 ++itr, ++pos)
263 {
264 if (*itr)
265 _hits.push_back(SpliceMotif(pos, true));
266 }
267
268 pos = 0;
269 for (vector<bool>::iterator itr = _rms.begin();
270 itr != _rms.end();
271 ++itr, ++pos)
272 {
273 if (*itr)
274 _hits.push_back(SpliceMotif(pos, false));
275 }
276
277 sort(_hits.begin(), _hits.end());
278 }
279
280 size_t seq_len() const { return length(*_ref_seq); }
281 const vector<SpliceMotif>& hits() const { return _hits; }
282 vector<bool> _lms;
283 vector<bool> _rms;
284 };
285
286 typedef CovMapIntronFinder CIF;
287
288 struct RefCIF
289 {
290 RefCIF() : ref_seq(NULL) {}
291 RefCIF(const CIF& f, const CIF& r, RefSequenceTable::Sequence* s) :
292 fwd_cif(f), rev_cif(r), ref_seq(s) {}
293 CIF fwd_cif;
294 CIF rev_cif;
295 RefSequenceTable::Sequence* ref_seq;
296 };
297
298 class CoverageMapVisitor
299 {
300 public:
301 map<uint32_t, RefCIF> finders;
302
303 CoverageMapVisitor(istream& ref_stream,
304 RefSequenceTable& rt)
305 {
306
307 while(ref_stream.good() &&
308 !ref_stream.eof())
309 {
310 RefSequenceTable::Sequence* ref_str = new RefSequenceTable::Sequence;
311 string name;
312 readMeta(ref_stream, name, Fasta());
313 string::size_type space_pos = name.find_first_of(" \t\r");
314 if (space_pos != string::npos)
315 {
316 name.resize(space_pos);
317 }
318 read(ref_stream, *ref_str, Fasta());
319
320 uint32_t ref_id = rt.get_id(name, NULL, 0);
321 finders[ref_id] = RefCIF(CIF(ref_str), CIF(ref_str), ref_str);
322 }
323 }
324
325 void visit(BestPairingHits& pairings)
326 {
327 if (!pairings.first.num_mapped == 2)
328 return;
329
330 static string fwd_lm("GT");
331 static string fwd_rm("AG");
332 static string rev_lm("CT");
333 static string rev_rm("AC");
334
335 for (size_t i = 0;
336 i < pairings.second.size();
337 ++i)
338 {
339
340 InsertAlignment& al = pairings.second[i];
341
342 BowtieHit& bh_left = *(al.left_alignment);
343 BowtieHit& bh_right = *(al.right_alignment);
344
345 assert (bh_left.ref_id() == bh_right.ref_id());
346
347 map<uint32_t, RefCIF >::iterator if_itr = finders.find(bh_left.ref_id());
348 assert (if_itr != finders.end());
349
350 RefCIF& ref_finders = if_itr->second;
351
352 pair<int, int> ds = pair_distances(bh_left, bh_right);
353
354 int minor_hit_start, major_hit_start;
355 int minor_hit_end, major_hit_end;
356 if (bh_left.left() < bh_right.left())
357 {
358 minor_hit_start = (int)bh_left.left();
359 minor_hit_end = (int)bh_left.right();
360 major_hit_start = (int)bh_right.left();
361 major_hit_end = (int)bh_right.right();
362 }
363 else
364 {
365 minor_hit_start = (int)bh_right.left();
366 minor_hit_end = (int)bh_right.right();
367 major_hit_start = (int)bh_left.left();
368 major_hit_end = (int)bh_left.right();
369 }
370
371 int inner_dist = major_hit_start - minor_hit_end;
372
373 bool skip_fwd = false;
374 bool skip_rev = false;
375
376 if (library_type == FR_FIRSTSTRAND)
377 {
378 if (bh_left.antisense_align()) skip_rev = true;
379 else skip_fwd = true;
380 }
381
382 if (library_type == FR_SECONDSTRAND)
383 {
384 if (bh_left.antisense_align()) skip_fwd = true;
385 else skip_rev = true;
386 }
387
388 if (inner_dist > inner_dist_mean + 2 * inner_dist_std_dev)
389 {
390 // Forward strand
391 if (!skip_fwd)
392 {
393 ref_finders.fwd_cif.add_motifs_in_window((int)bh_left.left() - 10,
394 bh_left.right() + 10,
395 fwd_lm,
396 fwd_rm);
397
398 ref_finders.fwd_cif.add_motifs_in_window((int)bh_right.left() - 10,
399 bh_right.right() + 10,
400 fwd_lm,
401 fwd_rm);
402 }
403
404 // Reverse strand
405 if (!skip_rev)
406 {
407 ref_finders.rev_cif.add_motifs_in_window((int)bh_left.left() - 10,
408 bh_left.right() + 10,
409 rev_lm,
410 rev_rm);
411
412 ref_finders.rev_cif.add_motifs_in_window((int)bh_right.left() - 10,
413 bh_right.right() + 10,
414 rev_lm,
415 rev_rm);
416 }
417 }
418 }
419 }
420
421 void finalize()
422 {
423 for (map<uint32_t, RefCIF >::iterator itr = finders.begin();
424 itr != finders.end();
425 ++itr)
426 {
427 itr->second.fwd_cif.finalize();
428 itr->second.rev_cif.finalize();
429 delete itr->second.ref_seq;
430 itr->second.ref_seq = NULL;
431 }
432 }
433 };
434
435
436 class JunctionMapVisitor
437 {
438 public:
439 typedef JunctionFinder<RefSequenceTable::Sequence, CIF> JF;
440
441 struct JunctionTable
442 {
443 JunctionTable() : jf(NULL), possible_splices(NULL) {}
444 JunctionTable(ClosureJunctionSet* ps, JF* _jf, bool as)
445 : jf(_jf), possible_splices(ps), antisense(as) {}
446
447 JF* jf;
448 ClosureJunctionSet* possible_splices;
449 bool antisense;
450 };
451
452
453 map<uint32_t, pair<JunctionTable, JunctionTable> > _finders;
454
455 JunctionMapVisitor(ClosureJunctionSet& fwd_splices,
456 ClosureJunctionSet& rev_splices,
457 map<uint32_t, RefCIF >& finders)
458 {
459 static const uint32_t bowtie_padding = 5;
460 for (map<uint32_t, RefCIF >::iterator itr = finders.begin();
461 itr != finders.end();
462 ++itr)
463 {
464 JF* fwd_jf = new JF(itr->second.fwd_cif,
465 inner_dist_mean,
466 inner_dist_std_dev,
467 min_closure_intron_length,
468 min_closure_exon_length,
469 bowtie_padding,
470 island_extension);
471 JF* rev_jf = new JF(itr->second.rev_cif,
472 inner_dist_mean,
473 inner_dist_std_dev,
474 min_closure_intron_length,
475 min_closure_exon_length,
476 bowtie_padding,
477 island_extension);
478
479 _finders[itr->first] = make_pair(JunctionTable(&fwd_splices, fwd_jf,false),
480 JunctionTable(&rev_splices, rev_jf, true));
481 }
482 }
483
484
485 void visit(BestPairingHits& pairings)
486 {
487 if (!pairings.first.num_mapped == 2)
488 return;
489 for (size_t i = 0;
490 i < pairings.second.size();
491 ++i)
492 {
493
494 InsertAlignment& al = pairings.second[i];
495
496 BowtieHit& bh_left = *(al.left_alignment);
497 BowtieHit& bh_right = *(al.right_alignment);
498
499 assert (bh_left.ref_id() == bh_right.ref_id());
500
501 map<uint32_t, pair<JunctionTable, JunctionTable> >::iterator if_itr = _finders.find(bh_left.ref_id());
502 assert (if_itr != _finders.end());
503
504 JF& fwd_jf = *(if_itr->second.first.jf);
505 fwd_jf.possible_junctions(*(if_itr->second.first.possible_splices), bh_left, bh_right);
506
507 JF& rev_jf = *(if_itr->second.second.jf);
508 rev_jf.possible_junctions(*(if_itr->second.second.possible_splices), bh_left, bh_right);
509 }
510 }
511 };
512
513 void closure_driver(vector<FZPipe>& map1,
514 vector<FZPipe>& map2,
515 ifstream& ref_stream,
516 FILE* juncs_file,
517 FILE* fusions_out)
518 {
519 typedef RefSequenceTable::Sequence Reference;
520
521 ReadTable it;
522 RefSequenceTable rt(true);
523
524 BowtieHitFactory hit_factory(it, rt);
525
526 std::set<Fusion> fusions;
527
528 fprintf (stderr, "Finding near-covered motifs...");
529 CoverageMapVisitor cov_map_visitor(ref_stream, rt);
530 uint32_t coverage_attempts = 0;
531
532 assert(map1.size() == map2.size());
533 for (size_t num = 0; num < map1.size(); ++num)
534 {
535 HitStream left_hs(map1[num].file, &hit_factory, false, true, false);
536 HitStream right_hs(map2[num].file, &hit_factory, false, true, false);
537
538 HitsForRead curr_left_hit_group;
539 HitsForRead curr_right_hit_group;
540
541 left_hs.next_read_hits(curr_left_hit_group);
542 right_hs.next_read_hits(curr_right_hit_group);
543
544 uint32_t curr_right_obs_order = it.observation_order(curr_left_hit_group.insert_id);
545 uint32_t curr_left_obs_order = it.observation_order(curr_right_hit_group.insert_id);
546
547 while(curr_left_obs_order != VMAXINT32 &&
548 curr_right_obs_order != VMAXINT32)
549 {
550 while (curr_left_obs_order < curr_right_obs_order&&
551 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
552 {
553 // Get hit group
554
555 left_hs.next_read_hits(curr_left_hit_group);
556 curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
557 }
558
559 while (curr_left_obs_order > curr_right_obs_order &&
560 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
561 {
562 // Get hit group
563
564 right_hs.next_read_hits(curr_right_hit_group);
565 curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
566 }
567
568 while (curr_left_obs_order == curr_right_obs_order &&
569 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
570 {
571 if (num == 0)
572 find_fusion_closure(curr_left_hit_group, curr_right_hit_group, fusions);
573
574 if (coverage_attempts++ % 10000 == 0)
575 fprintf (stderr, "Adding covered motifs from pair %d\n", coverage_attempts);
576
577 visit_best_pairing(curr_left_hit_group, curr_right_hit_group, cov_map_visitor);
578
579 left_hs.next_read_hits(curr_left_hit_group);
580 curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
581
582 right_hs.next_read_hits(curr_right_hit_group);
583 curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
584 }
585 }
586 }
587
588 cov_map_visitor.finalize();
589 fprintf (stderr, "done\n");
590
591 ClosureJunctionSet fwd_splices;
592 ClosureJunctionSet rev_splices;
593
594 JunctionMapVisitor junc_map_visitor(fwd_splices, rev_splices, cov_map_visitor.finders);
595 fprintf (stderr, "Searching for closures...");
596 uint32_t closure_attempts = 0;
597
598 for (size_t num = 0; num < map1.size(); ++num)
599 {
600 map1[num].rewind();
601 map2[num].rewind();
602
603 HitStream left_hs = HitStream(map1[num].file, &hit_factory, false, true, false);
604 HitStream right_hs = HitStream(map2[num].file, &hit_factory, false, true, false);
605
606 HitsForRead curr_left_hit_group;
607 HitsForRead curr_right_hit_group;
608
609 left_hs.next_read_hits(curr_left_hit_group);
610 right_hs.next_read_hits(curr_right_hit_group);
611
612 uint32_t curr_right_obs_order = it.observation_order(curr_left_hit_group.insert_id);
613 uint32_t curr_left_obs_order = it.observation_order(curr_right_hit_group.insert_id);
614
615 while(curr_left_obs_order != VMAXINT32 &&
616 curr_right_obs_order != VMAXINT32)
617 {
618 while (curr_left_obs_order < curr_right_obs_order &&
619 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
620 {
621 // Get hit group
622
623 left_hs.next_read_hits(curr_left_hit_group);
624 curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
625 }
626
627 while (curr_left_obs_order > curr_right_obs_order &&
628 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
629 {
630 // Get hit group
631
632 right_hs.next_read_hits(curr_right_hit_group);
633 curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
634 }
635
636 while (curr_left_obs_order == curr_right_obs_order &&
637 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
638 {
639 if (closure_attempts++ % 10000 == 0)
640 fprintf (stderr, "Trying to close pair %d\n", closure_attempts);
641
642 visit_best_pairing(curr_left_hit_group, curr_right_hit_group, junc_map_visitor);
643 left_hs.next_read_hits(curr_left_hit_group);
644 curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
645
646 right_hs.next_read_hits(curr_right_hit_group);
647 curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
648 }
649 }
650 }
651
652 for (size_t num = 0; num < map1.size(); ++num)
653 {
654 map1[num].close();
655 map2[num].close();
656 }
657
658 fprintf(stderr, "%lu Forward strand splices\n", fwd_splices.size());
659 fprintf(stderr, "%lu Reverse strand splices\n", rev_splices.size());
660
661 fprintf (stderr, "done\n");
662 uint32_t num_potential_splices = 0;
663 fprintf (stderr, "Reporting possible junctions...");
664 map<uint32_t, pair<JunctionMapVisitor::JunctionTable, JunctionMapVisitor::JunctionTable> >::iterator f_itr;
665 f_itr = junc_map_visitor._finders.begin();
666
667 ClosureJunctionSet::iterator j_itr;
668 j_itr = fwd_splices.begin();
669 while (j_itr != fwd_splices.end())
670 {
671 fprintf (juncs_file,"%s\t%u\t%u\t%c\n",
672 rt.get_name(j_itr->refid),
673 j_itr->left,j_itr->right,'+');
674 ++num_potential_splices;
675 ++j_itr;
676 }
677
678 j_itr = rev_splices.begin();
679 while (j_itr != rev_splices.end())
680 {
681 fprintf (juncs_file,"%s\t%u\t%u\t%c\n",
682 rt.get_name(j_itr->refid),
683 j_itr->left,j_itr->right,'-');
684 ++num_potential_splices;
685 ++j_itr;
686 }
687
688 //accept_all_best_hits(best_status_for_inserts);
689 fprintf(stderr, "done\n");
690 fprintf(stderr, "Searched for closures between %d pairs\n", searched);
691 fprintf(stderr, "Successfully closed %d pairs\n", closed);
692
693 fprintf(stderr, "Found %d total possible splices\n", num_potential_splices);
694
695 // daehwan
696 #if 0
697 fprintf (stderr, "Reporting potential fusions...\n");
698 if(fusions_out){
699 for(std::set<Fusion>::iterator itr = fusions.begin(); itr != fusions.end(); ++itr){
700 const char* ref_name1 = rt.get_name(itr->refid1);
701 const char* ref_name2 = rt.get_name(itr->refid2);
702
703 const char* dir = "";
704 if (itr->dir == FUSION_FR)
705 dir = "fr";
706 else if(itr->dir == FUSION_RF)
707 dir = "rf";
708 else
709 dir = "ff";
710
711 fprintf(fusions_out,
712 "%s\t%d\t%s\t%d\t%s\n",
713 ref_name1,
714 itr->left,
715 ref_name2,
716 itr->right,
717 dir);
718 }
719 fclose(fusions_out);
720 }else{
721 fprintf(stderr, "Failed to open fusions file for writing\n");
722 }
723 #endif
724 }
725
726 void print_usage()
727 {
728 fprintf(stderr, "Usage: closure_juncs <closure.juncs> <ref.fa> <left_map.bwtout> <right_map.bwtout>\n");
729 }
730
731 int main(int argc, char** argv)
732 {
733 fprintf(stderr, "closure_juncs v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION);
734 fprintf(stderr, "---------------------------\n");
735
736 int parse_ret = parse_options(argc, argv, print_usage);
737 if (parse_ret)
738 return parse_ret;
739
740 if(optind >= argc)
741 {
742 print_usage();
743 return 1;
744 }
745
746 string junctions_file_name = argv[optind++];
747
748 if(optind >= argc)
749 {
750 print_usage();
751 return 1;
752 }
753
754 string fusions_file_name = argv[optind++];
755
756 if(optind >= argc)
757 {
758 print_usage();
759 return 1;
760 }
761
762 string ref_fasta = argv[optind++];
763
764 if(optind >= argc)
765 {
766 print_usage();
767 return 1;
768 }
769
770 string left_file_list = argv[optind++];
771 vector<string> left_file_names;
772 vector<FZPipe> left_files;
773 tokenize(left_file_list, ",", left_file_names);
774
775 string unzcmd = getUnpackCmd(left_file_names[0], false);
776 for (size_t i = 0; i < left_file_names.size(); ++i)
777 {
778 FZPipe seg_file(left_file_names[i], unzcmd);
779 if (seg_file.file == NULL)
780 {
781 fprintf(stderr, "Error: cannot open file %s for reading\n",
782 left_file_names[i].c_str());
783 exit(1);
784 }
785 left_files.push_back(seg_file);
786 }
787
788 if(optind >= argc)
789 {
790 print_usage();
791 return 1;
792 }
793
794 string right_file_list = argv[optind++];
795 vector<string> right_file_names;
796 vector<FZPipe> right_files;
797 tokenize(right_file_list, ",", right_file_names);
798 for (size_t i = 0; i < right_file_names.size(); ++i)
799 {
800 FZPipe seg_file(right_file_names[i], unzcmd);
801 if (seg_file.file == NULL)
802 {
803 fprintf(stderr, "Error: cannot open %s for reading\n",
804 right_file_names[i].c_str());
805 exit(1);
806 }
807 right_files.push_back(seg_file);
808 }
809
810 ifstream ref_stream(ref_fasta.c_str(), ifstream::in);
811
812 FILE* splice_db = fopen(junctions_file_name.c_str(), "w");
813 if (splice_db == NULL)
814 {
815 fprintf(stderr, "Error: cannot open junctions file %s for writing\n",
816 junctions_file_name.c_str());
817 exit(1);
818 }
819
820 FILE* fusion_db = fopen(fusions_file_name.c_str(), "w");
821 if (splice_db == NULL)
822 {
823 fprintf(stderr, "Error: cannot open fusions file %s for writing\n",
824 fusions_file_name.c_str());
825 exit(1);
826 }
827
828 closure_driver(left_files,
829 right_files,
830 ref_stream,
831 splice_db,
832 fusion_db);
833
834 return 0;
835 }