ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/closures.cpp
Revision: 233
Committed: Wed Apr 11 21:05:01 2012 UTC (12 years, 4 months ago) by gpertea
File size: 22020 byte(s)
Log Message:
Line User Rev File contents
1 gpertea 29 /*
2     * closures.cpp
3     * TopHat
4     *
5     * Created by Cole Trapnell on 1/15/09.
6     * Copyright 2009 Cole Trapnell. All rights reserved.
7     *
8     */
9    
10     #ifdef HAVE_CONFIG_H
11     #include <config.h>
12     #endif
13    
14     #include <iostream>
15     #include <fstream>
16     #include <getopt.h>
17 gpertea 154 #include <seqan/sequence.h>
18     #include <seqan/file.h>
19    
20 gpertea 29 #include "bwt_map.h"
21     #include "inserts.h"
22     #include "closures.h"
23     #include "reads.h"
24     #include "tokenize.h"
25 gpertea 154 #include "fusions.h"
26 gpertea 29
27     using namespace seqan;
28     using namespace std;
29    
30    
31     bool possible_cotranscript(const BowtieHit& h1, const BowtieHit& h2, bool check_strand = true)
32     {
33     if (h1.insert_id() != h2.insert_id())
34     return false;
35    
36 gpertea 233 InsertAlignmentGrade grade(h1, h2);
37 gpertea 29 return (!grade.too_far && !grade.too_close && grade.opposite_strands);
38     }
39    
40    
41     void check_mates(const HitList& hits1_in_ref,
42     const HitList& hits2_in_ref,
43     vector<pair<size_t, size_t> >& happy_mates,
44     vector<size_t>& map1_singletons,
45     vector<size_t>& map2_singletons)
46     {
47     std::set<size_t> marked;
48     // TODO: if this shows up on the profile, replace it with a linear
49     // time algorithm. This one is 2*n*lg(n).
50     HitList::const_iterator last_good = hits2_in_ref.begin();
51    
52     for (size_t i = 0; i < hits1_in_ref.size(); ++i)
53     {
54     pair<HitList::const_iterator, HitList::const_iterator> range_pair;
55     range_pair = equal_range(last_good, hits2_in_ref.end(),
56     hits1_in_ref[i], hit_insert_id_lt);
57     bool found_hit = false;
58     if (range_pair.first != range_pair.second)
59     last_good = range_pair.first;
60     for (HitList::const_iterator f = range_pair.first;
61     f != range_pair.second;
62     ++f)
63     {
64     if (possible_cotranscript(hits1_in_ref[i], *f))
65     {
66     happy_mates.push_back(make_pair(i,f - hits2_in_ref.begin()));
67     marked.insert(f - hits2_in_ref.begin());
68     found_hit = true;
69     }
70     }
71     if (!found_hit)
72     map1_singletons.push_back(i);
73     }
74    
75     for (size_t i = 0; i < hits2_in_ref.size(); ++i)
76     {
77     if (marked.find(i) == marked.end())
78     {
79     map2_singletons.push_back(i);
80     }
81     }
82     }
83    
84 gpertea 154 void find_fusion_closure(HitsForRead& left_hits,
85     HitsForRead& right_hits,
86     std::set<Fusion>& fusions)
87     {
88     if (left_hits.hits.size() > 3 || right_hits.hits.size() > 3)
89     return;
90    
91     for (size_t left_index = 0; left_index < left_hits.hits.size(); ++left_index)
92     {
93     for (size_t right_index = 0; right_index < right_hits.hits.size(); ++right_index)
94     {
95     BowtieHit* leftHit = &left_hits.hits[left_index];
96     BowtieHit* rightHit = &right_hits.hits[right_index];
97    
98     uint32_t dir = FUSION_FF;
99     if (leftHit->antisense_align() != rightHit->antisense_align())
100     dir = FUSION_FF;
101     else if (!leftHit->antisense_align())
102     dir = FUSION_FR;
103     else
104     dir = FUSION_RF;
105    
106     if (dir == FUSION_FF && leftHit->antisense_align())
107     {
108     BowtieHit * tmp = leftHit;
109     leftHit = rightHit;
110     rightHit = tmp;
111     }
112    
113     uint32_t left, right;
114     if (dir == FUSION_FF || dir == FUSION_FR)
115     left = leftHit->right() - 4;
116     else
117     left = leftHit->left() + 4;
118    
119     if (dir == FUSION_FF || dir == FUSION_RF)
120     right = rightHit->left() + 4;
121     else
122     right = rightHit->right() - 4;
123    
124     if (leftHit->ref_id() == rightHit->ref_id() && dir == FUSION_FF)
125     {
126     int dist = 0;
127     dist = rightHit->left() - leftHit->right();
128    
129     if (dist >= 0 && dist <= (int)fusion_min_dist)
130     continue;
131     }
132    
133     uint32_t ref_id1 = leftHit->ref_id();
134     uint32_t ref_id2 = rightHit->ref_id();
135    
136     if (dir == FUSION_FR || dir == FUSION_RF)
137     {
138     if ((ref_id2 < ref_id1) || (ref_id1 == ref_id2 && left > right))
139     {
140     uint32_t temp = ref_id1;
141     ref_id1 = ref_id2;
142     ref_id2 = temp;
143    
144     temp = left;
145     left = right;
146     right = temp;
147     }
148     }
149    
150     fusions.insert(Fusion(ref_id1, ref_id2, left, right, dir));
151     }
152     }
153     }
154    
155 gpertea 29 bool prefer_shorter_pairs = true;
156    
157     typedef pair<InsertAlignmentGrade, vector<InsertAlignment> > BestPairingHits;
158    
159     template<typename Visitor>
160     void visit_best_pairing(HitsForRead& left_hit_group,
161     HitsForRead& right_hit_group,
162     Visitor& visitor)
163     {
164     BestPairingHits insert_best;
165    
166     for (size_t i = 0; i < left_hit_group.hits.size(); ++i)
167     {
168     BowtieHit& h1 = left_hit_group.hits[i];
169    
170     for (size_t j = 0; j < right_hit_group.hits.size(); j++)
171     {
172     BowtieHit& h2 = right_hit_group.hits[j];
173     if (h1.ref_id() != h2.ref_id())
174     continue;
175    
176     uint32_t refid = h1.ref_id();
177 gpertea 233 InsertAlignmentGrade s(h1, h2);
178 gpertea 29
179     //pair<InsertAlignmentGrade, vector<InsertAlignment> >& insert_best
180     // = best_status_for_inserts[curr_left_obs_order];
181     InsertAlignmentGrade& current = insert_best.first;
182     // Is the new status better than the current best one?
183     if (current < s)
184     {
185     insert_best.second.clear();
186     current = s;
187     insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
188     }
189     else if (! (s < current))
190     {
191     if (prefer_shorter_pairs && current.num_mapped == 2)
192     {
193     pair<int, int> dc = pair_distances(*(insert_best.second[0].left_alignment), *(insert_best.second[0].right_alignment));
194     pair<int, int> ds = pair_distances(h1,h2);
195     if (ds.second < dc.second)
196     {
197     //chucked_for_shorter_pair += insert_best.second.size();
198     insert_best.second.clear();
199     current = s;
200     insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
201    
202     }
203     }
204     else
205     {
206     insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
207     }
208     }
209     }
210     }
211    
212     visitor.visit(insert_best);
213     }
214    
215     class CovMapIntronFinder
216     {
217     std::set<SpliceMotif> _tmp_hits;
218     vector<SpliceMotif> _hits;
219     RefSequenceTable::Sequence* _ref_seq;
220     public:
221     CovMapIntronFinder() : _ref_seq(NULL) {}
222     CovMapIntronFinder(RefSequenceTable::Sequence* ref_seq) :
223     _ref_seq(ref_seq),
224     _lms(vector<bool>(length(*ref_seq), false)),
225     _rms(vector<bool>(length(*ref_seq), false)){}
226    
227     void add_motifs_in_window(int left,
228     int right,
229     const string& left_motif,
230     const string& right_motif)
231     {
232     size_t l_start = max(0,left);
233     for (int i = l_start;
234     i < min(right, (int)length(*_ref_seq) - 2);
235     ++i)
236     {
237     seqan::Infix<RefSequenceTable::Sequence>::Type curr
238     = seqan::infix(*_ref_seq,i, i + 2);
239     if (curr == left_motif)
240     _lms[i] = true;
241     else if (curr == right_motif)
242     _rms[i] = true;
243     }
244     }
245    
246     void finalize()
247     {
248     int pos = 0;
249     for (vector<bool>::iterator itr = _lms.begin();
250     itr != _lms.end();
251     ++itr, ++pos)
252     {
253     if (*itr)
254     _hits.push_back(SpliceMotif(pos, true));
255     }
256    
257     pos = 0;
258     for (vector<bool>::iterator itr = _rms.begin();
259     itr != _rms.end();
260     ++itr, ++pos)
261     {
262     if (*itr)
263     _hits.push_back(SpliceMotif(pos, false));
264     }
265    
266     sort(_hits.begin(), _hits.end());
267     }
268    
269     size_t seq_len() const { return length(*_ref_seq); }
270     const vector<SpliceMotif>& hits() const { return _hits; }
271     vector<bool> _lms;
272     vector<bool> _rms;
273     };
274    
275     typedef CovMapIntronFinder CIF;
276    
277     struct RefCIF
278     {
279     RefCIF() : ref_seq(NULL) {}
280     RefCIF(const CIF& f, const CIF& r, RefSequenceTable::Sequence* s) :
281     fwd_cif(f), rev_cif(r), ref_seq(s) {}
282     CIF fwd_cif;
283     CIF rev_cif;
284     RefSequenceTable::Sequence* ref_seq;
285     };
286    
287     class CoverageMapVisitor
288     {
289     public:
290     map<uint32_t, RefCIF> finders;
291    
292     CoverageMapVisitor(istream& ref_stream,
293     RefSequenceTable& rt)
294     {
295    
296     while(ref_stream.good() &&
297     !ref_stream.eof())
298     {
299     RefSequenceTable::Sequence* ref_str = new RefSequenceTable::Sequence;
300     string name;
301     readMeta(ref_stream, name, Fasta());
302     string::size_type space_pos = name.find_first_of(" \t\r");
303     if (space_pos != string::npos)
304     {
305     name.resize(space_pos);
306     }
307     read(ref_stream, *ref_str, Fasta());
308    
309     uint32_t ref_id = rt.get_id(name, NULL, 0);
310     finders[ref_id] = RefCIF(CIF(ref_str), CIF(ref_str), ref_str);
311     }
312     }
313    
314     void visit(BestPairingHits& pairings)
315     {
316     if (!pairings.first.num_mapped == 2)
317     return;
318    
319     static string fwd_lm("GT");
320     static string fwd_rm("AG");
321     static string rev_lm("CT");
322     static string rev_rm("AC");
323    
324     for (size_t i = 0;
325     i < pairings.second.size();
326     ++i)
327     {
328    
329     InsertAlignment& al = pairings.second[i];
330    
331     BowtieHit& bh_left = *(al.left_alignment);
332     BowtieHit& bh_right = *(al.right_alignment);
333    
334     assert (bh_left.ref_id() == bh_right.ref_id());
335    
336     map<uint32_t, RefCIF >::iterator if_itr = finders.find(bh_left.ref_id());
337     assert (if_itr != finders.end());
338    
339     RefCIF& ref_finders = if_itr->second;
340    
341     pair<int, int> ds = pair_distances(bh_left, bh_right);
342    
343     int minor_hit_start, major_hit_start;
344     int minor_hit_end, major_hit_end;
345     if (bh_left.left() < bh_right.left())
346     {
347     minor_hit_start = (int)bh_left.left();
348     minor_hit_end = (int)bh_left.right();
349     major_hit_start = (int)bh_right.left();
350     major_hit_end = (int)bh_right.right();
351     }
352     else
353     {
354     minor_hit_start = (int)bh_right.left();
355     minor_hit_end = (int)bh_right.right();
356     major_hit_start = (int)bh_left.left();
357     major_hit_end = (int)bh_left.right();
358     }
359    
360     int inner_dist = major_hit_start - minor_hit_end;
361    
362     bool skip_fwd = false;
363     bool skip_rev = false;
364    
365     if (library_type == FR_FIRSTSTRAND)
366     {
367     if (bh_left.antisense_align()) skip_rev = true;
368     else skip_fwd = true;
369     }
370    
371     if (library_type == FR_SECONDSTRAND)
372     {
373     if (bh_left.antisense_align()) skip_fwd = true;
374     else skip_rev = true;
375     }
376    
377     if (inner_dist > inner_dist_mean + 2 * inner_dist_std_dev)
378     {
379     // Forward strand
380     if (!skip_fwd)
381     {
382     ref_finders.fwd_cif.add_motifs_in_window((int)bh_left.left() - 10,
383     bh_left.right() + 10,
384     fwd_lm,
385     fwd_rm);
386    
387     ref_finders.fwd_cif.add_motifs_in_window((int)bh_right.left() - 10,
388     bh_right.right() + 10,
389     fwd_lm,
390     fwd_rm);
391     }
392    
393     // Reverse strand
394     if (!skip_rev)
395     {
396     ref_finders.rev_cif.add_motifs_in_window((int)bh_left.left() - 10,
397     bh_left.right() + 10,
398     rev_lm,
399     rev_rm);
400    
401     ref_finders.rev_cif.add_motifs_in_window((int)bh_right.left() - 10,
402     bh_right.right() + 10,
403     rev_lm,
404     rev_rm);
405     }
406     }
407     }
408     }
409    
410     void finalize()
411     {
412     for (map<uint32_t, RefCIF >::iterator itr = finders.begin();
413     itr != finders.end();
414     ++itr)
415     {
416     itr->second.fwd_cif.finalize();
417     itr->second.rev_cif.finalize();
418     delete itr->second.ref_seq;
419     itr->second.ref_seq = NULL;
420     }
421     }
422     };
423    
424    
425     class JunctionMapVisitor
426     {
427     public:
428     typedef JunctionFinder<RefSequenceTable::Sequence, CIF> JF;
429    
430     struct JunctionTable
431     {
432     JunctionTable() : jf(NULL), possible_splices(NULL) {}
433     JunctionTable(ClosureJunctionSet* ps, JF* _jf, bool as)
434     : jf(_jf), possible_splices(ps), antisense(as) {}
435    
436     JF* jf;
437     ClosureJunctionSet* possible_splices;
438     bool antisense;
439     };
440    
441    
442     map<uint32_t, pair<JunctionTable, JunctionTable> > _finders;
443    
444     JunctionMapVisitor(ClosureJunctionSet& fwd_splices,
445     ClosureJunctionSet& rev_splices,
446     map<uint32_t, RefCIF >& finders)
447     {
448     static const uint32_t bowtie_padding = 5;
449     for (map<uint32_t, RefCIF >::iterator itr = finders.begin();
450     itr != finders.end();
451     ++itr)
452     {
453     JF* fwd_jf = new JF(itr->second.fwd_cif,
454     inner_dist_mean,
455     inner_dist_std_dev,
456     min_closure_intron_length,
457     min_closure_exon_length,
458     bowtie_padding,
459     island_extension);
460     JF* rev_jf = new JF(itr->second.rev_cif,
461     inner_dist_mean,
462     inner_dist_std_dev,
463     min_closure_intron_length,
464     min_closure_exon_length,
465     bowtie_padding,
466     island_extension);
467    
468     _finders[itr->first] = make_pair(JunctionTable(&fwd_splices, fwd_jf,false),
469     JunctionTable(&rev_splices, rev_jf, true));
470     }
471     }
472    
473    
474     void visit(BestPairingHits& pairings)
475     {
476     if (!pairings.first.num_mapped == 2)
477     return;
478     for (size_t i = 0;
479     i < pairings.second.size();
480     ++i)
481     {
482    
483     InsertAlignment& al = pairings.second[i];
484    
485     BowtieHit& bh_left = *(al.left_alignment);
486     BowtieHit& bh_right = *(al.right_alignment);
487    
488     assert (bh_left.ref_id() == bh_right.ref_id());
489    
490     map<uint32_t, pair<JunctionTable, JunctionTable> >::iterator if_itr = _finders.find(bh_left.ref_id());
491     assert (if_itr != _finders.end());
492    
493     JF& fwd_jf = *(if_itr->second.first.jf);
494     fwd_jf.possible_junctions(*(if_itr->second.first.possible_splices), bh_left, bh_right);
495    
496     JF& rev_jf = *(if_itr->second.second.jf);
497     rev_jf.possible_junctions(*(if_itr->second.second.possible_splices), bh_left, bh_right);
498     }
499     }
500     };
501    
502     void closure_driver(vector<FZPipe>& map1,
503     vector<FZPipe>& map2,
504     ifstream& ref_stream,
505 gpertea 154 FILE* juncs_file,
506     FILE* fusions_out)
507 gpertea 29 {
508     typedef RefSequenceTable::Sequence Reference;
509    
510     ReadTable it;
511     RefSequenceTable rt(true);
512 gpertea 154
513     BowtieHitFactory hit_factory(it, rt);
514    
515     std::set<Fusion> fusions;
516 gpertea 29
517     fprintf (stderr, "Finding near-covered motifs...");
518     CoverageMapVisitor cov_map_visitor(ref_stream, rt);
519     uint32_t coverage_attempts = 0;
520    
521     assert(map1.size() == map2.size());
522     for (size_t num = 0; num < map1.size(); ++num)
523     {
524     HitStream left_hs(map1[num].file, &hit_factory, false, true, false);
525     HitStream right_hs(map2[num].file, &hit_factory, false, true, false);
526    
527     HitsForRead curr_left_hit_group;
528     HitsForRead curr_right_hit_group;
529    
530     left_hs.next_read_hits(curr_left_hit_group);
531     right_hs.next_read_hits(curr_right_hit_group);
532    
533     uint32_t curr_right_obs_order = it.observation_order(curr_left_hit_group.insert_id);
534     uint32_t curr_left_obs_order = it.observation_order(curr_right_hit_group.insert_id);
535    
536 gpertea 135 while(curr_left_obs_order != VMAXINT32 &&
537     curr_right_obs_order != VMAXINT32)
538 gpertea 29 {
539     while (curr_left_obs_order < curr_right_obs_order&&
540 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
541 gpertea 29 {
542     // Get hit group
543    
544     left_hs.next_read_hits(curr_left_hit_group);
545     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
546     }
547    
548     while (curr_left_obs_order > curr_right_obs_order &&
549 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
550 gpertea 29 {
551     // Get hit group
552    
553     right_hs.next_read_hits(curr_right_hit_group);
554     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
555     }
556    
557     while (curr_left_obs_order == curr_right_obs_order &&
558 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
559 gpertea 154 {
560     if (num == 0)
561     find_fusion_closure(curr_left_hit_group, curr_right_hit_group, fusions);
562    
563     if (coverage_attempts++ % 10000 == 0)
564     fprintf (stderr, "Adding covered motifs from pair %d\n", coverage_attempts);
565    
566 gpertea 29 visit_best_pairing(curr_left_hit_group, curr_right_hit_group, cov_map_visitor);
567    
568     left_hs.next_read_hits(curr_left_hit_group);
569     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
570    
571     right_hs.next_read_hits(curr_right_hit_group);
572     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
573     }
574     }
575     }
576    
577     cov_map_visitor.finalize();
578     fprintf (stderr, "done\n");
579    
580     ClosureJunctionSet fwd_splices;
581     ClosureJunctionSet rev_splices;
582    
583     JunctionMapVisitor junc_map_visitor(fwd_splices, rev_splices, cov_map_visitor.finders);
584     fprintf (stderr, "Searching for closures...");
585     uint32_t closure_attempts = 0;
586    
587     for (size_t num = 0; num < map1.size(); ++num)
588     {
589     map1[num].rewind();
590     map2[num].rewind();
591    
592     HitStream left_hs = HitStream(map1[num].file, &hit_factory, false, true, false);
593     HitStream right_hs = HitStream(map2[num].file, &hit_factory, false, true, false);
594    
595     HitsForRead curr_left_hit_group;
596     HitsForRead curr_right_hit_group;
597    
598     left_hs.next_read_hits(curr_left_hit_group);
599     right_hs.next_read_hits(curr_right_hit_group);
600    
601     uint32_t curr_right_obs_order = it.observation_order(curr_left_hit_group.insert_id);
602     uint32_t curr_left_obs_order = it.observation_order(curr_right_hit_group.insert_id);
603    
604 gpertea 135 while(curr_left_obs_order != VMAXINT32 &&
605     curr_right_obs_order != VMAXINT32)
606 gpertea 29 {
607     while (curr_left_obs_order < curr_right_obs_order &&
608 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
609 gpertea 29 {
610     // Get hit group
611    
612     left_hs.next_read_hits(curr_left_hit_group);
613     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
614     }
615    
616     while (curr_left_obs_order > curr_right_obs_order &&
617 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
618 gpertea 29 {
619     // Get hit group
620    
621     right_hs.next_read_hits(curr_right_hit_group);
622     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
623     }
624    
625     while (curr_left_obs_order == curr_right_obs_order &&
626 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
627 gpertea 29 {
628 gpertea 154 if (closure_attempts++ % 10000 == 0)
629     fprintf (stderr, "Trying to close pair %d\n", closure_attempts);
630    
631 gpertea 29 visit_best_pairing(curr_left_hit_group, curr_right_hit_group, junc_map_visitor);
632     left_hs.next_read_hits(curr_left_hit_group);
633     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
634    
635     right_hs.next_read_hits(curr_right_hit_group);
636     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
637     }
638     }
639     }
640    
641     for (size_t num = 0; num < map1.size(); ++num)
642     {
643     map1[num].close();
644     map2[num].close();
645     }
646    
647     fprintf(stderr, "%lu Forward strand splices\n", fwd_splices.size());
648     fprintf(stderr, "%lu Reverse strand splices\n", rev_splices.size());
649    
650     fprintf (stderr, "done\n");
651     uint32_t num_potential_splices = 0;
652     fprintf (stderr, "Reporting possible junctions...");
653     map<uint32_t, pair<JunctionMapVisitor::JunctionTable, JunctionMapVisitor::JunctionTable> >::iterator f_itr;
654     f_itr = junc_map_visitor._finders.begin();
655    
656     ClosureJunctionSet::iterator j_itr;
657     j_itr = fwd_splices.begin();
658     while (j_itr != fwd_splices.end())
659     {
660     fprintf (juncs_file,"%s\t%u\t%u\t%c\n",
661     rt.get_name(j_itr->refid),
662     j_itr->left,j_itr->right,'+');
663     ++num_potential_splices;
664     ++j_itr;
665     }
666    
667     j_itr = rev_splices.begin();
668     while (j_itr != rev_splices.end())
669     {
670     fprintf (juncs_file,"%s\t%u\t%u\t%c\n",
671     rt.get_name(j_itr->refid),
672     j_itr->left,j_itr->right,'-');
673     ++num_potential_splices;
674     ++j_itr;
675     }
676    
677     //accept_all_best_hits(best_status_for_inserts);
678     fprintf(stderr, "done\n");
679     fprintf(stderr, "Searched for closures between %d pairs\n", searched);
680     fprintf(stderr, "Successfully closed %d pairs\n", closed);
681    
682     fprintf(stderr, "Found %d total possible splices\n", num_potential_splices);
683 gpertea 154
684     // daehwan
685     #if 0
686     fprintf (stderr, "Reporting potential fusions...\n");
687     if(fusions_out){
688     for(std::set<Fusion>::iterator itr = fusions.begin(); itr != fusions.end(); ++itr){
689     const char* ref_name1 = rt.get_name(itr->refid1);
690     const char* ref_name2 = rt.get_name(itr->refid2);
691    
692     const char* dir = "";
693     if (itr->dir == FUSION_FR)
694     dir = "fr";
695     else if(itr->dir == FUSION_RF)
696     dir = "rf";
697     else
698     dir = "ff";
699    
700     fprintf(fusions_out,
701     "%s\t%d\t%s\t%d\t%s\n",
702     ref_name1,
703     itr->left,
704     ref_name2,
705     itr->right,
706     dir);
707     }
708     fclose(fusions_out);
709     }else{
710     fprintf(stderr, "Failed to open fusions file for writing\n");
711     }
712     #endif
713 gpertea 29 }
714    
715     void print_usage()
716     {
717     fprintf(stderr, "Usage: closure_juncs <closure.juncs> <ref.fa> <left_map.bwtout> <right_map.bwtout>\n");
718     }
719    
720     int main(int argc, char** argv)
721     {
722     fprintf(stderr, "closure_juncs v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION);
723     fprintf(stderr, "---------------------------\n");
724    
725     int parse_ret = parse_options(argc, argv, print_usage);
726     if (parse_ret)
727     return parse_ret;
728    
729     if(optind >= argc)
730     {
731     print_usage();
732     return 1;
733     }
734    
735     string junctions_file_name = argv[optind++];
736    
737     if(optind >= argc)
738     {
739     print_usage();
740     return 1;
741     }
742 gpertea 154
743     string fusions_file_name = argv[optind++];
744 gpertea 29
745 gpertea 154 if(optind >= argc)
746     {
747     print_usage();
748     return 1;
749     }
750    
751 gpertea 29 string ref_fasta = argv[optind++];
752 gpertea 154
753 gpertea 29 if(optind >= argc)
754     {
755     print_usage();
756     return 1;
757     }
758    
759     string left_file_list = argv[optind++];
760     vector<string> left_file_names;
761     vector<FZPipe> left_files;
762     tokenize(left_file_list, ",", left_file_names);
763    
764     string unzcmd = getUnpackCmd(left_file_names[0], false);
765     for (size_t i = 0; i < left_file_names.size(); ++i)
766     {
767     FZPipe seg_file(left_file_names[i], unzcmd);
768     if (seg_file.file == NULL)
769     {
770     fprintf(stderr, "Error: cannot open file %s for reading\n",
771     left_file_names[i].c_str());
772     exit(1);
773     }
774     left_files.push_back(seg_file);
775     }
776    
777     if(optind >= argc)
778     {
779     print_usage();
780     return 1;
781     }
782    
783     string right_file_list = argv[optind++];
784     vector<string> right_file_names;
785     vector<FZPipe> right_files;
786     tokenize(right_file_list, ",", right_file_names);
787     for (size_t i = 0; i < right_file_names.size(); ++i)
788     {
789     FZPipe seg_file(right_file_names[i], unzcmd);
790     if (seg_file.file == NULL)
791     {
792     fprintf(stderr, "Error: cannot open %s for reading\n",
793     right_file_names[i].c_str());
794     exit(1);
795     }
796     right_files.push_back(seg_file);
797     }
798    
799     ifstream ref_stream(ref_fasta.c_str(), ifstream::in);
800    
801     FILE* splice_db = fopen(junctions_file_name.c_str(), "w");
802     if (splice_db == NULL)
803     {
804     fprintf(stderr, "Error: cannot open junctions file %s for writing\n",
805     junctions_file_name.c_str());
806     exit(1);
807     }
808 gpertea 154
809     FILE* fusion_db = fopen(fusions_file_name.c_str(), "w");
810     if (splice_db == NULL)
811     {
812     fprintf(stderr, "Error: cannot open fusions file %s for writing\n",
813     fusions_file_name.c_str());
814     exit(1);
815     }
816    
817 gpertea 29 closure_driver(left_files,
818     right_files,
819     ref_stream,
820 gpertea 154 splice_db,
821     fusion_db);
822 gpertea 29
823     return 0;
824     }