ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/closures.cpp
Revision: 245
Committed: Wed May 16 21:51:01 2012 UTC (12 years, 3 months ago) by gpertea
File size: 22092 byte(s)
Log Message:
merge-sync attempt

Line User Rev File contents
1 gpertea 29 /*
2     * closures.cpp
3     * TopHat
4     *
5     * Created by Cole Trapnell on 1/15/09.
6     * Copyright 2009 Cole Trapnell. All rights reserved.
7     *
8     */
9    
10     #ifdef HAVE_CONFIG_H
11     #include <config.h>
12     #endif
13    
14     #include <iostream>
15     #include <fstream>
16     #include <getopt.h>
17 gpertea 154 #include <seqan/sequence.h>
18     #include <seqan/file.h>
19    
20 gpertea 29 #include "bwt_map.h"
21     #include "inserts.h"
22     #include "closures.h"
23     #include "reads.h"
24     #include "tokenize.h"
25 gpertea 154 #include "fusions.h"
26 gpertea 29
27     using namespace seqan;
28     using namespace std;
29    
30    
31     bool possible_cotranscript(const BowtieHit& h1, const BowtieHit& h2, bool check_strand = true)
32     {
33     if (h1.insert_id() != h2.insert_id())
34     return false;
35 gpertea 245
36     JunctionSet junctions;
37     InsertAlignmentGrade grade(h1, h2, junctions);
38 gpertea 29 return (!grade.too_far && !grade.too_close && grade.opposite_strands);
39     }
40    
41    
42     void check_mates(const HitList& hits1_in_ref,
43     const HitList& hits2_in_ref,
44     vector<pair<size_t, size_t> >& happy_mates,
45     vector<size_t>& map1_singletons,
46     vector<size_t>& map2_singletons)
47     {
48     std::set<size_t> marked;
49     // TODO: if this shows up on the profile, replace it with a linear
50     // time algorithm. This one is 2*n*lg(n).
51     HitList::const_iterator last_good = hits2_in_ref.begin();
52    
53     for (size_t i = 0; i < hits1_in_ref.size(); ++i)
54     {
55     pair<HitList::const_iterator, HitList::const_iterator> range_pair;
56     range_pair = equal_range(last_good, hits2_in_ref.end(),
57     hits1_in_ref[i], hit_insert_id_lt);
58     bool found_hit = false;
59     if (range_pair.first != range_pair.second)
60     last_good = range_pair.first;
61     for (HitList::const_iterator f = range_pair.first;
62     f != range_pair.second;
63     ++f)
64     {
65     if (possible_cotranscript(hits1_in_ref[i], *f))
66     {
67     happy_mates.push_back(make_pair(i,f - hits2_in_ref.begin()));
68     marked.insert(f - hits2_in_ref.begin());
69     found_hit = true;
70     }
71     }
72     if (!found_hit)
73     map1_singletons.push_back(i);
74     }
75    
76     for (size_t i = 0; i < hits2_in_ref.size(); ++i)
77     {
78     if (marked.find(i) == marked.end())
79     {
80     map2_singletons.push_back(i);
81     }
82     }
83     }
84    
85 gpertea 154 void find_fusion_closure(HitsForRead& left_hits,
86     HitsForRead& right_hits,
87     std::set<Fusion>& fusions)
88     {
89     if (left_hits.hits.size() > 3 || right_hits.hits.size() > 3)
90     return;
91    
92     for (size_t left_index = 0; left_index < left_hits.hits.size(); ++left_index)
93     {
94     for (size_t right_index = 0; right_index < right_hits.hits.size(); ++right_index)
95     {
96     BowtieHit* leftHit = &left_hits.hits[left_index];
97     BowtieHit* rightHit = &right_hits.hits[right_index];
98    
99     uint32_t dir = FUSION_FF;
100     if (leftHit->antisense_align() != rightHit->antisense_align())
101     dir = FUSION_FF;
102     else if (!leftHit->antisense_align())
103     dir = FUSION_FR;
104     else
105     dir = FUSION_RF;
106    
107     if (dir == FUSION_FF && leftHit->antisense_align())
108     {
109     BowtieHit * tmp = leftHit;
110     leftHit = rightHit;
111     rightHit = tmp;
112     }
113    
114     uint32_t left, right;
115     if (dir == FUSION_FF || dir == FUSION_FR)
116     left = leftHit->right() - 4;
117     else
118     left = leftHit->left() + 4;
119    
120     if (dir == FUSION_FF || dir == FUSION_RF)
121     right = rightHit->left() + 4;
122     else
123     right = rightHit->right() - 4;
124    
125     if (leftHit->ref_id() == rightHit->ref_id() && dir == FUSION_FF)
126     {
127     int dist = 0;
128     dist = rightHit->left() - leftHit->right();
129    
130     if (dist >= 0 && dist <= (int)fusion_min_dist)
131     continue;
132     }
133    
134     uint32_t ref_id1 = leftHit->ref_id();
135     uint32_t ref_id2 = rightHit->ref_id();
136    
137     if (dir == FUSION_FR || dir == FUSION_RF)
138     {
139     if ((ref_id2 < ref_id1) || (ref_id1 == ref_id2 && left > right))
140     {
141     uint32_t temp = ref_id1;
142     ref_id1 = ref_id2;
143     ref_id2 = temp;
144    
145     temp = left;
146     left = right;
147     right = temp;
148     }
149     }
150    
151     fusions.insert(Fusion(ref_id1, ref_id2, left, right, dir));
152     }
153     }
154     }
155    
156 gpertea 29 bool prefer_shorter_pairs = true;
157    
158     typedef pair<InsertAlignmentGrade, vector<InsertAlignment> > BestPairingHits;
159    
160     template<typename Visitor>
161     void visit_best_pairing(HitsForRead& left_hit_group,
162     HitsForRead& right_hit_group,
163     Visitor& visitor)
164     {
165     BestPairingHits insert_best;
166    
167     for (size_t i = 0; i < left_hit_group.hits.size(); ++i)
168     {
169     BowtieHit& h1 = left_hit_group.hits[i];
170    
171     for (size_t j = 0; j < right_hit_group.hits.size(); j++)
172     {
173     BowtieHit& h2 = right_hit_group.hits[j];
174     if (h1.ref_id() != h2.ref_id())
175     continue;
176    
177     uint32_t refid = h1.ref_id();
178 gpertea 245
179     JunctionSet junctions;
180     InsertAlignmentGrade s(h1, h2, junctions);
181 gpertea 29
182     //pair<InsertAlignmentGrade, vector<InsertAlignment> >& insert_best
183     // = best_status_for_inserts[curr_left_obs_order];
184     InsertAlignmentGrade& current = insert_best.first;
185     // Is the new status better than the current best one?
186     if (current < s)
187     {
188     insert_best.second.clear();
189     current = s;
190     insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
191     }
192     else if (! (s < current))
193     {
194     if (prefer_shorter_pairs && current.num_mapped == 2)
195     {
196     pair<int, int> dc = pair_distances(*(insert_best.second[0].left_alignment), *(insert_best.second[0].right_alignment));
197     pair<int, int> ds = pair_distances(h1,h2);
198     if (ds.second < dc.second)
199     {
200     //chucked_for_shorter_pair += insert_best.second.size();
201     insert_best.second.clear();
202     current = s;
203     insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
204    
205     }
206     }
207     else
208     {
209     insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
210     }
211     }
212     }
213     }
214    
215     visitor.visit(insert_best);
216     }
217    
218     class CovMapIntronFinder
219     {
220     std::set<SpliceMotif> _tmp_hits;
221     vector<SpliceMotif> _hits;
222     RefSequenceTable::Sequence* _ref_seq;
223     public:
224     CovMapIntronFinder() : _ref_seq(NULL) {}
225     CovMapIntronFinder(RefSequenceTable::Sequence* ref_seq) :
226     _ref_seq(ref_seq),
227     _lms(vector<bool>(length(*ref_seq), false)),
228     _rms(vector<bool>(length(*ref_seq), false)){}
229    
230     void add_motifs_in_window(int left,
231     int right,
232     const string& left_motif,
233     const string& right_motif)
234     {
235     size_t l_start = max(0,left);
236     for (int i = l_start;
237     i < min(right, (int)length(*_ref_seq) - 2);
238     ++i)
239     {
240     seqan::Infix<RefSequenceTable::Sequence>::Type curr
241     = seqan::infix(*_ref_seq,i, i + 2);
242     if (curr == left_motif)
243     _lms[i] = true;
244     else if (curr == right_motif)
245     _rms[i] = true;
246     }
247     }
248    
249     void finalize()
250     {
251     int pos = 0;
252     for (vector<bool>::iterator itr = _lms.begin();
253     itr != _lms.end();
254     ++itr, ++pos)
255     {
256     if (*itr)
257     _hits.push_back(SpliceMotif(pos, true));
258     }
259    
260     pos = 0;
261     for (vector<bool>::iterator itr = _rms.begin();
262     itr != _rms.end();
263     ++itr, ++pos)
264     {
265     if (*itr)
266     _hits.push_back(SpliceMotif(pos, false));
267     }
268    
269     sort(_hits.begin(), _hits.end());
270     }
271    
272     size_t seq_len() const { return length(*_ref_seq); }
273     const vector<SpliceMotif>& hits() const { return _hits; }
274     vector<bool> _lms;
275     vector<bool> _rms;
276     };
277    
278     typedef CovMapIntronFinder CIF;
279    
280     struct RefCIF
281     {
282     RefCIF() : ref_seq(NULL) {}
283     RefCIF(const CIF& f, const CIF& r, RefSequenceTable::Sequence* s) :
284     fwd_cif(f), rev_cif(r), ref_seq(s) {}
285     CIF fwd_cif;
286     CIF rev_cif;
287     RefSequenceTable::Sequence* ref_seq;
288     };
289    
290     class CoverageMapVisitor
291     {
292     public:
293     map<uint32_t, RefCIF> finders;
294    
295     CoverageMapVisitor(istream& ref_stream,
296     RefSequenceTable& rt)
297     {
298    
299     while(ref_stream.good() &&
300     !ref_stream.eof())
301     {
302     RefSequenceTable::Sequence* ref_str = new RefSequenceTable::Sequence;
303     string name;
304     readMeta(ref_stream, name, Fasta());
305     string::size_type space_pos = name.find_first_of(" \t\r");
306     if (space_pos != string::npos)
307     {
308     name.resize(space_pos);
309     }
310     read(ref_stream, *ref_str, Fasta());
311    
312     uint32_t ref_id = rt.get_id(name, NULL, 0);
313     finders[ref_id] = RefCIF(CIF(ref_str), CIF(ref_str), ref_str);
314     }
315     }
316    
317     void visit(BestPairingHits& pairings)
318     {
319     if (!pairings.first.num_mapped == 2)
320     return;
321    
322     static string fwd_lm("GT");
323     static string fwd_rm("AG");
324     static string rev_lm("CT");
325     static string rev_rm("AC");
326    
327     for (size_t i = 0;
328     i < pairings.second.size();
329     ++i)
330     {
331    
332     InsertAlignment& al = pairings.second[i];
333    
334     BowtieHit& bh_left = *(al.left_alignment);
335     BowtieHit& bh_right = *(al.right_alignment);
336    
337     assert (bh_left.ref_id() == bh_right.ref_id());
338    
339     map<uint32_t, RefCIF >::iterator if_itr = finders.find(bh_left.ref_id());
340     assert (if_itr != finders.end());
341    
342     RefCIF& ref_finders = if_itr->second;
343    
344     pair<int, int> ds = pair_distances(bh_left, bh_right);
345    
346     int minor_hit_start, major_hit_start;
347     int minor_hit_end, major_hit_end;
348     if (bh_left.left() < bh_right.left())
349     {
350     minor_hit_start = (int)bh_left.left();
351     minor_hit_end = (int)bh_left.right();
352     major_hit_start = (int)bh_right.left();
353     major_hit_end = (int)bh_right.right();
354     }
355     else
356     {
357     minor_hit_start = (int)bh_right.left();
358     minor_hit_end = (int)bh_right.right();
359     major_hit_start = (int)bh_left.left();
360     major_hit_end = (int)bh_left.right();
361     }
362    
363     int inner_dist = major_hit_start - minor_hit_end;
364    
365     bool skip_fwd = false;
366     bool skip_rev = false;
367    
368     if (library_type == FR_FIRSTSTRAND)
369     {
370     if (bh_left.antisense_align()) skip_rev = true;
371     else skip_fwd = true;
372     }
373    
374     if (library_type == FR_SECONDSTRAND)
375     {
376     if (bh_left.antisense_align()) skip_fwd = true;
377     else skip_rev = true;
378     }
379    
380     if (inner_dist > inner_dist_mean + 2 * inner_dist_std_dev)
381     {
382     // Forward strand
383     if (!skip_fwd)
384     {
385     ref_finders.fwd_cif.add_motifs_in_window((int)bh_left.left() - 10,
386     bh_left.right() + 10,
387     fwd_lm,
388     fwd_rm);
389    
390     ref_finders.fwd_cif.add_motifs_in_window((int)bh_right.left() - 10,
391     bh_right.right() + 10,
392     fwd_lm,
393     fwd_rm);
394     }
395    
396     // Reverse strand
397     if (!skip_rev)
398     {
399     ref_finders.rev_cif.add_motifs_in_window((int)bh_left.left() - 10,
400     bh_left.right() + 10,
401     rev_lm,
402     rev_rm);
403    
404     ref_finders.rev_cif.add_motifs_in_window((int)bh_right.left() - 10,
405     bh_right.right() + 10,
406     rev_lm,
407     rev_rm);
408     }
409     }
410     }
411     }
412    
413     void finalize()
414     {
415     for (map<uint32_t, RefCIF >::iterator itr = finders.begin();
416     itr != finders.end();
417     ++itr)
418     {
419     itr->second.fwd_cif.finalize();
420     itr->second.rev_cif.finalize();
421     delete itr->second.ref_seq;
422     itr->second.ref_seq = NULL;
423     }
424     }
425     };
426    
427    
428     class JunctionMapVisitor
429     {
430     public:
431     typedef JunctionFinder<RefSequenceTable::Sequence, CIF> JF;
432    
433     struct JunctionTable
434     {
435     JunctionTable() : jf(NULL), possible_splices(NULL) {}
436     JunctionTable(ClosureJunctionSet* ps, JF* _jf, bool as)
437     : jf(_jf), possible_splices(ps), antisense(as) {}
438    
439     JF* jf;
440     ClosureJunctionSet* possible_splices;
441     bool antisense;
442     };
443    
444    
445     map<uint32_t, pair<JunctionTable, JunctionTable> > _finders;
446    
447     JunctionMapVisitor(ClosureJunctionSet& fwd_splices,
448     ClosureJunctionSet& rev_splices,
449     map<uint32_t, RefCIF >& finders)
450     {
451     static const uint32_t bowtie_padding = 5;
452     for (map<uint32_t, RefCIF >::iterator itr = finders.begin();
453     itr != finders.end();
454     ++itr)
455     {
456     JF* fwd_jf = new JF(itr->second.fwd_cif,
457     inner_dist_mean,
458     inner_dist_std_dev,
459     min_closure_intron_length,
460     min_closure_exon_length,
461     bowtie_padding,
462     island_extension);
463     JF* rev_jf = new JF(itr->second.rev_cif,
464     inner_dist_mean,
465     inner_dist_std_dev,
466     min_closure_intron_length,
467     min_closure_exon_length,
468     bowtie_padding,
469     island_extension);
470    
471     _finders[itr->first] = make_pair(JunctionTable(&fwd_splices, fwd_jf,false),
472     JunctionTable(&rev_splices, rev_jf, true));
473     }
474     }
475    
476    
477     void visit(BestPairingHits& pairings)
478     {
479     if (!pairings.first.num_mapped == 2)
480     return;
481     for (size_t i = 0;
482     i < pairings.second.size();
483     ++i)
484     {
485    
486     InsertAlignment& al = pairings.second[i];
487    
488     BowtieHit& bh_left = *(al.left_alignment);
489     BowtieHit& bh_right = *(al.right_alignment);
490    
491     assert (bh_left.ref_id() == bh_right.ref_id());
492    
493     map<uint32_t, pair<JunctionTable, JunctionTable> >::iterator if_itr = _finders.find(bh_left.ref_id());
494     assert (if_itr != _finders.end());
495    
496     JF& fwd_jf = *(if_itr->second.first.jf);
497     fwd_jf.possible_junctions(*(if_itr->second.first.possible_splices), bh_left, bh_right);
498    
499     JF& rev_jf = *(if_itr->second.second.jf);
500     rev_jf.possible_junctions(*(if_itr->second.second.possible_splices), bh_left, bh_right);
501     }
502     }
503     };
504    
505     void closure_driver(vector<FZPipe>& map1,
506     vector<FZPipe>& map2,
507     ifstream& ref_stream,
508 gpertea 154 FILE* juncs_file,
509     FILE* fusions_out)
510 gpertea 29 {
511     typedef RefSequenceTable::Sequence Reference;
512    
513     ReadTable it;
514     RefSequenceTable rt(true);
515 gpertea 154
516     BowtieHitFactory hit_factory(it, rt);
517    
518     std::set<Fusion> fusions;
519 gpertea 29
520     fprintf (stderr, "Finding near-covered motifs...");
521     CoverageMapVisitor cov_map_visitor(ref_stream, rt);
522     uint32_t coverage_attempts = 0;
523    
524     assert(map1.size() == map2.size());
525     for (size_t num = 0; num < map1.size(); ++num)
526     {
527     HitStream left_hs(map1[num].file, &hit_factory, false, true, false);
528     HitStream right_hs(map2[num].file, &hit_factory, false, true, false);
529    
530     HitsForRead curr_left_hit_group;
531     HitsForRead curr_right_hit_group;
532    
533     left_hs.next_read_hits(curr_left_hit_group);
534     right_hs.next_read_hits(curr_right_hit_group);
535    
536     uint32_t curr_right_obs_order = it.observation_order(curr_left_hit_group.insert_id);
537     uint32_t curr_left_obs_order = it.observation_order(curr_right_hit_group.insert_id);
538    
539 gpertea 135 while(curr_left_obs_order != VMAXINT32 &&
540     curr_right_obs_order != VMAXINT32)
541 gpertea 29 {
542     while (curr_left_obs_order < curr_right_obs_order&&
543 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
544 gpertea 29 {
545     // Get hit group
546    
547     left_hs.next_read_hits(curr_left_hit_group);
548     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
549     }
550    
551     while (curr_left_obs_order > curr_right_obs_order &&
552 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
553 gpertea 29 {
554     // Get hit group
555    
556     right_hs.next_read_hits(curr_right_hit_group);
557     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
558     }
559    
560     while (curr_left_obs_order == curr_right_obs_order &&
561 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
562 gpertea 154 {
563     if (num == 0)
564     find_fusion_closure(curr_left_hit_group, curr_right_hit_group, fusions);
565    
566     if (coverage_attempts++ % 10000 == 0)
567     fprintf (stderr, "Adding covered motifs from pair %d\n", coverage_attempts);
568    
569 gpertea 29 visit_best_pairing(curr_left_hit_group, curr_right_hit_group, cov_map_visitor);
570    
571     left_hs.next_read_hits(curr_left_hit_group);
572     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
573    
574     right_hs.next_read_hits(curr_right_hit_group);
575     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
576     }
577     }
578     }
579    
580     cov_map_visitor.finalize();
581     fprintf (stderr, "done\n");
582    
583     ClosureJunctionSet fwd_splices;
584     ClosureJunctionSet rev_splices;
585    
586     JunctionMapVisitor junc_map_visitor(fwd_splices, rev_splices, cov_map_visitor.finders);
587     fprintf (stderr, "Searching for closures...");
588     uint32_t closure_attempts = 0;
589    
590     for (size_t num = 0; num < map1.size(); ++num)
591     {
592     map1[num].rewind();
593     map2[num].rewind();
594    
595     HitStream left_hs = HitStream(map1[num].file, &hit_factory, false, true, false);
596     HitStream right_hs = HitStream(map2[num].file, &hit_factory, false, true, false);
597    
598     HitsForRead curr_left_hit_group;
599     HitsForRead curr_right_hit_group;
600    
601     left_hs.next_read_hits(curr_left_hit_group);
602     right_hs.next_read_hits(curr_right_hit_group);
603    
604     uint32_t curr_right_obs_order = it.observation_order(curr_left_hit_group.insert_id);
605     uint32_t curr_left_obs_order = it.observation_order(curr_right_hit_group.insert_id);
606    
607 gpertea 135 while(curr_left_obs_order != VMAXINT32 &&
608     curr_right_obs_order != VMAXINT32)
609 gpertea 29 {
610     while (curr_left_obs_order < curr_right_obs_order &&
611 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
612 gpertea 29 {
613     // Get hit group
614    
615     left_hs.next_read_hits(curr_left_hit_group);
616     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
617     }
618    
619     while (curr_left_obs_order > curr_right_obs_order &&
620 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
621 gpertea 29 {
622     // Get hit group
623    
624     right_hs.next_read_hits(curr_right_hit_group);
625     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
626     }
627    
628     while (curr_left_obs_order == curr_right_obs_order &&
629 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
630 gpertea 29 {
631 gpertea 154 if (closure_attempts++ % 10000 == 0)
632     fprintf (stderr, "Trying to close pair %d\n", closure_attempts);
633    
634 gpertea 29 visit_best_pairing(curr_left_hit_group, curr_right_hit_group, junc_map_visitor);
635     left_hs.next_read_hits(curr_left_hit_group);
636     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
637    
638     right_hs.next_read_hits(curr_right_hit_group);
639     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
640     }
641     }
642     }
643    
644     for (size_t num = 0; num < map1.size(); ++num)
645     {
646     map1[num].close();
647     map2[num].close();
648     }
649    
650     fprintf(stderr, "%lu Forward strand splices\n", fwd_splices.size());
651     fprintf(stderr, "%lu Reverse strand splices\n", rev_splices.size());
652    
653     fprintf (stderr, "done\n");
654     uint32_t num_potential_splices = 0;
655     fprintf (stderr, "Reporting possible junctions...");
656     map<uint32_t, pair<JunctionMapVisitor::JunctionTable, JunctionMapVisitor::JunctionTable> >::iterator f_itr;
657     f_itr = junc_map_visitor._finders.begin();
658    
659     ClosureJunctionSet::iterator j_itr;
660     j_itr = fwd_splices.begin();
661     while (j_itr != fwd_splices.end())
662     {
663     fprintf (juncs_file,"%s\t%u\t%u\t%c\n",
664     rt.get_name(j_itr->refid),
665     j_itr->left,j_itr->right,'+');
666     ++num_potential_splices;
667     ++j_itr;
668     }
669    
670     j_itr = rev_splices.begin();
671     while (j_itr != rev_splices.end())
672     {
673     fprintf (juncs_file,"%s\t%u\t%u\t%c\n",
674     rt.get_name(j_itr->refid),
675     j_itr->left,j_itr->right,'-');
676     ++num_potential_splices;
677     ++j_itr;
678     }
679    
680     //accept_all_best_hits(best_status_for_inserts);
681     fprintf(stderr, "done\n");
682     fprintf(stderr, "Searched for closures between %d pairs\n", searched);
683     fprintf(stderr, "Successfully closed %d pairs\n", closed);
684    
685     fprintf(stderr, "Found %d total possible splices\n", num_potential_splices);
686 gpertea 154
687     // daehwan
688     #if 0
689     fprintf (stderr, "Reporting potential fusions...\n");
690     if(fusions_out){
691     for(std::set<Fusion>::iterator itr = fusions.begin(); itr != fusions.end(); ++itr){
692     const char* ref_name1 = rt.get_name(itr->refid1);
693     const char* ref_name2 = rt.get_name(itr->refid2);
694    
695     const char* dir = "";
696     if (itr->dir == FUSION_FR)
697     dir = "fr";
698     else if(itr->dir == FUSION_RF)
699     dir = "rf";
700     else
701     dir = "ff";
702    
703     fprintf(fusions_out,
704     "%s\t%d\t%s\t%d\t%s\n",
705     ref_name1,
706     itr->left,
707     ref_name2,
708     itr->right,
709     dir);
710     }
711     fclose(fusions_out);
712     }else{
713     fprintf(stderr, "Failed to open fusions file for writing\n");
714     }
715     #endif
716 gpertea 29 }
717    
718     void print_usage()
719     {
720     fprintf(stderr, "Usage: closure_juncs <closure.juncs> <ref.fa> <left_map.bwtout> <right_map.bwtout>\n");
721     }
722    
723     int main(int argc, char** argv)
724     {
725     fprintf(stderr, "closure_juncs v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION);
726     fprintf(stderr, "---------------------------\n");
727    
728     int parse_ret = parse_options(argc, argv, print_usage);
729     if (parse_ret)
730     return parse_ret;
731    
732     if(optind >= argc)
733     {
734     print_usage();
735     return 1;
736     }
737    
738     string junctions_file_name = argv[optind++];
739    
740     if(optind >= argc)
741     {
742     print_usage();
743     return 1;
744     }
745 gpertea 154
746     string fusions_file_name = argv[optind++];
747 gpertea 29
748 gpertea 154 if(optind >= argc)
749     {
750     print_usage();
751     return 1;
752     }
753    
754 gpertea 29 string ref_fasta = argv[optind++];
755 gpertea 154
756 gpertea 29 if(optind >= argc)
757     {
758     print_usage();
759     return 1;
760     }
761    
762     string left_file_list = argv[optind++];
763     vector<string> left_file_names;
764     vector<FZPipe> left_files;
765     tokenize(left_file_list, ",", left_file_names);
766    
767     string unzcmd = getUnpackCmd(left_file_names[0], false);
768     for (size_t i = 0; i < left_file_names.size(); ++i)
769     {
770     FZPipe seg_file(left_file_names[i], unzcmd);
771     if (seg_file.file == NULL)
772     {
773     fprintf(stderr, "Error: cannot open file %s for reading\n",
774     left_file_names[i].c_str());
775     exit(1);
776     }
777     left_files.push_back(seg_file);
778     }
779    
780     if(optind >= argc)
781     {
782     print_usage();
783     return 1;
784     }
785    
786     string right_file_list = argv[optind++];
787     vector<string> right_file_names;
788     vector<FZPipe> right_files;
789     tokenize(right_file_list, ",", right_file_names);
790     for (size_t i = 0; i < right_file_names.size(); ++i)
791     {
792     FZPipe seg_file(right_file_names[i], unzcmd);
793     if (seg_file.file == NULL)
794     {
795     fprintf(stderr, "Error: cannot open %s for reading\n",
796     right_file_names[i].c_str());
797     exit(1);
798     }
799     right_files.push_back(seg_file);
800     }
801    
802     ifstream ref_stream(ref_fasta.c_str(), ifstream::in);
803    
804     FILE* splice_db = fopen(junctions_file_name.c_str(), "w");
805     if (splice_db == NULL)
806     {
807     fprintf(stderr, "Error: cannot open junctions file %s for writing\n",
808     junctions_file_name.c_str());
809     exit(1);
810     }
811 gpertea 154
812     FILE* fusion_db = fopen(fusions_file_name.c_str(), "w");
813     if (splice_db == NULL)
814     {
815     fprintf(stderr, "Error: cannot open fusions file %s for writing\n",
816     fusions_file_name.c_str());
817     exit(1);
818     }
819    
820 gpertea 29 closure_driver(left_files,
821     right_files,
822     ref_stream,
823 gpertea 154 splice_db,
824     fusion_db);
825 gpertea 29
826     return 0;
827     }