ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/closures.cpp
(Generate patch)
# Line 14 | Line 14
14   #include <iostream>
15   #include <fstream>
16   #include <getopt.h>
17 + #include <seqan/sequence.h>
18 + #include <seqan/file.h>
19 +
20   #include "bwt_map.h"
21   #include "inserts.h"
22   #include "closures.h"
23   #include "reads.h"
24   #include "tokenize.h"
25 < #include <seqan/sequence.h>
23 < #include <seqan/file.h>
25 > #include "fusions.h"
26  
27   using namespace seqan;
28   using namespace std;
# Line 84 | Line 86
86      }  
87   }
88  
89 + void find_fusion_closure(HitsForRead& left_hits,
90 +                         HitsForRead& right_hits,
91 +                         std::set<Fusion>& fusions)
92 + {
93 +  if (left_hits.hits.size() > 3 || right_hits.hits.size() > 3)
94 +    return;
95 +
96 +  for (size_t left_index = 0; left_index < left_hits.hits.size(); ++left_index)
97 +    {
98 +      for (size_t right_index = 0; right_index < right_hits.hits.size(); ++right_index)
99 +        {
100 +          BowtieHit* leftHit = &left_hits.hits[left_index];
101 +          BowtieHit* rightHit = &right_hits.hits[right_index];
102 +
103 +          uint32_t dir = FUSION_FF;
104 +          if (leftHit->antisense_align() != rightHit->antisense_align())
105 +            dir = FUSION_FF;
106 +          else if (!leftHit->antisense_align())
107 +            dir = FUSION_FR;
108 +          else
109 +            dir = FUSION_RF;
110 +
111 +          if (dir == FUSION_FF && leftHit->antisense_align())
112 +            {
113 +              BowtieHit * tmp = leftHit;
114 +              leftHit = rightHit;
115 +              rightHit = tmp;
116 +            }
117 +          
118 +          uint32_t left, right;
119 +          if (dir == FUSION_FF || dir == FUSION_FR)
120 +            left = leftHit->right() - 4;
121 +          else
122 +            left = leftHit->left() + 4;
123 +          
124 +          if (dir == FUSION_FF || dir == FUSION_RF)
125 +            right = rightHit->left() + 4;
126 +          else
127 +            right = rightHit->right() - 4;
128 +
129 +          if (leftHit->ref_id() == rightHit->ref_id() && dir == FUSION_FF)
130 +            {
131 +              int dist = 0;
132 +              dist = rightHit->left() - leftHit->right();
133 +              
134 +              if (dist >= 0 && dist <= (int)fusion_min_dist)
135 +                continue;
136 +            }
137 +          
138 +          uint32_t ref_id1 = leftHit->ref_id();
139 +          uint32_t ref_id2 = rightHit->ref_id();
140 +          
141 +          if (dir == FUSION_FR || dir == FUSION_RF)
142 +            {
143 +              if ((ref_id2 < ref_id1) || (ref_id1 == ref_id2 && left > right))
144 +                {
145 +                  uint32_t temp = ref_id1;
146 +                  ref_id1 = ref_id2;
147 +                  ref_id2 = temp;
148 +                  
149 +                  temp = left;
150 +                  left = right;
151 +                  right = temp;
152 +                }
153 +            }
154 +
155 +          fusions.insert(Fusion(ref_id1, ref_id2, left, right, dir));
156 +        }
157 +    }
158 + }
159 +
160   bool prefer_shorter_pairs = true;
161  
162   typedef pair<InsertAlignmentGrade, vector<InsertAlignment> > BestPairingHits;
# Line 440 | Line 513
513   void closure_driver(vector<FZPipe>& map1,
514                      vector<FZPipe>& map2,
515                      ifstream& ref_stream,
516 <                    FILE* juncs_file)
516 >                    FILE* juncs_file,
517 >                    FILE* fusions_out)
518   {
519    typedef RefSequenceTable::Sequence Reference;
520    
521    ReadTable it;
522    RefSequenceTable rt(true);
523 <  
524 <  BowtieHitFactory hit_factory(it,rt);
523 >
524 >  BowtieHitFactory hit_factory(it, rt);
525 >
526 >  std::set<Fusion> fusions;
527    
528    fprintf (stderr, "Finding near-covered motifs...");
529    CoverageMapVisitor cov_map_visitor(ref_stream, rt);
# Line 491 | Line 567
567            
568            while (curr_left_obs_order == curr_right_obs_order &&
569                   curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
570 <            {                  
571 <              if (coverage_attempts++ % 1000 == 0)
572 <                fprintf (stderr, "Adding covered motifs from pair %d\n", coverage_attempts);
570 >            {
571 >              if (num == 0)
572 >                find_fusion_closure(curr_left_hit_group, curr_right_hit_group, fusions);
573 >              
574 >              if (coverage_attempts++ % 10000 == 0)
575 >                fprintf (stderr, "Adding covered motifs from pair %d\n", coverage_attempts);
576 >
577                visit_best_pairing(curr_left_hit_group, curr_right_hit_group, cov_map_visitor);
578                
579                left_hs.next_read_hits(curr_left_hit_group);
# Line 556 | Line 636
636            while (curr_left_obs_order == curr_right_obs_order &&
637                   curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
638              {  
639 <              if (closure_attempts++ % 1000 == 0)
640 <                fprintf (stderr, "Trying to close pair %d\n", closure_attempts);
639 >              if (closure_attempts++ % 10000 == 0)
640 >                fprintf (stderr, "Trying to close pair %d\n", closure_attempts);
641 >
642                visit_best_pairing(curr_left_hit_group, curr_right_hit_group, junc_map_visitor);
643                left_hs.next_read_hits(curr_left_hit_group);
644                curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
# Line 610 | Line 691
691    fprintf(stderr, "Successfully closed %d pairs\n", closed);
692    
693    fprintf(stderr, "Found %d total possible splices\n", num_potential_splices);
694 +
695 +  // daehwan
696 + #if 0
697 +  fprintf (stderr, "Reporting potential fusions...\n");
698 +  if(fusions_out){
699 +    for(std::set<Fusion>::iterator itr = fusions.begin(); itr != fusions.end(); ++itr){
700 +      const char* ref_name1 = rt.get_name(itr->refid1);
701 +      const char* ref_name2 = rt.get_name(itr->refid2);
702 +      
703 +      const char* dir = "";
704 +      if (itr->dir == FUSION_FR)
705 +        dir = "fr";
706 +      else if(itr->dir == FUSION_RF)
707 +        dir = "rf";
708 +      else
709 +        dir = "ff";
710 +      
711 +      fprintf(fusions_out,
712 +              "%s\t%d\t%s\t%d\t%s\n",
713 +              ref_name1,
714 +              itr->left,
715 +              ref_name2,
716 +              itr->right,
717 +              dir);
718 +    }
719 +    fclose(fusions_out);
720 +  }else{
721 +    fprintf(stderr, "Failed to open fusions file for writing\n");
722 +  }
723 + #endif
724   }
725  
726   void print_usage()
# Line 639 | Line 750
750        print_usage();
751        return 1;
752      }
753 +
754 +  string fusions_file_name = argv[optind++];
755    
756 +  if(optind >= argc)
757 +    {
758 +      print_usage();
759 +      return 1;
760 +    }
761 +
762    string ref_fasta = argv[optind++];
763 <  
763 >
764    if(optind >= argc)
765      {
766        print_usage();
# Line 697 | Line 816
816                junctions_file_name.c_str());
817        exit(1);
818      }
819 <  
819 >
820 >  FILE* fusion_db = fopen(fusions_file_name.c_str(), "w");
821 >  if (splice_db == NULL)
822 >    {
823 >      fprintf(stderr, "Error: cannot open fusions file %s for writing\n",
824 >              fusions_file_name.c_str());
825 >      exit(1);
826 >    }
827 >
828    closure_driver(left_files,
829                   right_files,
830                   ref_stream,
831 <                 splice_db);
831 >                 splice_db,
832 >                 fusion_db);
833    
834    return 0;
835   }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines