ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/tophat_reports.cpp
(Generate patch)
# Line 44 | Line 44
44   #include "wiggles.h"
45   #include "tokenize.h"
46   #include "reads.h"
47 + #include "coverage.h"
48  
49  
50   #include "inserts.h"
# Line 52 | Line 53
53   using namespace seqan;
54   using std::set;
55  
56 + static const JunctionSet empty_junctions;
57 + static const InsertionSet empty_insertions;
58 + static const DeletionSet empty_deletions;
59 + static const FusionSet empty_fusions;
60 + static const Coverage empty_coverage;
61 +
62   // daehwan - this is redundancy, which should be removed.
63   void get_seqs(istream& ref_stream,
64                RefSequenceTable& rt,
# Line 75 | Line 82
82  
83   struct cmp_read_alignment
84   {
78  cmp_read_alignment(const JunctionSet& gtf_juncs) :
79    _gtf_juncs(&gtf_juncs) {}
80    
85    bool operator() (const BowtieHit& l, const BowtieHit& r) const
86    {
87 <    FragmentAlignmentGrade gl(l, *_gtf_juncs);
84 <    FragmentAlignmentGrade gr(r, *_gtf_juncs);
85 <    bool better = gr < gl;
86 <    bool worse = gl < gr;
87 <
88 <    if (better && !worse)
89 <      return true;
90 <    else        
91 <      return false;
87 >    return l.alignment_score() > r.alignment_score();
88    }
93
94  const JunctionSet* _gtf_juncs;
89   };
90  
91   void read_best_alignments(const HitsForRead& hits_for_read,
98                          FragmentAlignmentGrade& best_grade,
92                            HitsForRead& best_hits,
93 <                          const JunctionSet& gtf_junctions)
93 >                          const JunctionSet& gtf_junctions,
94 >                          const JunctionSet& junctions = empty_junctions,
95 >                          const InsertionSet& insertions = empty_insertions,
96 >                          const DeletionSet& deletions = empty_deletions,
97 >                          const FusionSet& fusions = empty_fusions,
98 >                          const Coverage& coverage = empty_coverage,
99 >                          bool final_report = false)
100   {
101    const vector<BowtieHit>& hits = hits_for_read.hits;
102    for (size_t i = 0; i < hits.size(); ++i)
# Line 105 | Line 104
104        if (hits[i].edit_dist() > max_read_mismatches)
105          continue;
106  
107 <      if (report_secondary_alignments)
107 >      BowtieHit hit = hits[i];
108 >      AlignStatus align_status(hit, gtf_junctions,
109 >                               junctions, insertions, deletions, fusions, coverage);
110 >      hit.alignment_score(align_status._alignment_score);
111 >
112 >      if (report_secondary_alignments || !final_report)
113          {
114 <          best_hits.hits.push_back(hits[i]);
114 >          best_hits.hits.push_back(hit);
115          }
116        else
117          {
114          FragmentAlignmentGrade g(hits[i], gtf_junctions);
115          
118            // Is the new status better than the current best one?
119 <          if (best_grade < g)
119 >          if (best_hits.hits.size() == 0 || cmp_read_alignment()(hit, best_hits.hits[0]))
120              {
121                best_hits.hits.clear();
122 <              best_grade = g;
121 <              best_hits.hits.push_back(hits[i]);
122 >              best_hits.hits.push_back(hit);
123              }
124 <          else if (!(g < best_grade)) // is it just as good?
124 >          else if (!cmp_read_alignment()(best_hits.hits[0], hit)) // is it just as good?
125              {
126 <              best_grade.num_alignments++;
126 <              best_hits.hits.push_back(hits[i]);
126 >              best_hits.hits.push_back(hit);
127              }
128          }
129      }
130  
131 <  if (report_secondary_alignments && best_hits.hits.size() > 0)
131 >  if ((report_secondary_alignments || !final_report) && best_hits.hits.size() > 0)
132 >    {
133 >      sort(best_hits.hits.begin(), best_hits.hits.end(), cmp_read_alignment());
134 >    }
135 >
136 >  if (final_report)
137      {
138 <      cmp_read_alignment cmp(gtf_junctions);
139 <      sort(best_hits.hits.begin(), best_hits.hits.end(), cmp);
135 <      best_grade = FragmentAlignmentGrade(best_hits.hits[0], gtf_junctions);
136 <      best_grade.num_alignments = best_hits.hits.size();
138 >      if (best_hits.hits.size() > max_multihits)
139 >        best_hits.hits.erase(best_hits.hits.begin() + max_multihits, best_hits.hits.end());
140      }
141   }
142  
143 < void set_insert_alignment_grade(const BowtieHit& lh, const BowtieHit& rh, InsertAlignmentGrade& grade)
143 > bool set_insert_alignment_grade(const BowtieHit& lh, const BowtieHit& rh, InsertAlignmentGrade& grade)
144   {
145    // max mate inner distance (genomic)
146    int min_mate_inner_dist = inner_dist_mean - inner_dist_std_dev;
# Line 145 | Line 148
148      max_mate_inner_dist = inner_dist_mean + inner_dist_std_dev;
149    
150    bool fusion = false;
151 <  if (fusion_search)
151 >  bool left_fusion = lh.fusion_opcode() != FUSION_NOTHING;
152 >  bool right_fusion = rh.fusion_opcode() != FUSION_NOTHING;
153 >  if (left_fusion && right_fusion)
154 >    return false;
155 >  
156 >  fusion = left_fusion || right_fusion;
157 >  if (!fusion && lh.ref_id() != rh.ref_id())
158 >    fusion = true;
159 >  
160 >  if (!fusion && lh.ref_id() == rh.ref_id())
161      {
162 <      bool left_fusion = lh.fusion_opcode() != FUSION_NOTHING;
151 <      bool right_fusion = rh.fusion_opcode() != FUSION_NOTHING;
152 <      if (left_fusion && right_fusion)
153 <        return;
154 <      
155 <      fusion = left_fusion || right_fusion;
156 <      if (!fusion && lh.ref_id() != rh.ref_id())
162 >      if (lh.antisense_align() == rh.antisense_align())
163          fusion = true;
164 <      
159 <      if (!fusion && lh.ref_id() == rh.ref_id())
164 >      else
165          {
166 <          if (lh.antisense_align() == rh.antisense_align())
167 <            fusion = true;
166 >          int inter_dist = 0;
167 >          if (lh.antisense_align())
168 >            inter_dist = lh.left() - rh.right();
169            else
170 <            {
171 <              int inter_dist = 0;
172 <              if (lh.antisense_align())
173 <                inter_dist = lh.left() - rh.right();
168 <              else
169 <                inter_dist = rh.left() - lh.right();
170 <              
171 <              if (inter_dist < -(int)max_insertion_length || inter_dist > (int)fusion_min_dist)
172 <                fusion = true;
173 <            }
170 >            inter_dist = rh.left() - lh.right();
171 >          
172 >          if (inter_dist < -(int)max_insertion_length || inter_dist > (int)fusion_min_dist)
173 >            fusion = true;
174          }
175      }
176  
177    grade = InsertAlignmentGrade(lh, rh, min_mate_inner_dist, max_mate_inner_dist, fusion);
178 +  
179 +  return true;
180   }
181  
182   struct cmp_pair_alignment
# Line 203 | Line 205
205                            const HitsForRead& right_hits,
206                            InsertAlignmentGrade& best_grade,
207                            vector<pair<BowtieHit, BowtieHit> >& best_hits,
208 <                          const JunctionSet& gtf_junctions)
208 >                          const JunctionSet& gtf_junctions,
209 >                          const JunctionSet& junctions = empty_junctions,
210 >                          const InsertionSet& insertions = empty_insertions,
211 >                          const DeletionSet& deletions = empty_deletions,
212 >                          const FusionSet& fusions = empty_fusions,
213 >                          const Coverage& coverage = empty_coverage,
214 >                          bool final_report = false)
215   {
216    const vector<BowtieHit>& left = left_hits.hits;
217    const vector<BowtieHit>& right = right_hits.hits;
218    
219    for (size_t i = 0; i < left.size(); ++i)
220      {
221 <      const BowtieHit& lh = left[i];
222 <      if (lh.edit_dist() > max_read_mismatches) continue;
221 >      if (left[i].edit_dist() > max_read_mismatches) continue;
222 >      
223 >      BowtieHit lh = left[i];
224 >      AlignStatus align_status(lh, gtf_junctions,
225 >                               junctions, insertions, deletions, fusions, coverage);
226 >      lh.alignment_score(align_status._alignment_score);
227 >
228        for (size_t j = 0; j < right.size(); ++j)
229          {
230 <          const BowtieHit& rh = right[j];
231 <          if (rh.edit_dist() > max_read_mismatches) continue;
230 >          if (right[j].edit_dist() > max_read_mismatches) continue;
231 >          
232 >          BowtieHit rh = right[j];
233 >          AlignStatus align_status(rh, gtf_junctions,
234 >                                   junctions, insertions, deletions, fusions, coverage);
235 >          rh.alignment_score(align_status._alignment_score);
236 >          InsertAlignmentGrade g;
237 >          bool allowed = set_insert_alignment_grade(lh, rh, g);
238  
239 <          InsertAlignmentGrade g; set_insert_alignment_grade(lh, rh, g);
239 >          if (!allowed) continue;
240 >          if (!fusion_search && !report_discordant_pair_alignments && g.is_fusion()) continue;
241  
242 <          if (report_secondary_alignments)
242 >          if (report_secondary_alignments || !final_report)
243              {
244                best_hits.push_back(make_pair(lh, rh));
245              }
# Line 235 | Line 255
255                else if (!(g < best_grade))
256                  {
257                    best_hits.push_back(make_pair(lh, rh));
238                  best_grade.num_alignments++;
258                  }
259              }
260          }
261      }
262  
263 <    if (report_secondary_alignments && best_hits.size() > 0)
263 >  if ((report_secondary_alignments || !final_report) && best_hits.size() > 0)
264      {
265        cmp_pair_alignment cmp(gtf_junctions);
266        sort(best_hits.begin(), best_hits.end(), cmp);
267        set_insert_alignment_grade(best_hits[0].first, best_hits[0].second, best_grade);
249      best_grade.num_alignments = best_hits.size();
268      }
269 +
270 +  if (final_report)
271 +    {
272 +      if (best_hits.size() > max_multihits)
273 +        best_hits.erase(best_hits.begin() + max_multihits, best_hits.end());
274 +    }
275 +  
276 +  best_grade.num_alignments = best_hits.size();
277   }
278  
279   enum FragmentType {FRAG_UNPAIRED, FRAG_LEFT, FRAG_RIGHT};
# Line 333 | Line 359
359                          const BowtieHit& bh,
360                          const char* bwt_buf,
361                          const char* read_alt_name,
336                        const FragmentAlignmentGrade& grade,
362                          FragmentType insert_side,
363                          int num_hits,
364                          const BowtieHit* next_hit,
# Line 346 | Line 371
371    tokenize(bwt_buf, "\t", sam_toks);
372  
373    string ref_name = sam_toks[2], ref_name2 = "";
374 <  char rebuf2[2048] = {0};
350 <  char cigar1[256] = {0}, cigar2[256] = {0};
374 >  char cigar1[1024] = {0}, cigar2[1024] = {0};
375    string left_seq, right_seq, left_qual, right_qual;
376    int left = -1, left1 = -1, left2 = -1;
377    bool fusion_alignment = false;
378    size_t XF_index = 0;
379    for (size_t i = 11; i < sam_toks.size(); ++i)
380      {
381 <      const string& tok = sam_toks[i];
381 >      string& tok = sam_toks[i];
382        if (strncmp(tok.c_str(), "XF", 2) == 0)
383          {
384            fusion_alignment = true;
# Line 376 | Line 400
400            
401            break;
402          }
403 +
404 +      else if (strncmp(tok.c_str(), "AS", 2) == 0)
405 +        {
406 +          char AS_score[128] = {0};
407 +          sprintf(AS_score, "AS:i:%d", bh.alignment_score());
408 +          tok = AS_score;
409 +        }
410      }
411    
412    string qname(read_alt_name);
# Line 402 | Line 433
433    
434    int gpos=isdigit(sam_toks[3][0]) ? atoi(sam_toks[3].c_str()) : 0;
435    int mapQ=255;
436 <  if (grade.num_alignments > 1)  {
437 <    double err_prob = 1 - (1.0 / grade.num_alignments);
436 >  if (num_hits > 1)  {
437 >    double err_prob = 1 - (1.0 / num_hits);
438      mapQ = (int)(-10.0 * log(err_prob) / log(10.0));
439    }
440    int tlen =atoi(sam_toks[8].c_str()); //TLEN
# Line 474 | Line 505
505      flag |= 0x100;
506  
507    string ref_name = sam_toks[2], ref_name2 = "";
508 <  char rebuf2[2048] = {0};
478 <  char cigar1[256] = {0}, cigar2[256] = {0};
508 >  char cigar1[1024] = {0}, cigar2[1024] = {0};
509    string left_seq, right_seq, left_qual, right_qual;
510    int left = -1, left1 = -1, left2 = -1;
511    bool fusion_alignment = false;
512    size_t XF_tok_idx = 11;
513    for (; XF_tok_idx < sam_toks.size(); ++XF_tok_idx)
514      {
515 <      const string& tok = sam_toks[XF_tok_idx];
515 >      string& tok = sam_toks[XF_tok_idx];
516        if (strncmp(tok.c_str(), "XF", 2) == 0)
517          {
518            fusion_alignment = true;
# Line 503 | Line 533
533            
534            break;
535          }
536 +
537 +      else if (strncmp(tok.c_str(), "AS", 2) == 0)
538 +        {
539 +          char AS_score[128] = {0};
540 +          sprintf(AS_score, "AS:i:%d", bh.alignment_score());
541 +          tok = AS_score;
542 +        }
543      }
544    
545    int gpos=isdigit(sam_toks[3][0]) ? atoi(sam_toks[3].c_str()) : 0;
# Line 599 | Line 636
636  
637   void print_sam_for_single(const RefSequenceTable& rt,
638                            const HitsForRead& hits,
602                          const FragmentAlignmentGrade& grade,
639                            FragmentType frag_type,
640                            Read& read,
641                            GBamWriter& bam_writer,
# Line 633 | Line 669
669                             bh,
670                             bh.hitfile_rec().c_str(),
671                             read.alt_name.c_str(),
636                           grade,
672                             frag_type,
673                             hits.hits.size(),
674                             (i < hits.hits.size()-1) ? &(hits.hits[index_vector[i+1]]) : NULL,
# Line 726 | Line 761
761                                       InsertionSet& insertions,
762                                       DeletionSet& deletions)
763   {
764 <        for (size_t i = 0; i < hits.hits.size(); ++i)
764 >  for (size_t i = 0; i < hits.hits.size(); ++i)
765 >    {
766 >      const BowtieHit& bh = hits.hits[i];
767 >      insertions_from_alignment(bh, insertions);
768 >      deletions_from_alignment(bh, deletions);
769 >    }
770 > }
771 >
772 > void update_coverage(const HitsForRead& hits,
773 >                     Coverage& coverage)
774 > {
775 >  for (size_t i = 0; i < hits.hits.size(); ++i)
776 >    {
777 >      const BowtieHit& hit = hits.hits[i];
778 >      const vector<CigarOp>& cigar = hit.cigar();
779 >      unsigned int positionInGenome = hit.left();
780 >      RefID ref_id = hit.ref_id();
781 >      
782 >      for(size_t c = 0; c < cigar.size(); ++c)
783          {
784 <                const BowtieHit& bh = hits.hits[i];
785 <                insertions_from_alignment(bh, insertions);
786 <                deletions_from_alignment(bh, deletions);
784 >          int opcode = cigar[c].opcode;
785 >          int length = cigar[c].length;
786 >          switch(opcode)
787 >            {
788 >            case REF_SKIP:
789 >            case MATCH:
790 >            case DEL:
791 >              if (opcode == MATCH)
792 >                coverage.add_coverage(ref_id, positionInGenome, length);
793 >      
794 >              positionInGenome += length;
795 >              break;
796 >            case rEF_SKIP:
797 >            case mATCH:
798 >            case dEL:
799 >              positionInGenome -= length;
800 >
801 >              if (opcode == mATCH)
802 >                coverage.add_coverage(ref_id, positionInGenome + 1, length);
803 >              break;
804 >            case FUSION_FF:
805 >            case FUSION_FR:
806 >            case FUSION_RF:
807 >            case FUSION_RR:
808 >                positionInGenome = length;
809 >                ref_id = hit.ref_id2();
810 >              break;
811 >            default:
812 >              break;
813 >            }  
814          }
815 +    }
816   }
817  
818  
738 FusionSet empty_fusions;
819   void update_fusions(const HitsForRead& hits,
820                      RefSequenceTable& rt,
821                      FusionSet& fusions,
822 <                    FusionSet& fusions_ref = empty_fusions)
822 >                    const FusionSet& fusions_ref = empty_fusions)
823   {
824    if (hits.hits.size() > fusion_multireads)
825      return;
# Line 752 | Line 832
832        if (bh.edit_dist() > fusion_read_mismatches)
833          continue;
834  
755      // daehwan
756 #if 0
757      vector<Fusion> new_fusions;
758      fusions_from_spliced_hit(bh, new_fusions);
759      
760      for(size_t i = 0; i < new_fusions.size(); ++i)
761        {
762          Fusion fusion = new_fusions[i];
763          if (fusion.left == 110343414 && fusion.right == 80211173 && hits.hits.size() > 1)
764            {
765              for (size_t t = 0; t < hits.hits.size(); ++t)
766                {
767                  const BowtieHit& lh = hits.hits[t];
768                  cout << "insert id: " << lh.insert_id() << endl;
769                  cout << (lh.antisense_align() ? "-" : "+") <<  endl;
770                  cout << lh.ref_id() << ": " << lh.left() << "-" << lh.right() << endl;
771                  cout << lh.ref_id2() << ": " << print_cigar(lh.cigar()) << endl;
772                }
773            }
774        }
775 #endif
776
835        fusions_from_alignment(bh, fusions, rt, update_stat);
836  
837        if (update_stat)
# Line 784 | Line 842
842   void update_junctions(const HitsForRead& hits,
843                        JunctionSet& junctions)
844   {
845 <        for (size_t i = 0; i < hits.hits.size(); ++i)
846 <        {
847 <                const BowtieHit& bh = hits.hits[i];
848 <                junctions_from_alignment(bh, junctions);
849 <        }
845 >  for (size_t i = 0; i < hits.hits.size(); ++i)
846 >    {
847 >      const BowtieHit& bh = hits.hits[i];
848 >      junctions_from_alignment(bh, junctions);
849 >    }
850   }
851  
852   // Extracts junctions from all the SAM hits (based on REF_SKIPs) in the hit file
# Line 865 | Line 923
923            {
924              HitsForRead best_hits;
925              best_hits.insert_id = curr_left_obs_order;
868            FragmentAlignmentGrade best_grade;
926              
927              // Process hits for left singleton, select best alignments
928 <            read_best_alignments(curr_left_hit_group, best_grade, best_hits, *gtf_junctions);
928 >            read_best_alignments(curr_left_hit_group, best_hits, *gtf_junctions);
929 >            update_coverage(best_hits, *coverage);
930              update_junctions(best_hits, *junctions);
931 +            update_insertions_and_deletions(best_hits, *insertions, *deletions);
932              update_fusions(best_hits, *rt, *fusions);
933              
934              // Get next hit group
# Line 884 | Line 943
943            {
944              HitsForRead best_hits;
945              best_hits.insert_id = curr_right_obs_order;
887            FragmentAlignmentGrade best_grade;
946              if (curr_right_obs_order >= begin_id)
947                {
948                  // Process hit for right singleton, select best alignments
949 <                read_best_alignments(curr_right_hit_group, best_grade, best_hits, *gtf_junctions);
949 >                read_best_alignments(curr_right_hit_group, best_hits, *gtf_junctions);
950 >                update_coverage(best_hits, *coverage);
951                  update_junctions(best_hits, *junctions);
952 +                update_insertions_and_deletions(best_hits, *insertions, *deletions);
953                  update_fusions(best_hits, *rt, *fusions);
954                }
955              
# Line 913 | Line 973
973                                   best_hits,
974                                   *gtf_junctions);
975  
976 <            HitsForRead single_best_hits;
977 <            single_best_hits.insert_id = curr_left_obs_order;
976 >            HitsForRead best_left_hit_group; best_left_hit_group.insert_id = curr_left_obs_order;
977 >            HitsForRead best_right_hit_group; best_right_hit_group.insert_id = curr_left_obs_order;
978  
919            // this is too much memory copy - we will make it efficient soon
979              for (size_t i = 0; i < best_hits.size(); ++i)
980                {
981 <                single_best_hits.hits.push_back(best_hits[i].first);
982 <                single_best_hits.hits.push_back(best_hits[i].second);
981 >                best_left_hit_group.hits.push_back(best_hits[i].first);
982 >                best_right_hit_group.hits.push_back(best_hits[i].second);
983                }
984  
985 <            update_junctions(single_best_hits, *junctions);
986 <            update_fusions(single_best_hits, *rt, *fusions);
985 >            update_coverage(best_left_hit_group, *coverage);
986 >            update_junctions(best_left_hit_group, *junctions);
987 >            update_insertions_and_deletions(best_left_hit_group, *insertions, *deletions);
988 >            update_fusions(best_left_hit_group, *rt, *fusions);
989 >
990 >            update_coverage(best_right_hit_group, *coverage);
991 >            update_junctions(best_right_hit_group, *junctions);
992 >            update_insertions_and_deletions(best_right_hit_group, *insertions, *deletions);
993 >            update_fusions(best_right_hit_group, *rt, *fusions);
994                      
995              l_hs.next_read_hits(curr_left_hit_group);
996              curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
# Line 943 | Line 1009
1009    string left_map_fname;
1010    string right_map_fname;
1011  
946  JunctionSet* junctions;
947  JunctionSet* gtf_junctions;
948  FusionSet* fusions;
1012    RefSequenceTable* rt;
1013  
1014 +  JunctionSet* gtf_junctions;
1015 +
1016    uint64_t begin_id;
1017    uint64_t end_id;
1018    int64_t left_map_offset;
1019    int64_t right_map_offset;
1020 +
1021 +  JunctionSet* junctions;
1022 +  InsertionSet* insertions;
1023 +  DeletionSet* deletions;
1024 +  FusionSet* fusions;
1025 +
1026 +  Coverage* coverage;
1027   };
1028  
1029   struct ReportWorker
# Line 1005 | Line 1077
1077      uint64_t curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
1078      uint64_t curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
1079  
1080 +    const bool final_report = true;
1081 +
1082      // While we still have unreported hits...
1083      Read l_read;
1084      Read r_read;
# Line 1018 | Line 1092
1092            {
1093              HitsForRead best_hits;
1094              best_hits.insert_id = curr_left_obs_order;
1021            FragmentAlignmentGrade grade;
1095              bool got_read=left_reads_file.getRead(curr_left_obs_order, l_read,
1096                                                    reads_format, false, begin_id, end_id,
1097                                                    left_um_out.file, false);
1098              //assert(got_read);
1099  
1100              if (right_reads_file.file()) {
1028           /*
1029              fprintf(left_um_out.file, "@%s #MAPPED#\n%s\n+\n%s\n", l_read.alt_name.c_str(),
1030                      l_read.seq.c_str(), l_read.qual.c_str());
1031           */
1101                got_read=right_reads_file.getRead(curr_left_obs_order, r_read,
1102                                                  reads_format, false, begin_id, end_id,
1103                                                  right_um_out.file, true);
# Line 1038 | Line 1107
1107              exclude_hits_on_filtered_junctions(*junctions, curr_left_hit_group);
1108  
1109              // Process hits for left singleton, select best alignments
1110 <            read_best_alignments(curr_left_hit_group, grade, best_hits, *gtf_junctions);
1111 <
1112 <            if (best_hits.hits.size() > max_multihits)
1044 <              {
1045 <                best_hits.hits.erase(best_hits.hits.begin() + max_multihits, best_hits.hits.end());
1046 <                grade.num_alignments = best_hits.hits.size();
1047 <              }
1110 >            read_best_alignments(curr_left_hit_group, best_hits, *gtf_junctions,
1111 >                                 *junctions, *insertions, *deletions, *fusions, *coverage,
1112 >                                 final_report);
1113  
1114              if (best_hits.hits.size() > 0)
1115                {
# Line 1054 | Line 1119
1119  
1120                  print_sam_for_single(*rt,
1121                                       best_hits,
1057                                     grade,
1122                                       (right_map_fname.empty() ? FRAG_UNPAIRED : FRAG_LEFT),
1123                                       l_read,
1124                                       bam_writer,
# Line 1072 | Line 1136
1136            {
1137              HitsForRead best_hits;
1138              best_hits.insert_id = curr_right_obs_order;
1075            FragmentAlignmentGrade grade;
1139  
1140              if (curr_right_obs_order >=  begin_id)
1141                {
# Line 1093 | Line 1156
1156                  exclude_hits_on_filtered_junctions(*junctions, curr_right_hit_group);
1157  
1158                  // Process hit for right singleton, select best alignments
1159 <                read_best_alignments(curr_right_hit_group, grade, best_hits, *gtf_junctions);
1159 >                read_best_alignments(curr_right_hit_group, best_hits, *gtf_junctions,
1160 >                                     *junctions, *insertions, *deletions, *fusions, *coverage,
1161 >                                     final_report);
1162  
1098                if (best_hits.hits.size() > max_multihits)
1099                  {
1100                    best_hits.hits.erase(best_hits.hits.begin() + max_multihits, best_hits.hits.end());
1101                    grade.num_alignments = best_hits.hits.size();
1102                  }
1103                
1163                  if (best_hits.hits.size() > 0)
1164                    {
1165                      update_junctions(best_hits, *final_junctions);
# Line 1109 | Line 1168
1168                      
1169                      print_sam_for_single(*rt,
1170                                           best_hits,
1112                                         grade,
1171                                           FRAG_RIGHT,
1172                                           r_read,
1173                                           bam_writer,
# Line 1145 | Line 1203
1203                  HitsForRead right_best_hits;
1204                  right_best_hits.insert_id = curr_right_obs_order;
1205                  
1206 <                FragmentAlignmentGrade grade;
1207 <                read_best_alignments(curr_right_hit_group, grade, right_best_hits, *gtf_junctions);
1206 >                read_best_alignments(curr_right_hit_group, right_best_hits, *gtf_junctions,
1207 >                                     *junctions, *insertions, *deletions, *fusions, *coverage,
1208 >                                     final_report);
1209  
1151                if (right_best_hits.hits.size() > max_multihits)
1152                  {
1153                    right_best_hits.hits.erase(right_best_hits.hits.begin() + max_multihits, right_best_hits.hits.end());
1154                    grade.num_alignments = right_best_hits.hits.size();
1155                  }
1156                
1210                  if (right_best_hits.hits.size() > 0)
1211                    {
1212                      update_junctions(right_best_hits, *final_junctions);
# Line 1162 | Line 1215
1215                      
1216                      print_sam_for_single(*rt,
1217                                           right_best_hits,
1165                                         grade,
1218                                           FRAG_RIGHT,
1219                                           r_read,
1220                                           bam_writer,
# Line 1177 | Line 1229
1229                  bool got_read=left_reads_file.getRead(curr_left_obs_order, l_read,
1230                                                        reads_format, false, begin_id, end_id,
1231                                                        left_um_out.file, false);
1180                //assert(got_read);
1181                /*
1182                fprintf(left_um_out.file, "@%s #MAPPED#\n%s\n+\n%s\n", l_read.alt_name.c_str(),
1183                        l_read.seq.c_str(), l_read.qual.c_str());
1184                */
1185                FragmentAlignmentGrade grade;
1186                // Process hits for left singleton, select best alignments
1187                read_best_alignments(curr_left_hit_group, grade, left_best_hits, *gtf_junctions);
1232  
1233 <                if (left_best_hits.hits.size() > max_multihits)
1234 <                  {
1235 <                    left_best_hits.hits.erase(left_best_hits.hits.begin() + max_multihits, left_best_hits.hits.end());
1236 <                    grade.num_alignments = left_best_hits.hits.size();
1193 <                  }
1233 >                // Process hits for left singleton, select best alignments
1234 >                read_best_alignments(curr_left_hit_group, left_best_hits, *gtf_junctions,
1235 >                                     *junctions, *insertions, *deletions, *fusions, *coverage,
1236 >                                     final_report);
1237                  
1238                  if (left_best_hits.hits.size() > 0)
1239                    {
# Line 1200 | Line 1243
1243                      
1244                      print_sam_for_single(*rt,
1245                                           left_best_hits,
1203                                         grade,
1246                                           FRAG_LEFT,
1247                                           l_read,
1248                                           bam_writer,
# Line 1214 | Line 1256
1256                  InsertAlignmentGrade grade;
1257                  pair_best_alignments(curr_left_hit_group,
1258                                       curr_right_hit_group,
1259 <                                     grade,
1260 <                                     best_hits,
1261 <                                     *gtf_junctions);
1259 >                                     grade, best_hits, *gtf_junctions,
1260 >                                     *junctions, *insertions, *deletions, *fusions, *coverage,
1261 >                                     final_report);
1262  
1263 <                if (best_hits.size() > max_multihits)
1264 <                  {
1265 <                    best_hits.erase(best_hits.begin() + max_multihits, best_hits.end());
1224 <                    grade.num_alignments = best_hits.size();
1225 <                  }
1226 <
1227 <                //hits for both left and right reads
1228 <                HitsForRead single_best_hits;
1229 <                single_best_hits.insert_id = curr_left_obs_order;
1263 >                HitsForRead best_left_hit_group; best_left_hit_group.insert_id = curr_left_obs_order;
1264 >                HitsForRead best_right_hit_group; best_right_hit_group.insert_id = curr_left_obs_order;
1265 >                
1266                  for (size_t i = 0; i < best_hits.size(); ++i)
1267                    {
1268 <                    single_best_hits.hits.push_back(best_hits[i].first);
1269 <                    single_best_hits.hits.push_back(best_hits[i].second);
1268 >                    best_left_hit_group.hits.push_back(best_hits[i].first);
1269 >                    best_right_hit_group.hits.push_back(best_hits[i].second);
1270                    }
1271 <                
1272 <                if (single_best_hits.hits.size() > 0)
1271 >
1272 >                if (best_hits.size() > 0)
1273                    {
1274 <                    update_junctions(single_best_hits, *final_junctions);
1275 <                    update_insertions_and_deletions(single_best_hits, *final_insertions, *final_deletions);
1274 >                    update_junctions(best_left_hit_group, *final_junctions);
1275 >                    update_insertions_and_deletions(best_left_hit_group, *final_insertions, *final_deletions);
1276 >                    update_fusions(best_left_hit_group, *rt, *final_fusions, *fusions);
1277 >
1278 >                    update_junctions(best_right_hit_group, *final_junctions);
1279 >                    update_insertions_and_deletions(best_right_hit_group, *final_insertions, *final_deletions);
1280 >                    update_fusions(best_right_hit_group, *rt, *final_fusions, *fusions);
1281  
1282                      pair_support(best_hits, *final_fusions, *fusions);
1242                    update_fusions(single_best_hits, *rt, *final_fusions, *fusions);
1283  
1284                      print_sam_for_pair(*rt,
1285                                         best_hits,
# Line 1262 | Line 1302
1302            }
1303        } //while we still have unreported hits..
1304      //print the remaining unmapped reads at the end of each reads' stream
1305 +
1306      left_reads_file.getRead(VMAXINT32, l_read,
1307                              reads_format, false, begin_id, end_id,
1308                              left_um_out.file, false);
# Line 1270 | Line 1311
1311                                 reads_format, false, begin_id, end_id,
1312                                 right_um_out.file, false);
1313  
1314 +
1315 +    // pclose (pipe close), which waits for a process to end, seems to conflict with boost::thread::join somehow,
1316 +    // resulting in deadlock like behavior.
1317 + #if 0
1318      left_um_out.close();
1319      right_um_out.close();
1320 + #endif
1321  
1322      for (size_t i = 0; i < hit_factories.size(); ++i)
1323        delete hit_factories[i];
# Line 1291 | Line 1337
1337    string right_um_fname;
1338  
1339    JunctionSet* gtf_junctions;
1340 +  
1341    JunctionSet* junctions;
1342 +  InsertionSet* insertions;
1343 +  DeletionSet* deletions;
1344    FusionSet* fusions;
1345 +  Coverage* coverage;
1346 +
1347    JunctionSet* final_junctions;
1348    InsertionSet* final_insertions;
1349    DeletionSet* final_deletions;
# Line 1332 | Line 1383
1383    JunctionSet gtf_junctions;
1384    if (!gtf_juncs.empty())
1385      {
1386 <      char splice_buf[2048];
1386 >      char splice_buf[4096];
1387        FILE* splice_coords = fopen(gtf_juncs.c_str(), "r");
1388        if (splice_coords)
1389          {
# Line 1359 | Line 1410
1410                uint32_t right_coord = atoi(scan_right_coord);
1411                bool antisense = *orientation == '-';
1412  
1413 <              // add 1 to left_coord to meet BED format
1363 <              gtf_junctions.insert(make_pair<Junction, JunctionStats>(Junction(ref_id, left_coord + 1, right_coord, antisense), JunctionStats()));
1413 >              gtf_junctions.insert(make_pair<Junction, JunctionStats>(Junction(ref_id, left_coord, right_coord, antisense), JunctionStats()));
1414              }
1415          }
1416        fprintf(stderr, "Loaded %d GFF junctions from %s.\n", (int)(gtf_junctions.size()), gtf_juncs.c_str());
# Line 1384 | Line 1434
1434      }
1435  
1436    JunctionSet vjunctions[num_threads];
1437 +  InsertionSet vinsertions[num_threads];
1438 +  DeletionSet vdeletions[num_threads];
1439    FusionSet vfusions[num_threads];
1440 +  Coverage vcoverages[num_threads];
1441 +  
1442    vector<boost::thread*> threads;
1443    for (int i = 0; i < num_threads; ++i)
1444      {
# Line 1392 | Line 1446
1446  
1447        worker.left_map_fname = left_map_fname;
1448        worker.right_map_fname = right_map_fname;
1449 +      worker.rt = &rt;
1450 +      worker.gtf_junctions = &gtf_junctions;
1451 +      
1452        worker.junctions = &vjunctions[i];
1453 +      worker.insertions = &vinsertions[i];
1454 +      worker.deletions = &vdeletions[i];
1455        worker.fusions = &vfusions[i];
1456 <      worker.gtf_junctions = &gtf_junctions;
1398 <      worker.rt = &rt;
1456 >      worker.coverage = &vcoverages[i];
1457  
1458        worker.right_map_offset = 0;
1459        
# Line 1430 | Line 1488
1488      }
1489    threads.clear();
1490  
1491 <  JunctionSet junctions = vjunctions[0];
1492 <  FusionSet fusions = vfusions[0];
1491 >  JunctionSet& junctions = vjunctions[0];
1492 >  InsertionSet& insertions = vinsertions[0];
1493 >  DeletionSet& deletions = vdeletions[0];
1494 >  FusionSet& fusions = vfusions[0];
1495 >  Coverage& coverage = vcoverages[0];
1496    for (size_t i = 1; i < num_threads; ++i)
1497      {
1498        merge_with(junctions, vjunctions[i]);
1499        vjunctions[i].clear();
1500  
1501 +      merge_with(insertions, vinsertions[i]);
1502 +      vinsertions[i].clear();
1503 +      
1504 +      merge_with(deletions, vdeletions[i]);
1505 +      vdeletions[i].clear();
1506 +
1507        merge_with(fusions, vfusions[i]);
1508        vfusions[i].clear();
1509 +
1510 +      coverage.merge_with(vcoverages[i]);
1511 +      vcoverages[i].clear();
1512      }
1513  
1514 +  coverage.calculate_coverage();
1515 +
1516    size_t num_unfiltered_juncs = junctions.size();
1517    fprintf(stderr, "Loaded %lu junctions\n", (long unsigned int) num_unfiltered_juncs);
1518  
1519    // Read hits, extract junctions, and toss the ones that arent strongly enough supported.
1448
1520    filter_junctions(junctions, gtf_junctions);
1521    
1522    //size_t num_juncs_after_filter = junctions.size();
# Line 1453 | Line 1524
1524    //     num_unfiltered_juncs - num_juncs_after_filter);
1525  
1526    /*
1527 <        size_t small_overhangs = 0;
1528 <        for (JunctionSet::iterator i = junctions.begin(); i != junctions.end(); ++i)
1527 >    size_t small_overhangs = 0;
1528 >    for (JunctionSet::iterator i = junctions.begin(); i != junctions.end(); ++i)
1529      {
1530 <        if (i->second.accepted &&
1531 <            (i->second.left_extent < min_anchor_len || i->second.right_extent < min_anchor_len))
1532 <            {
1533 <            small_overhangs++;
1534 <            }
1530 >    if (i->second.accepted &&
1531 >    (i->second.left_extent < min_anchor_len || i->second.right_extent < min_anchor_len))
1532 >    {
1533 >    small_overhangs++;
1534 >    }
1535      }
1536      
1537 <        if (small_overhangs >0)
1538 <        fprintf(stderr, "Warning: %lu small overhang junctions!\n", (long unsigned int)small_overhangs);
1537 >    if (small_overhangs >0)
1538 >    fprintf(stderr, "Warning: %lu small overhang junctions!\n", (long unsigned int)small_overhangs);
1539    */
1540  
1541    JunctionSet vfinal_junctions[num_threads];
# Line 1496 | Line 1567
1567  
1568        worker.gtf_junctions = &gtf_junctions;
1569        worker.junctions = &junctions;
1570 +      worker.insertions = &insertions;
1571 +      worker.deletions = &deletions;
1572        worker.fusions = &fusions;
1573 +      worker.coverage = &coverage;
1574 +      
1575        worker.final_junctions = &vfinal_junctions[i];
1576        worker.final_insertions = &vfinal_insertions[i];
1577        worker.final_deletions = &vfinal_deletions[i];

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines