ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/tophat_reports.cpp
Revision: 256
Committed: Mon Jun 11 19:35:44 2012 UTC (12 years, 2 months ago) by gpertea
File size: 70966 byte(s)
Log Message:
sync with Daehwan's fixes

Line User Rev File contents
1 gpertea 29 /*
2     * tophat_reports.cpp
3     * TopHat
4     *
5     * Created by Cole Trapnell on 11/20/08.
6     * Copyright 2008 Cole Trapnell. All rights reserved.
7     *
8     */
9    
10     #ifdef HAVE_CONFIG_H
11     #include <config.h>
12     #else
13     #define PACKAGE_VERSION "INTERNAL"
14     #define SVN_REVISION "XXX"
15     #endif
16    
17     #include <cassert>
18     #include <cstdio>
19     #include <cstring>
20     #include <vector>
21     #include <string>
22     #include <map>
23     #include <algorithm>
24     #include <set>
25     #include <stdexcept>
26     #include <iostream>
27     #include <fstream>
28     #include <seqan/sequence.h>
29     #include <seqan/find.h>
30     #include <seqan/file.h>
31     #include <getopt.h>
32    
33 gpertea 154 #include <boost/thread.hpp>
34 gpertea 236 #include <boost/random/mersenne_twister.hpp>
35 gpertea 154
36 gpertea 29 #include "common.h"
37 gpertea 154 #include "utils.h"
38 gpertea 29 #include "bwt_map.h"
39     #include "junctions.h"
40     #include "insertions.h"
41     #include "deletions.h"
42 gpertea 154 #include "fusions.h"
43 gpertea 29 #include "align_status.h"
44     #include "fragments.h"
45     #include "wiggles.h"
46     #include "tokenize.h"
47     #include "reads.h"
48 gpertea 200 #include "coverage.h"
49 gpertea 29
50 gpertea 154
51 gpertea 29 #include "inserts.h"
52    
53     using namespace std;
54     using namespace seqan;
55     using std::set;
56    
57 gpertea 200 static const JunctionSet empty_junctions;
58     static const InsertionSet empty_insertions;
59     static const DeletionSet empty_deletions;
60     static const FusionSet empty_fusions;
61     static const Coverage empty_coverage;
62    
63 gpertea 154 // daehwan - this is redundancy, which should be removed.
64     void get_seqs(istream& ref_stream,
65     RefSequenceTable& rt,
66     bool keep_seqs = true)
67 gpertea 166 {
68 gpertea 154 while(ref_stream.good() && !ref_stream.eof())
69     {
70     RefSequenceTable::Sequence* ref_str = new RefSequenceTable::Sequence();
71     string name;
72     readMeta(ref_stream, name, Fasta());
73     string::size_type space_pos = name.find_first_of(" \t\r");
74     if (space_pos != string::npos)
75     {
76     name.resize(space_pos);
77     }
78 gpertea 245 fprintf(stderr, "\tLoading %s...", name.c_str());
79 gpertea 154 seqan::read(ref_stream, *ref_str, Fasta());
80 gpertea 245 fprintf(stderr, "done\n");
81 gpertea 166
82 gpertea 154 rt.get_id(name, keep_seqs ? ref_str : NULL, 0);
83     }
84     }
85    
86 gpertea 166 struct cmp_read_alignment
87     {
88     bool operator() (const BowtieHit& l, const BowtieHit& r) const
89     {
90 gpertea 200 return l.alignment_score() > r.alignment_score();
91 gpertea 166 }
92     };
93    
94 gpertea 135 void read_best_alignments(const HitsForRead& hits_for_read,
95 gpertea 166 HitsForRead& best_hits,
96 gpertea 200 const JunctionSet& gtf_junctions,
97     const JunctionSet& junctions = empty_junctions,
98     const InsertionSet& insertions = empty_insertions,
99     const DeletionSet& deletions = empty_deletions,
100     const FusionSet& fusions = empty_fusions,
101     const Coverage& coverage = empty_coverage,
102     bool final_report = false)
103 gpertea 29 {
104     const vector<BowtieHit>& hits = hits_for_read.hits;
105 gpertea 222
106     if (hits.size() >= max_multihits * 5)
107     return;
108    
109 gpertea 29 for (size_t i = 0; i < hits.size(); ++i)
110     {
111 gpertea 166 if (hits[i].edit_dist() > max_read_mismatches)
112     continue;
113 gpertea 163
114 gpertea 200 BowtieHit hit = hits[i];
115     AlignStatus align_status(hit, gtf_junctions,
116     junctions, insertions, deletions, fusions, coverage);
117     hit.alignment_score(align_status._alignment_score);
118    
119     if (report_secondary_alignments || !final_report)
120 gpertea 166 {
121 gpertea 200 best_hits.hits.push_back(hit);
122 gpertea 166 }
123     else
124     {
125     // Is the new status better than the current best one?
126 gpertea 200 if (best_hits.hits.size() == 0 || cmp_read_alignment()(hit, best_hits.hits[0]))
127 gpertea 166 {
128     best_hits.hits.clear();
129 gpertea 200 best_hits.hits.push_back(hit);
130 gpertea 166 }
131 gpertea 200 else if (!cmp_read_alignment()(best_hits.hits[0], hit)) // is it just as good?
132 gpertea 166 {
133 gpertea 200 best_hits.hits.push_back(hit);
134 gpertea 166 }
135     }
136     }
137 gpertea 163
138 gpertea 200 if ((report_secondary_alignments || !final_report) && best_hits.hits.size() > 0)
139 gpertea 166 {
140 gpertea 200 sort(best_hits.hits.begin(), best_hits.hits.end(), cmp_read_alignment());
141 gpertea 29 }
142 gpertea 200
143     if (final_report)
144     {
145     if (best_hits.hits.size() > max_multihits)
146     best_hits.hits.erase(best_hits.hits.begin() + max_multihits, best_hits.hits.end());
147     }
148 gpertea 29 }
149    
150 gpertea 245 bool set_insert_alignment_grade(const BowtieHit& lh, const BowtieHit& rh, const JunctionSet& junctions, InsertAlignmentGrade& grade)
151 gpertea 29 {
152 gpertea 166 bool fusion = false;
153 gpertea 200 bool left_fusion = lh.fusion_opcode() != FUSION_NOTHING;
154     bool right_fusion = rh.fusion_opcode() != FUSION_NOTHING;
155     if (left_fusion && right_fusion)
156     return false;
157    
158     fusion = left_fusion || right_fusion;
159     if (!fusion && lh.ref_id() != rh.ref_id())
160     fusion = true;
161    
162     if (!fusion && lh.ref_id() == rh.ref_id())
163 gpertea 166 {
164 gpertea 200 if (lh.antisense_align() == rh.antisense_align())
165 gpertea 166 fusion = true;
166 gpertea 200 else
167 gpertea 166 {
168 gpertea 206 int inner_dist = 0;
169 gpertea 200 if (lh.antisense_align())
170 gpertea 206 // used rh.left() instead of rh.right() for the cases,
171     // where reads overlap with each other or reads span introns
172     inner_dist = lh.left() - rh.left();
173 gpertea 200 else
174 gpertea 206 inner_dist = rh.left() - lh.left();
175 gpertea 200
176 gpertea 206 if (inner_dist < 0 || inner_dist > (int)fusion_min_dist)
177 gpertea 166 fusion = true;
178     }
179     }
180 gpertea 163
181 gpertea 256 // a read contains its partner, in which case the paired mapping will be ignored.
182     if (!fusion)
183     {
184     if (lh.left() <= rh.left() && lh.right() >= rh.right() && lh.right() - lh.left() > rh.right() - rh.left())
185     return false;
186     else if (rh.left() <= lh.left() && rh.right() >= lh.right() && rh.right() - rh.left() > lh.right() - lh.left())
187     return false;
188     }
189    
190 gpertea 245 grade = InsertAlignmentGrade(lh, rh, junctions, fusion);
191 gpertea 200
192     return true;
193 gpertea 166 }
194    
195     struct cmp_pair_alignment
196     {
197 gpertea 245 cmp_pair_alignment(const JunctionSet& junctions) :
198     _junctions(&junctions) {}
199 gpertea 166
200     bool operator() (const pair<BowtieHit, BowtieHit>& l, const pair<BowtieHit, BowtieHit>& r) const
201     {
202 gpertea 245 InsertAlignmentGrade gl; set_insert_alignment_grade(l.first, l.second, *_junctions, gl);
203     InsertAlignmentGrade gr; set_insert_alignment_grade(r.first, r.second, *_junctions, gr);
204 gpertea 166
205     bool better = gr < gl;
206     bool worse = gl < gr;
207    
208     if (better && !worse)
209     return true;
210     else
211     return false;
212     }
213    
214 gpertea 245 const JunctionSet* _junctions;
215 gpertea 166 };
216    
217     void pair_best_alignments(const HitsForRead& left_hits,
218     const HitsForRead& right_hits,
219     InsertAlignmentGrade& best_grade,
220     vector<pair<BowtieHit, BowtieHit> >& best_hits,
221 gpertea 200 const JunctionSet& gtf_junctions,
222     const JunctionSet& junctions = empty_junctions,
223     const InsertionSet& insertions = empty_insertions,
224     const DeletionSet& deletions = empty_deletions,
225     const FusionSet& fusions = empty_fusions,
226     const Coverage& coverage = empty_coverage,
227     bool final_report = false)
228 gpertea 166 {
229 gpertea 154 const vector<BowtieHit>& left = left_hits.hits;
230     const vector<BowtieHit>& right = right_hits.hits;
231 gpertea 222
232     if (left.size() >= max_multihits * 5 || right.size() >= max_multihits * 5)
233     return;
234    
235 gpertea 154 for (size_t i = 0; i < left.size(); ++i)
236     {
237 gpertea 200 if (left[i].edit_dist() > max_read_mismatches) continue;
238 gpertea 222
239 gpertea 200 BowtieHit lh = left[i];
240     AlignStatus align_status(lh, gtf_junctions,
241     junctions, insertions, deletions, fusions, coverage);
242     lh.alignment_score(align_status._alignment_score);
243    
244 gpertea 154 for (size_t j = 0; j < right.size(); ++j)
245 gpertea 29 {
246 gpertea 200 if (right[j].edit_dist() > max_read_mismatches) continue;
247    
248     BowtieHit rh = right[j];
249     AlignStatus align_status(rh, gtf_junctions,
250     junctions, insertions, deletions, fusions, coverage);
251     rh.alignment_score(align_status._alignment_score);
252     InsertAlignmentGrade g;
253 gpertea 245 bool allowed;
254     allowed = set_insert_alignment_grade(lh, rh, final_report ? junctions : gtf_junctions, g);
255 gpertea 135
256 gpertea 236 // daehwan - for debugging purposes
257 gpertea 256 #if 0
258 gpertea 236 if (lh.insert_id() == 325708 && !g.is_fusion() && false)
259     {
260     fprintf(stderr, "lh %d:%d %s score: %d (from %d) NM: %d\n",
261     lh.ref_id(), lh.left(), print_cigar(lh.cigar()).c_str(),
262     lh.alignment_score(), left[i].alignment_score(), lh.edit_dist());
263     fprintf(stderr, "rh %d:%d %s score: %d (from %d) NM: %d\n",
264     rh.ref_id(), rh.left(), print_cigar(rh.cigar()).c_str(),
265     rh.alignment_score(), right[j].alignment_score(), rh.edit_dist());
266     fprintf(stderr, "combined score: %d is_fusion(%d)\n", g.align_score(), g.is_fusion());
267     }
268 gpertea 256 #endif
269 gpertea 236
270 gpertea 200 if (!allowed) continue;
271     if (!fusion_search && !report_discordant_pair_alignments && g.is_fusion()) continue;
272 gpertea 166
273 gpertea 200 if (report_secondary_alignments || !final_report)
274 gpertea 154 {
275 gpertea 166 best_hits.push_back(make_pair(lh, rh));
276 gpertea 154 }
277 gpertea 166 else
278 gpertea 154 {
279 gpertea 166 // Is the new status better than the current best one?
280     if (best_grade < g)
281 gpertea 159 {
282 gpertea 166 best_hits.clear();
283     best_grade = g;
284     best_hits.push_back(make_pair(lh, rh));
285 gpertea 159 }
286 gpertea 166 else if (!(g < best_grade))
287     {
288     best_hits.push_back(make_pair(lh, rh));
289     }
290 gpertea 154 }
291 gpertea 29 }
292 gpertea 154 }
293 gpertea 222
294 gpertea 200 if ((report_secondary_alignments || !final_report) && best_hits.size() > 0)
295 gpertea 166 {
296 gpertea 245 cmp_pair_alignment cmp(final_report ? junctions : gtf_junctions);
297 gpertea 166 sort(best_hits.begin(), best_hits.end(), cmp);
298 gpertea 245 set_insert_alignment_grade(best_hits[0].first, best_hits[0].second, final_report ? junctions : gtf_junctions, best_grade);
299 gpertea 166 }
300 gpertea 200
301     if (final_report)
302     {
303     if (best_hits.size() > max_multihits)
304     best_hits.erase(best_hits.begin() + max_multihits, best_hits.end());
305     }
306    
307     best_grade.num_alignments = best_hits.size();
308 gpertea 29 }
309    
310     enum FragmentType {FRAG_UNPAIRED, FRAG_LEFT, FRAG_RIGHT};
311    
312     void add_auxData(vector<string>& auxdata, vector<string>& sam_toks,
313     const RefSequenceTable& rt,const BowtieHit& bh, FragmentType insert_side,
314     int num_hits, const BowtieHit* next_hit, int hitIndex) {
315 gpertea 159 bool XS_found = false;
316     if (sam_toks.size()>11) {
317 gpertea 166
318 gpertea 159 for (size_t i=11;i<sam_toks.size();++i) {
319     if (sam_toks[i].find("NH:i:")==string::npos &&
320     sam_toks[i].find("XS:i:")==string::npos)
321     auxdata.push_back(sam_toks[i]);
322    
323     if (sam_toks[i].find("XS:A:")!=string::npos)
324     XS_found = true;
325     }
326     }
327     string aux("NH:i:");
328     str_appendInt(aux, num_hits);
329     auxdata.push_back(aux);
330     if (next_hit) {
331     const char* nh_ref_name = "=";
332     nh_ref_name = rt.get_name(next_hit->ref_id());
333     assert (nh_ref_name != NULL);
334     bool same_contig=(next_hit->ref_id()==bh.ref_id());
335     aux="CC:Z:";
336     aux+= (same_contig ? "=" : nh_ref_name);
337 gpertea 29 auxdata.push_back(aux);
338 gpertea 159 aux="CP:i:";
339     int nh_gpos=next_hit->left() + 1;
340     str_appendInt(aux, nh_gpos);
341     auxdata.push_back(aux);
342     } //has next_hit
343 gpertea 29 // FIXME: this code is still a bit brittle, because it contains no
344 gpertea 135 // consistency check that the mates are on opposite strands - a current protocol
345 gpertea 29 // requirement, and that the strand indicated by the alignment is consistent
346     // with the orientation of the splices (though that should be handled upstream).
347 gpertea 159 if (!XS_found) {
348 gpertea 29 const string xs_f("XS:A:+");
349     const string xs_r("XS:A:-");
350 gpertea 159 if (library_type == FR_FIRSTSTRAND) {
351     if (insert_side == FRAG_LEFT || insert_side == FRAG_UNPAIRED) {
352     if (bh.antisense_align())
353     auxdata.push_back(xs_f);
354     else
355     auxdata.push_back(xs_r);
356     }
357     else {
358     if (bh.antisense_align())
359     auxdata.push_back(xs_r);
360     else
361     auxdata.push_back(xs_f);
362     }
363     }
364     else if (library_type == FR_SECONDSTRAND) {
365     if (insert_side == FRAG_LEFT || insert_side == FRAG_UNPAIRED){
366     if (bh.antisense_align())
367     auxdata.push_back(xs_r);
368     else
369     auxdata.push_back(xs_f);
370     }
371     else
372     {
373     if (bh.antisense_align())
374     auxdata.push_back(xs_f);
375     else
376     auxdata.push_back(xs_r);
377     }
378     }
379     }
380     if (hitIndex >= 0)
381 gpertea 29 {
382 gpertea 159 string aux("HI:i:");
383     str_appendInt(aux, hitIndex);
384     auxdata.push_back(aux);
385 gpertea 29 }
386     }
387    
388 gpertea 154 bool rewrite_sam_record(GBamWriter& bam_writer,
389     const RefSequenceTable& rt,
390 gpertea 29 const BowtieHit& bh,
391     const char* bwt_buf,
392 gpertea 135 const char* read_alt_name,
393 gpertea 29 FragmentType insert_side,
394     int num_hits,
395     const BowtieHit* next_hit,
396     bool primary,
397     int hitIndex)
398     {
399 gpertea 154 // Rewrite this hit, filling in the alt name, mate mapping
400     // and setting the pair flag
401     vector<string> sam_toks;
402     tokenize(bwt_buf, "\t", sam_toks);
403    
404     string ref_name = sam_toks[2], ref_name2 = "";
405 gpertea 200 char cigar1[1024] = {0}, cigar2[1024] = {0};
406 gpertea 154 string left_seq, right_seq, left_qual, right_qual;
407 gpertea 245 int left1 = -1, left2 = -1;
408 gpertea 154 bool fusion_alignment = false;
409     size_t XF_index = 0;
410     for (size_t i = 11; i < sam_toks.size(); ++i)
411     {
412 gpertea 200 string& tok = sam_toks[i];
413 gpertea 154 if (strncmp(tok.c_str(), "XF", 2) == 0)
414     {
415     fusion_alignment = true;
416     XF_index = i;
417 gpertea 166
418 gpertea 154 vector<string> tuple_fields;
419     tokenize(tok.c_str(), " ", tuple_fields);
420     vector<string> contigs;
421     tokenize(tuple_fields[1].c_str(), "-", contigs);
422     if (contigs.size() >= 2)
423     {
424     ref_name = contigs[0];
425     ref_name2 = contigs[1];
426 gpertea 166 }
427    
428 gpertea 154 extract_partial_hits(bh, tuple_fields[4].c_str(), tuple_fields[5].c_str(),
429     cigar1, cigar2, left_seq, right_seq,
430     left_qual, right_qual, left1, left2);
431 gpertea 166
432 gpertea 154 break;
433     }
434 gpertea 200
435     else if (strncmp(tok.c_str(), "AS", 2) == 0)
436     {
437     char AS_score[128] = {0};
438 gpertea 245 sprintf(AS_score, "AS:i:%d", min(0, bh.alignment_score()));
439 gpertea 200 tok = AS_score;
440     }
441 gpertea 154 }
442 gpertea 166
443 gpertea 135 string qname(read_alt_name);
444     size_t slash_pos=qname.rfind('/');
445     if (slash_pos!=string::npos)
446 gpertea 154 qname.resize(slash_pos);
447     //read_alt_name as QNAME
448     int flag=atoi(sam_toks[1].c_str()); //FLAG
449     if (insert_side != FRAG_UNPAIRED) {
450     //flag = atoi(sam_toks[1].c_str());
451     // mark this as a singleton mate
452     flag |= 0x0001;
453     if (insert_side == FRAG_LEFT)
454     flag |= 0x0040;
455     else if (insert_side == FRAG_RIGHT)
456     flag |= 0x0080;
457     flag |= 0x0008;
458     //char flag_buf[64];
459     //sprintf(flag_buf, "%d", flag);
460     //sam_toks[t] = flag_buf;
461     }
462     if (!primary)
463     flag |= 0x100;
464 gpertea 166
465 gpertea 154 int gpos=isdigit(sam_toks[3][0]) ? atoi(sam_toks[3].c_str()) : 0;
466 gpertea 236 int mapQ = 50;
467 gpertea 200 if (num_hits > 1) {
468     double err_prob = 1 - (1.0 / num_hits);
469 gpertea 154 mapQ = (int)(-10.0 * log(err_prob) / log(10.0));
470     }
471     int tlen =atoi(sam_toks[8].c_str()); //TLEN
472     int mate_pos=atoi(sam_toks[7].c_str());
473    
474 gpertea 245 string rg_aux = "";
475     if (!sam_readgroup_id.empty())
476     rg_aux = string("RG:Z:") + sam_readgroup_id;
477    
478 gpertea 154 GBamRecord* bamrec=NULL;
479     if (fusion_alignment) {
480     vector<string> auxdata;
481     add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex);
482 gpertea 245 if (rg_aux != "")
483     auxdata.push_back(rg_aux);
484 gpertea 154 bamrec=bam_writer.new_record(qname.c_str(), flag, ref_name.c_str(), left1 + 1, mapQ,
485     cigar1, sam_toks[6].c_str(), mate_pos,
486     tlen, left_seq.c_str(), left_qual.c_str(), &auxdata);
487     bam_writer.write(bamrec);
488     delete bamrec;
489    
490     auxdata.clear();
491     sam_toks[XF_index][5] = '2';
492     add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex);
493 gpertea 245 if (rg_aux != "")
494     auxdata.push_back(rg_aux);
495 gpertea 154 bamrec=bam_writer.new_record(qname.c_str(), flag, ref_name2.c_str(), left2 + 1, mapQ,
496     cigar2, sam_toks[6].c_str(), mate_pos,
497     tlen, right_seq.c_str(), right_qual.c_str(), &auxdata);
498     bam_writer.write(bamrec);
499     delete bamrec;
500     } else {
501     vector<string> auxdata;
502     add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex);
503 gpertea 245 if (rg_aux != "")
504     auxdata.push_back(rg_aux);
505 gpertea 154 bamrec=bam_writer.new_record(qname.c_str(), flag, sam_toks[2].c_str(), gpos, mapQ,
506     sam_toks[5].c_str(), sam_toks[6].c_str(), mate_pos,
507     tlen, sam_toks[9].c_str(), sam_toks[10].c_str(), &auxdata);
508     bam_writer.write(bamrec);
509     delete bamrec;
510     }
511 gpertea 166
512 gpertea 154 return true;
513 gpertea 29 }
514    
515 gpertea 154 bool rewrite_sam_record(GBamWriter& bam_writer,
516     const RefSequenceTable& rt,
517 gpertea 29 const BowtieHit& bh,
518     const char* bwt_buf,
519 gpertea 135 const char* read_alt_name,
520 gpertea 29 const InsertAlignmentGrade& grade,
521     FragmentType insert_side,
522     const BowtieHit* partner,
523     int num_hits,
524     const BowtieHit* next_hit,
525     bool primary,
526     int hitIndex)
527     {
528 gpertea 154 // Rewrite this hit, filling in the alt name, mate mapping
529     // and setting the pair flag
530     vector<string> sam_toks;
531     tokenize(bwt_buf, "\t", sam_toks);
532 gpertea 135 string qname(read_alt_name);
533     size_t slash_pos=qname.rfind('/');
534     if (slash_pos!=string::npos)
535 gpertea 154 qname.resize(slash_pos);
536     //read_alt_name as QNAME
537     int flag = atoi(sam_toks[1].c_str());
538     // 0x0010 (strand of query) is assumed to be set correctly
539     // to begin with
540     flag |= 0x0001; //it must be paired
541     if (insert_side == FRAG_LEFT)
542     flag |= 0x0040;
543     else if (insert_side == FRAG_RIGHT)
544     flag |= 0x0080;
545     if (!primary)
546     flag |= 0x100;
547    
548     string ref_name = sam_toks[2], ref_name2 = "";
549 gpertea 200 char cigar1[1024] = {0}, cigar2[1024] = {0};
550 gpertea 154 string left_seq, right_seq, left_qual, right_qual;
551 gpertea 245 int left1 = -1, left2 = -1;
552 gpertea 154 bool fusion_alignment = false;
553     size_t XF_tok_idx = 11;
554     for (; XF_tok_idx < sam_toks.size(); ++XF_tok_idx)
555     {
556 gpertea 200 string& tok = sam_toks[XF_tok_idx];
557 gpertea 154 if (strncmp(tok.c_str(), "XF", 2) == 0)
558     {
559     fusion_alignment = true;
560 gpertea 166
561 gpertea 154 vector<string> tuple_fields;
562     tokenize(tok.c_str(), " ", tuple_fields);
563     vector<string> contigs;
564     tokenize(tuple_fields[1].c_str(), "-", contigs);
565     if (contigs.size() >= 2)
566     {
567     ref_name = contigs[0];
568     ref_name2 = contigs[1];
569 gpertea 166 }
570    
571 gpertea 154 extract_partial_hits(bh, tuple_fields[4].c_str(), tuple_fields[5].c_str(),
572     cigar1, cigar2, left_seq, right_seq,
573     left_qual, right_qual, left1, left2);
574 gpertea 166
575 gpertea 154 break;
576     }
577 gpertea 200
578     else if (strncmp(tok.c_str(), "AS", 2) == 0)
579     {
580     char AS_score[128] = {0};
581 gpertea 245 sprintf(AS_score, "AS:i:%d", min(0, bh.alignment_score()));
582 gpertea 200 tok = AS_score;
583     }
584 gpertea 154 }
585 gpertea 166
586 gpertea 154 int gpos=isdigit(sam_toks[3][0]) ? atoi(sam_toks[3].c_str()) : 0;
587 gpertea 236 int mapQ = 50;
588 gpertea 154 if (grade.num_alignments > 1) {
589     double err_prob = 1 - (1.0 / grade.num_alignments);
590     mapQ = (int)(-10.0 * log(err_prob) / log(10.0));
591     }
592     int tlen=0; //TLEN
593     int mate_pos=atoi(sam_toks[7].c_str());
594     if (partner) {
595     if (partner->ref_id()==bh.ref_id()) {
596     sam_toks[6] = "="; //same chromosome
597     //TLEN:
598     tlen = bh.left() < partner->left() ? partner->right() - bh.left() :
599     partner->left() - bh.right();
600     }
601 gpertea 166
602 gpertea 154 else { //partner on different chromosome/contig
603     sam_toks[6] = rt.get_name(partner->ref_id());
604 gpertea 29 }
605 gpertea 154 mate_pos = partner->left() + 1;
606     if (grade.happy())
607     flag |= 0x0002;
608     if (partner->antisense_align())
609     flag |= 0x0020;
610 gpertea 29
611 gpertea 154 if (partner->fusion_opcode() != FUSION_NOTHING)
612     {
613     char partner_pos[1024];
614     sprintf(partner_pos, "XP:Z:%s-%s %d", rt.get_name(partner->ref_id()), rt.get_name(partner->ref_id2()), partner->left() + 1);
615     sam_toks.push_back(partner_pos);
616     }
617     }
618     else {
619     sam_toks[6] = "*";
620     mate_pos = 0;
621     flag |= 0x0008;
622     }
623    
624 gpertea 245 string rg_aux = "";
625     if (!sam_readgroup_id.empty())
626     rg_aux = string("RG:Z:") + sam_readgroup_id;
627    
628 gpertea 154 GBamRecord* bamrec=NULL;
629     if (fusion_alignment) {
630     vector<string> auxdata;
631     add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex);
632 gpertea 245 if (rg_aux != "")
633     auxdata.push_back(rg_aux);
634 gpertea 154 bamrec=bam_writer.new_record(qname.c_str(), flag, ref_name.c_str(), left1 + 1, mapQ,
635     cigar1, sam_toks[6].c_str(), mate_pos,
636     tlen, left_seq.c_str(), left_qual.c_str(), &auxdata);
637     bam_writer.write(bamrec);
638     delete bamrec;
639    
640     auxdata.clear();
641     sam_toks[XF_tok_idx][5] = '2';
642     add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex);
643 gpertea 245 if (rg_aux != "")
644     auxdata.push_back(rg_aux);
645 gpertea 154 bamrec=bam_writer.new_record(qname.c_str(), flag, ref_name2.c_str(), left2 + 1, mapQ,
646     cigar2, sam_toks[6].c_str(), mate_pos,
647     tlen, right_seq.c_str(), right_qual.c_str(), &auxdata);
648     bam_writer.write(bamrec);
649     delete bamrec;
650     } else {
651     vector<string> auxdata;
652     add_auxData(auxdata, sam_toks, rt, bh, insert_side, num_hits, next_hit, hitIndex);
653 gpertea 245 if (rg_aux != "")
654     auxdata.push_back(rg_aux);
655 gpertea 154 bamrec=bam_writer.new_record(qname.c_str(), flag, sam_toks[2].c_str(), gpos, mapQ,
656     sam_toks[5].c_str(), sam_toks[6].c_str(), mate_pos,
657     tlen, sam_toks[9].c_str(), sam_toks[10].c_str(), &auxdata);
658     bam_writer.write(bamrec);
659     delete bamrec;
660     }
661    
662     return true;
663 gpertea 29 }
664    
665     struct lex_hit_sort
666     {
667     lex_hit_sort(const RefSequenceTable& rt, const HitsForRead& hits)
668     : _rt(rt), _hits(hits)
669     {}
670 gpertea 166
671 gpertea 29 bool operator()(const uint32_t& l, const uint32_t& r) const
672     {
673     const BowtieHit& lhs = _hits.hits[l];
674     const BowtieHit& rhs = _hits.hits[r];
675 gpertea 154
676 gpertea 29 uint32_t l_id = lhs.ref_id();
677     uint32_t r_id = rhs.ref_id();
678     if (l_id != r_id)
679 gpertea 154 return l_id < r_id;
680    
681 gpertea 29 return lhs.left() < rhs.left();
682     }
683 gpertea 166
684 gpertea 29 const RefSequenceTable& _rt;
685     const HitsForRead& _hits;
686     };
687    
688 gpertea 135 void print_sam_for_single(const RefSequenceTable& rt,
689 gpertea 154 const HitsForRead& hits,
690 gpertea 159 FragmentType frag_type,
691 gpertea 237 const string& alt_name,
692 gpertea 159 GBamWriter& bam_writer,
693 gpertea 237 boost::random::mt19937& rng)
694 gpertea 29 {
695 gpertea 246 //assert(!read.alt_name.empty());
696 gpertea 166 if (hits.hits.empty())
697     return;
698    
699 gpertea 29 lex_hit_sort s(rt, hits);
700     vector<uint32_t> index_vector;
701 gpertea 154
702     size_t i;
703     for (i = 0; i < hits.hits.size(); ++i)
704 gpertea 29 index_vector.push_back(i);
705 gpertea 166
706 gpertea 29 sort(index_vector.begin(), index_vector.end(), s);
707 gpertea 154
708 gpertea 166 size_t primaryHit = 0;
709     if (!report_secondary_alignments)
710 gpertea 236 primaryHit = rng() % hits.hits.size();
711 gpertea 166
712 gpertea 29 bool multipleHits = (hits.hits.size() > 1);
713 gpertea 154 for (i = 0; i < hits.hits.size(); ++i)
714     {
715     size_t index = index_vector[i];
716 gpertea 166 bool primary = (index == primaryHit);
717 gpertea 154 const BowtieHit& bh = hits.hits[index];
718     rewrite_sam_record(bam_writer, rt,
719     bh,
720     bh.hitfile_rec().c_str(),
721 gpertea 237 alt_name.c_str(),
722 gpertea 154 frag_type,
723     hits.hits.size(),
724     (i < hits.hits.size()-1) ? &(hits.hits[index_vector[i+1]]) : NULL,
725     primary,
726     (multipleHits? i: -1));
727     }
728 gpertea 29 }
729    
730 gpertea 135 void print_sam_for_pair(const RefSequenceTable& rt,
731 gpertea 166 const vector<pair<BowtieHit, BowtieHit> >& best_hits,
732 gpertea 29 const InsertAlignmentGrade& grade,
733 gpertea 135 GBamWriter& bam_writer,
734 gpertea 237 const string& left_alt_name,
735     const string& right_alt_name,
736 gpertea 236 boost::random::mt19937& rng,
737 gpertea 154 uint64_t begin_id = 0,
738     uint64_t end_id = std::numeric_limits<uint64_t>::max())
739 gpertea 29 {
740 gpertea 135 Read left_read;
741     Read right_read;
742 gpertea 166 if (best_hits.empty())
743     return;
744 gpertea 163
745 gpertea 166 size_t i;
746     HitsForRead right_hits;
747     for (i = 0; i < best_hits.size(); ++i)
748     right_hits.hits.push_back(best_hits[i].second);
749    
750 gpertea 29 size_t primaryHit = 0;
751     vector<uint32_t> index_vector;
752 gpertea 166 lex_hit_sort s(rt, right_hits);
753     for (i = 0; i < right_hits.hits.size(); ++i)
754     index_vector.push_back(i);
755    
756     sort(index_vector.begin(), index_vector.end(), s);
757 gpertea 135
758 gpertea 166 if (!report_secondary_alignments)
759 gpertea 236 primaryHit = rng() % right_hits.hits.size();
760 gpertea 166
761     bool multipleHits = (best_hits.size() > 1);
762     for (i = 0; i < best_hits.size(); ++i)
763     {
764     size_t index = index_vector[i];
765     bool primary = (index == primaryHit);
766     const BowtieHit& right_bh = best_hits[index].second;
767     const BowtieHit& left_bh = best_hits[index].first;
768    
769     rewrite_sam_record(bam_writer, rt,
770     right_bh,
771     right_bh.hitfile_rec().c_str(),
772 gpertea 237 right_alt_name.c_str(),
773 gpertea 166 grade,
774     FRAG_RIGHT,
775     &left_bh,
776     best_hits.size(),
777     (i < best_hits.size() - 1) ? &(best_hits[index_vector[i+1]].second) : NULL,
778     primary,
779     (multipleHits? i: -1));
780     rewrite_sam_record(bam_writer, rt,
781     left_bh,
782     left_bh.hitfile_rec().c_str(),
783 gpertea 237 left_alt_name.c_str(),
784 gpertea 166 grade,
785     FRAG_LEFT,
786     &right_bh,
787     best_hits.size(),
788     (i < best_hits.size() - 1) ? &(best_hits[index_vector[i+1]].first) : NULL,
789     primary,
790     (multipleHits? i: -1));
791     }
792 gpertea 29 }
793    
794     /**
795     * Given all of the hits for a particular read, update the read counts for insertions and deletions.
796     * @param hits hits The alignments for a particular read
797     * @param insertions Maps from an insertion to the number of supporting reads for that insertion
798     * @param deletions Maps from a deletion to the number of supporting reads for that deletion
799     */
800     void update_insertions_and_deletions(const HitsForRead& hits,
801     InsertionSet& insertions,
802     DeletionSet& deletions)
803     {
804 gpertea 200 for (size_t i = 0; i < hits.hits.size(); ++i)
805     {
806     const BowtieHit& bh = hits.hits[i];
807     insertions_from_alignment(bh, insertions);
808     deletions_from_alignment(bh, deletions);
809     }
810     }
811    
812     void update_coverage(const HitsForRead& hits,
813     Coverage& coverage)
814     {
815     for (size_t i = 0; i < hits.hits.size(); ++i)
816     {
817     const BowtieHit& hit = hits.hits[i];
818     const vector<CigarOp>& cigar = hit.cigar();
819     unsigned int positionInGenome = hit.left();
820     RefID ref_id = hit.ref_id();
821    
822     for(size_t c = 0; c < cigar.size(); ++c)
823 gpertea 29 {
824 gpertea 200 int opcode = cigar[c].opcode;
825     int length = cigar[c].length;
826     switch(opcode)
827     {
828     case REF_SKIP:
829     case MATCH:
830     case DEL:
831     if (opcode == MATCH)
832     coverage.add_coverage(ref_id, positionInGenome, length);
833    
834     positionInGenome += length;
835     break;
836     case rEF_SKIP:
837     case mATCH:
838     case dEL:
839     positionInGenome -= length;
840    
841     if (opcode == mATCH)
842     coverage.add_coverage(ref_id, positionInGenome + 1, length);
843     break;
844     case FUSION_FF:
845     case FUSION_FR:
846     case FUSION_RF:
847     case FUSION_RR:
848     positionInGenome = length;
849     ref_id = hit.ref_id2();
850     break;
851     default:
852     break;
853     }
854 gpertea 29 }
855 gpertea 200 }
856 gpertea 29 }
857    
858    
859 gpertea 154 void update_fusions(const HitsForRead& hits,
860     RefSequenceTable& rt,
861     FusionSet& fusions,
862 gpertea 200 const FusionSet& fusions_ref = empty_fusions)
863 gpertea 154 {
864 gpertea 222 if (!fusion_search)
865     return;
866    
867 gpertea 154 if (hits.hits.size() > fusion_multireads)
868     return;
869 gpertea 29
870 gpertea 154 bool update_stat = fusions_ref.size() > 0;
871     for (size_t i = 0; i < hits.hits.size(); ++i)
872     {
873     const BowtieHit& bh = hits.hits[i];
874    
875     if (bh.edit_dist() > fusion_read_mismatches)
876     continue;
877    
878     fusions_from_alignment(bh, fusions, rt, update_stat);
879    
880     if (update_stat)
881     unsupport_fusions(bh, fusions, fusions_ref);
882     }
883     }
884    
885 gpertea 29 void update_junctions(const HitsForRead& hits,
886 gpertea 154 JunctionSet& junctions)
887 gpertea 29 {
888 gpertea 200 for (size_t i = 0; i < hits.hits.size(); ++i)
889     {
890     const BowtieHit& bh = hits.hits[i];
891     junctions_from_alignment(bh, junctions);
892     }
893 gpertea 29 }
894    
895     // Extracts junctions from all the SAM hits (based on REF_SKIPs) in the hit file
896     // resets the stream when finished.
897 gpertea 154 void exclude_hits_on_filtered_junctions(const JunctionSet& junctions, HitsForRead& hits)
898 gpertea 29 {
899 gpertea 154 HitsForRead remaining;
900     remaining.insert_id = hits.insert_id;
901 gpertea 166
902 gpertea 154 for (size_t i = 0; i < hits.hits.size(); ++i)
903     {
904     BowtieHit& bh = hits.hits[i];
905 gpertea 166 if (bh.edit_dist() > max_read_mismatches)
906     continue;
907    
908 gpertea 154 bool filter_hit = false;
909     if (!bh.contiguous())
910 gpertea 29 {
911 gpertea 154 JunctionSet bh_juncs;
912     junctions_from_alignment(bh, bh_juncs);
913     for (JunctionSet::iterator itr = bh_juncs.begin();
914     itr != bh_juncs.end();
915     itr++)
916     {
917     const Junction& j = itr->first;
918     JunctionSet::const_iterator target = junctions.find(j);
919     if (target == junctions.end() || !target->second.accepted)
920 gpertea 29 {
921 gpertea 154 filter_hit = true;
922     break;
923 gpertea 29 }
924 gpertea 154 }
925 gpertea 29 }
926 gpertea 154 if (!filter_hit)
927     remaining.hits.push_back(bh);
928     }
929     hits = remaining;
930 gpertea 29 }
931    
932 gpertea 245 void realign_reads(HitsForRead& hits,
933     const RefSequenceTable& rt,
934     const JunctionSet& junctions,
935     const JunctionSet& rev_junctions,
936     const InsertionSet& insertions,
937     const DeletionSet& deletions,
938     const DeletionSet& rev_deletions,
939     const FusionSet& fusions)
940     {
941 gpertea 254 if (color)
942     return;
943 gpertea 256
944 gpertea 245 vector<BowtieHit> additional_hits;
945     for (size_t i = 0; i < hits.hits.size(); ++i)
946     {
947     BowtieHit& bh = hits.hits[i];
948 gpertea 254 if (fusion_search && bh.fusion_opcode() != FUSION_NOTHING)
949     return;
950 gpertea 245
951     const vector<CigarOp>& cigars = bh.cigar();
952     int pos = bh.left();
953     int refid = bh.ref_id();
954     for (size_t j = 0; j < cigars.size(); ++j)
955     {
956     const CigarOp& op = cigars[j];
957     if (j == 0 || j == cigars.size() - 1)
958     {
959     // let's do this for MATCH case only,
960     if (op.opcode == MATCH || op.opcode == mATCH)
961     {
962     int left1, left2;
963     if (op.opcode == MATCH)
964     {
965     left1 = pos;
966     left2 = pos + op.length - 1;
967     }
968     else
969     {
970     left1 = pos - op.length + 1;
971     left2 = pos;
972     }
973    
974     {
975     JunctionSet::const_iterator lb, ub;
976     JunctionSet temp_junctions;
977     if (j == 0)
978     {
979     lb = rev_junctions.upper_bound(Junction(refid, left1, 0, true));
980     ub = rev_junctions.lower_bound(Junction(refid, left2, left2, false));
981     while (lb != ub && lb != rev_junctions.end())
982     {
983     Junction temp_junction = lb->first;
984     temp_junction.left = lb->first.right;
985     temp_junction.right = lb->first.left;
986     temp_junctions[temp_junction] = lb->second;
987     ++lb;
988     }
989     }
990    
991     if (j == cigars.size() - 1)
992     {
993     int common_right = left2 + max_report_intron_length;
994     lb = junctions.upper_bound(Junction(refid, left1, common_right, true));
995     ub = junctions.lower_bound(Junction(refid, left2, common_right, false));
996     while (lb != ub && lb != junctions.end())
997     {
998     temp_junctions[lb->first] = lb->second;
999     ++lb;
1000     }
1001     }
1002    
1003 gpertea 256 if (temp_junctions.size() > 10)
1004     continue;
1005    
1006 gpertea 245 JunctionSet::const_iterator junc_iter = temp_junctions.begin();
1007     for (; junc_iter != temp_junctions.end(); ++junc_iter)
1008     {
1009     Junction junc = junc_iter->first;
1010    
1011     /*
1012 gpertea 254 fprintf(stderr, "%d %d-%d %s (AS:%d XM:%d) with junc %u-%u\n",
1013 gpertea 245 bh.insert_id(), bh.left(), bh.right(),
1014     print_cigar(bh.cigar()).c_str(),
1015     bh.alignment_score(), bh.edit_dist(),
1016     junc.left, junc.right);
1017     //*/
1018    
1019     int new_left = bh.left();
1020     int intron_length = junc.right - junc.left - 1;
1021     vector<CigarOp> new_cigars;
1022     bool anchored = false;
1023     if (j == 0 && bh.left() > (int)junc.left)
1024     {
1025     new_left -= intron_length;
1026     int before_match_length = junc.left - new_left + 1;;
1027     int after_match_length = op.length - before_match_length;
1028    
1029     if (before_match_length > 0 && after_match_length)
1030     {
1031     anchored = true;
1032     new_cigars.push_back(CigarOp(MATCH, before_match_length));
1033     new_cigars.push_back(CigarOp(REF_SKIP, intron_length));
1034     new_cigars.push_back(CigarOp(MATCH, after_match_length));
1035    
1036     new_cigars.insert(new_cigars.end(), cigars.begin() + 1, cigars.end());
1037     }
1038     }
1039     else
1040     {
1041     new_cigars.insert(new_cigars.end(), cigars.begin(), cigars.end() - 1);
1042    
1043     int before_match_length = junc.left - pos + 1;
1044     int after_match_length = op.length - before_match_length;
1045    
1046     if (before_match_length > 0 && after_match_length > 0)
1047     {
1048     anchored = true;
1049     new_cigars.push_back(CigarOp(MATCH, before_match_length));
1050     new_cigars.push_back(CigarOp(REF_SKIP, intron_length));
1051     new_cigars.push_back(CigarOp(MATCH, after_match_length));
1052     }
1053     }
1054    
1055 gpertea 254 BowtieHit new_bh(bh.ref_id(),
1056     bh.ref_id2(),
1057     bh.insert_id(),
1058     new_left,
1059     new_cigars,
1060     bh.antisense_align(),
1061     junc.antisense,
1062     0, /* edit_dist - needs to be recalculated */
1063     0, /* splice_mms - needs to be recalculated */
1064     false);
1065    
1066     new_bh.seq(bh.seq());
1067     new_bh.qual(bh.qual());
1068    
1069     const RefSequenceTable::Sequence* ref_str = rt.get_seq(bh.ref_id());
1070    
1071     if (new_left >= 0 &&
1072     new_bh.right() <= (int)length(*ref_str) &&
1073     anchored)
1074 gpertea 245 {
1075     vector<string> aux_fields;
1076     bowtie_sam_extra(new_bh, rt, aux_fields);
1077    
1078     vector<string>::const_iterator aux_iter = aux_fields.begin();
1079     for (; aux_iter != aux_fields.end(); ++aux_iter)
1080     {
1081     const string& aux_field = *aux_iter;
1082     if (strncmp(aux_field.c_str(), "AS", 2) == 0)
1083     {
1084     int alignment_score = atoi(aux_field.c_str() + 5);
1085     new_bh.alignment_score(alignment_score);
1086     }
1087     else if (strncmp(aux_field.c_str(), "XM", 2) == 0)
1088     {
1089     int XM_value = atoi(aux_field.c_str() + 5);
1090     new_bh.edit_dist(XM_value);
1091     }
1092     }
1093    
1094     vector<string> sam_toks;
1095     tokenize(bh.hitfile_rec().c_str(), "\t", sam_toks);
1096    
1097     char coord[20] = {0,};
1098     sprintf(coord, "%d", new_bh.left() + 1);
1099     sam_toks[3] = coord;
1100     sam_toks[5] = print_cigar(new_bh.cigar());
1101     for (size_t a = 11; a < sam_toks.size(); ++a)
1102     {
1103     string& sam_tok = sam_toks[a];
1104     for (size_t b = 0; b < aux_fields.size(); ++b)
1105     {
1106     const string& aux_tok = aux_fields[b];
1107     if (strncmp(sam_tok.c_str(), aux_tok.c_str(), 5) == 0)
1108     {
1109     sam_tok = aux_tok;
1110     break;
1111     }
1112     }
1113     }
1114    
1115     if (!bh.is_spliced())
1116     {
1117     if (junc.antisense)
1118     sam_toks.push_back("XS:A:-");
1119     else
1120     sam_toks.push_back("XS:A:+");
1121     }
1122    
1123    
1124     string new_rec = "";
1125     for (size_t d = 0; d < sam_toks.size(); ++d)
1126     {
1127     new_rec += sam_toks[d];
1128     if (d < sam_toks.size() - 1)
1129     new_rec += "\t";
1130     }
1131    
1132     new_bh.hitfile_rec(new_rec);
1133    
1134     if (new_bh.edit_dist() <= bh.edit_dist())
1135     additional_hits.push_back(new_bh);
1136    
1137     /*
1138     fprintf(stderr, "\t%d %d-%d %s (AS:%d XM:%d) with junc %u-%u\n",
1139     new_bh.insert_id(), new_bh.left(), new_bh.right(),
1140     print_cigar(new_bh.cigar()).c_str(),
1141     new_bh.alignment_score(), new_bh.edit_dist(),
1142     junc.left, junc.right);
1143     //*/
1144     }
1145     }
1146     }
1147    
1148     #if 0
1149     {
1150     DeletionSet::const_iterator lb, ub;
1151     bool use_rev_deletions = (j == 0);
1152     const DeletionSet& curr_deletions = (use_rev_deletions ? rev_deletions : deletions);
1153     if (use_rev_deletions)
1154     {
1155     lb = curr_deletions.upper_bound(Deletion(refid, left1, 0, true));
1156     ub = curr_deletions.lower_bound(Deletion(refid, left2, left2, false));
1157     }
1158     else
1159     {
1160     int common_right = left2 + 100;
1161     lb = curr_deletions.upper_bound(Deletion(refid, left1, common_right, true));
1162     ub = curr_deletions.lower_bound(Deletion(refid, left2, common_right, false));
1163     }
1164    
1165     while (lb != curr_deletions.end() && lb != ub)
1166     {
1167     Deletion del = lb->first;
1168     if (use_rev_deletions)
1169     {
1170     int temp = del.left;
1171     del.left = del.right;
1172     del.right = temp;
1173     }
1174    
1175     // daehwan - for debuggin purposes
1176     /*
1177     fprintf(stderr, "(type%d) %d %d-%d %s (AS:%d XM:%d) with junc %u-%u\n",
1178     !use_rev_junctions,
1179     bh.insert_id(), bh.left(), bh.right(),
1180     print_cigar(bh.cigar()).c_str(),
1181     bh.alignment_score(), bh.edit_dist(),
1182     junc.left, junc.right);
1183     */
1184    
1185     int del_length = del.right - del.left - 1;
1186     int new_left = bh.left();
1187     if (j == 0)
1188     new_left -= del_length;
1189    
1190     vector<CigarOp> new_cigars;
1191     if (j == 0)
1192     {
1193     int before_match_length = del.left - new_left + 1;;
1194     int after_match_length = op.length - before_match_length;
1195    
1196     if (before_match_length > 0)
1197     new_cigars.push_back(CigarOp(MATCH, before_match_length));
1198     new_cigars.push_back(CigarOp(DEL, del_length));
1199     if (after_match_length > 0)
1200     new_cigars.push_back(CigarOp(MATCH, after_match_length));
1201    
1202     new_cigars.insert(new_cigars.end(), cigars.begin() + 1, cigars.end());
1203     }
1204     else
1205     {
1206     new_cigars.insert(new_cigars.end(), cigars.begin(), cigars.end() - 1);
1207    
1208     int before_match_length = del.left - pos + 1;
1209     int after_match_length = op.length - before_match_length;
1210    
1211     if (before_match_length > 0)
1212     new_cigars.push_back(CigarOp(MATCH, before_match_length));
1213     new_cigars.push_back(CigarOp(DEL, del_length));
1214     if (after_match_length > 0)
1215     new_cigars.push_back(CigarOp(MATCH, after_match_length));
1216     }
1217    
1218     BowtieHit new_bh(bh.ref_id(),
1219     bh.ref_id2(),
1220     bh.insert_id(),
1221     new_left,
1222     new_cigars,
1223     bh.antisense_align(),
1224     bh.antisense_splice(),
1225     0, /* edit_dist - needs to be recalculated */
1226     0, /* splice_mms - needs to be recalculated */
1227     false);
1228    
1229     new_bh.seq(bh.seq());
1230     new_bh.qual(bh.qual());
1231    
1232     vector<string> aux_fields;
1233     bowtie_sam_extra(new_bh, rt, aux_fields);
1234    
1235     vector<string>::const_iterator aux_iter = aux_fields.begin();
1236     for (; aux_iter != aux_fields.end(); ++aux_iter)
1237     {
1238     const string& aux_field = *aux_iter;
1239     if (strncmp(aux_field.c_str(), "AS", 2) == 0)
1240     {
1241     int alignment_score = atoi(aux_field.c_str() + 5);
1242     new_bh.alignment_score(alignment_score);
1243     }
1244     else if (strncmp(aux_field.c_str(), "XM", 2) == 0)
1245     {
1246     int XM_value = atoi(aux_field.c_str() + 5);
1247     new_bh.edit_dist(XM_value);
1248     }
1249     }
1250    
1251     vector<string> sam_toks;
1252     tokenize(bh.hitfile_rec().c_str(), "\t", sam_toks);
1253    
1254     char coord[20] = {0,};
1255     sprintf(coord, "%d", new_bh.left() + 1);
1256     sam_toks[3] = coord;
1257     sam_toks[5] = print_cigar(new_bh.cigar());
1258     for (size_t a = 11; a < sam_toks.size(); ++a)
1259     {
1260     string& sam_tok = sam_toks[a];
1261     for (size_t b = 0; b < aux_fields.size(); ++b)
1262     {
1263     const string& aux_tok = aux_fields[b];
1264     if (strncmp(sam_tok.c_str(), aux_tok.c_str(), 5) == 0)
1265     {
1266     sam_tok = aux_tok;
1267     break;
1268     }
1269     }
1270     }
1271    
1272     string new_rec = "";
1273     for (size_t d = 0; d < sam_toks.size(); ++d)
1274     {
1275     new_rec += sam_toks[d];
1276     if (d < sam_toks.size() - 1)
1277     new_rec += "\t";
1278     }
1279    
1280     new_bh.hitfile_rec(new_rec);
1281    
1282     if (new_bh.edit_dist() <= bh.edit_dist())
1283     additional_hits.push_back(new_bh);
1284    
1285     /*
1286     fprintf(stderr, "\t%d %d-%d %s (AS:%d XM:%d) with junc %u-%u\n",
1287     new_bh.insert_id(), new_bh.left(), new_bh.right(),
1288     print_cigar(new_bh.cigar()).c_str(),
1289     new_bh.alignment_score(), new_bh.edit_dist(),
1290     junc.left, junc.right);
1291     */
1292    
1293     ++lb;
1294     }
1295     }
1296    
1297     {
1298     InsertionSet::const_iterator lb, ub;
1299     lb = insertions.upper_bound(Insertion(refid, left1, ""));
1300     ub = insertions.lower_bound(Insertion(refid, left2, ""));
1301    
1302     while (lb != insertions.end() && lb != ub)
1303     {
1304     // daehwan - for debugging purposse
1305     break;
1306    
1307     Insertion ins = lb->first;
1308    
1309     // daehwan - for debugging purposes
1310     /*
1311     fprintf(stderr, "(type%d) %d %d-%d %s (AS:%d XM:%d) with junc %u-%u\n",
1312     !use_rev_junctions,
1313     bh.insert_id(), bh.left(), bh.right(),
1314     print_cigar(bh.cigar()).c_str(),
1315     bh.alignment_score(), bh.edit_dist(),
1316     junc.left, junc.right);
1317     */
1318    
1319     int ins_length = ins.sequence.length();
1320     int new_left = bh.left();
1321     if (j == 0)
1322     new_left -= ins_length;
1323    
1324     vector<CigarOp> new_cigars;
1325     if (j == 0)
1326     {
1327     int before_match_length = ins.left - new_left + 1;;
1328     int after_match_length = op.length - before_match_length - ins_length;
1329    
1330     if (before_match_length > 0)
1331     new_cigars.push_back(CigarOp(MATCH, before_match_length));
1332     new_cigars.push_back(CigarOp(INS, ins_length));
1333     if (after_match_length > 0)
1334     new_cigars.push_back(CigarOp(MATCH, after_match_length));
1335    
1336     new_cigars.insert(new_cigars.end(), cigars.begin() + 1, cigars.end());
1337     }
1338     else
1339     {
1340     new_cigars.insert(new_cigars.end(), cigars.begin(), cigars.end() - 1);
1341    
1342     int before_match_length = ins.left - pos + 1;
1343     int after_match_length = op.length - before_match_length - ins_length;
1344    
1345     if (before_match_length > 0)
1346     new_cigars.push_back(CigarOp(MATCH, before_match_length));
1347     new_cigars.push_back(CigarOp(INS, ins_length));
1348     if (after_match_length > 0)
1349     new_cigars.push_back(CigarOp(MATCH, after_match_length));
1350     }
1351    
1352     BowtieHit new_bh(bh.ref_id(),
1353     bh.ref_id2(),
1354     bh.insert_id(),
1355     new_left,
1356     new_cigars,
1357     bh.antisense_align(),
1358     bh.antisense_splice(),
1359     0, /* edit_dist - needs to be recalculated */
1360     0, /* splice_mms - needs to be recalculated */
1361     false);
1362    
1363     new_bh.seq(bh.seq());
1364     new_bh.qual(bh.qual());
1365    
1366     vector<string> aux_fields;
1367     bowtie_sam_extra(new_bh, rt, aux_fields);
1368    
1369     vector<string>::const_iterator aux_iter = aux_fields.begin();
1370     for (; aux_iter != aux_fields.end(); ++aux_iter)
1371     {
1372     const string& aux_field = *aux_iter;
1373     if (strncmp(aux_field.c_str(), "AS", 2) == 0)
1374     {
1375     int alignment_score = atoi(aux_field.c_str() + 5);
1376     new_bh.alignment_score(alignment_score);
1377     }
1378     else if (strncmp(aux_field.c_str(), "XM", 2) == 0)
1379     {
1380     int XM_value = atoi(aux_field.c_str() + 5);
1381     new_bh.edit_dist(XM_value);
1382     }
1383     }
1384    
1385     /*
1386     fprintf(stderr, "\t%d %d-%d %s (AS:%d XM:%d) with junc %u-%u\n",
1387     new_bh.insert_id(), new_bh.left(), new_bh.right(),
1388     print_cigar(new_bh.cigar()).c_str(),
1389     new_bh.alignment_score(), new_bh.edit_dist(),
1390     junc.left, junc.right);
1391     */
1392    
1393     ++lb;
1394     }
1395     }
1396     #endif
1397     }
1398     }
1399    
1400     switch(op.opcode)
1401     {
1402     case REF_SKIP:
1403     pos += op.length;
1404     break;
1405     case rEF_SKIP:
1406     pos -= op.length;
1407     break;
1408     case MATCH:
1409     case DEL:
1410     pos += op.length;
1411     break;
1412     case mATCH:
1413     case dEL:
1414     pos -= op.length;
1415     break;
1416     case FUSION_FF:
1417     case FUSION_FR:
1418     case FUSION_RF:
1419     case FUSION_RR:
1420     pos = op.length;
1421     refid = bh.ref_id2();
1422     break;
1423     default:
1424     break;
1425     }
1426     }
1427     }
1428    
1429     hits.hits.insert(hits.hits.end(), additional_hits.begin(), additional_hits.end());
1430    
1431     std::sort(hits.hits.begin(), hits.hits.end());
1432     vector<BowtieHit>::iterator new_end = std::unique(hits.hits.begin(), hits.hits.end());
1433     hits.hits.erase(new_end, hits.hits.end());
1434     }
1435    
1436    
1437 gpertea 154 // events include splice junction, indels, and fusion points.
1438     struct ConsensusEventsWorker
1439 gpertea 29 {
1440 gpertea 154 void operator()()
1441     {
1442     ReadTable it;
1443     vector<BAMHitFactory*> hit_factories;
1444     hit_factories.push_back(new BAMHitFactory(it, *rt));
1445     HitStream l_hs(left_map_fname, hit_factories.back(), false, true, true, true);
1446     if (left_map_offset > 0)
1447     l_hs.seek(left_map_offset);
1448    
1449     hit_factories.push_back(new BAMHitFactory(it, *rt));
1450     HitStream r_hs(right_map_fname, hit_factories.back(), false, true, true, true);
1451     if (right_map_offset > 0)
1452     r_hs.seek(right_map_offset);
1453    
1454     HitsForRead curr_left_hit_group;
1455     HitsForRead curr_right_hit_group;
1456 gpertea 166
1457 gpertea 154 l_hs.next_read_hits(curr_left_hit_group);
1458     r_hs.next_read_hits(curr_right_hit_group);
1459 gpertea 166
1460 gpertea 154 uint32_t curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
1461     uint32_t curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
1462    
1463     // While we still have unreported hits...
1464     while((curr_left_obs_order != VMAXINT32 || curr_right_obs_order != VMAXINT32) &&
1465     (curr_left_obs_order < end_id || curr_right_obs_order < end_id))
1466     {
1467     // Chew up left singletons
1468     while (curr_left_obs_order < curr_right_obs_order &&
1469     curr_left_obs_order < end_id &&
1470     curr_left_obs_order != VMAXINT32)
1471     {
1472     HitsForRead best_hits;
1473     best_hits.insert_id = curr_left_obs_order;
1474 gpertea 166
1475 gpertea 154 // Process hits for left singleton, select best alignments
1476 gpertea 200 read_best_alignments(curr_left_hit_group, best_hits, *gtf_junctions);
1477     update_coverage(best_hits, *coverage);
1478 gpertea 154 update_junctions(best_hits, *junctions);
1479 gpertea 200 update_insertions_and_deletions(best_hits, *insertions, *deletions);
1480 gpertea 154 update_fusions(best_hits, *rt, *fusions);
1481 gpertea 222
1482 gpertea 154 // Get next hit group
1483     l_hs.next_read_hits(curr_left_hit_group);
1484     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
1485     }
1486 gpertea 166
1487 gpertea 154 // Chew up right singletons
1488     while (curr_left_obs_order > curr_right_obs_order &&
1489     curr_right_obs_order < end_id &&
1490     curr_right_obs_order != VMAXINT32)
1491     {
1492     HitsForRead best_hits;
1493     best_hits.insert_id = curr_right_obs_order;
1494     if (curr_right_obs_order >= begin_id)
1495     {
1496     // Process hit for right singleton, select best alignments
1497 gpertea 200 read_best_alignments(curr_right_hit_group, best_hits, *gtf_junctions);
1498     update_coverage(best_hits, *coverage);
1499 gpertea 154 update_junctions(best_hits, *junctions);
1500 gpertea 200 update_insertions_and_deletions(best_hits, *insertions, *deletions);
1501 gpertea 154 update_fusions(best_hits, *rt, *fusions);
1502     }
1503 gpertea 166
1504 gpertea 154 // Get next hit group
1505     r_hs.next_read_hits(curr_right_hit_group);
1506     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
1507     }
1508 gpertea 166
1509 gpertea 154 // Since we have both left hits and right hits for this insert,
1510     // Find the best pairing and print both
1511     while (curr_left_obs_order == curr_right_obs_order &&
1512     curr_left_obs_order < end_id &&
1513     curr_left_obs_order != VMAXINT32)
1514     {
1515 gpertea 166 vector<pair<BowtieHit, BowtieHit> > best_hits;
1516 gpertea 163
1517 gpertea 166 InsertAlignmentGrade grade;
1518     pair_best_alignments(curr_left_hit_group,
1519     curr_right_hit_group,
1520     grade,
1521     best_hits,
1522     *gtf_junctions);
1523 gpertea 163
1524 gpertea 200 HitsForRead best_left_hit_group; best_left_hit_group.insert_id = curr_left_obs_order;
1525     HitsForRead best_right_hit_group; best_right_hit_group.insert_id = curr_left_obs_order;
1526 gpertea 166
1527 gpertea 256 if (best_hits.size() > 0)
1528 gpertea 154 {
1529 gpertea 256 for (size_t i = 0; i < best_hits.size(); ++i)
1530     {
1531     best_left_hit_group.hits.push_back(best_hits[i].first);
1532     best_right_hit_group.hits.push_back(best_hits[i].second);
1533     }
1534 gpertea 154 }
1535 gpertea 256 else
1536     {
1537     best_left_hit_group.hits = curr_left_hit_group.hits;
1538     best_right_hit_group.hits = curr_right_hit_group.hits;
1539     }
1540 gpertea 163
1541 gpertea 200 update_coverage(best_left_hit_group, *coverage);
1542     update_junctions(best_left_hit_group, *junctions);
1543     update_insertions_and_deletions(best_left_hit_group, *insertions, *deletions);
1544     update_fusions(best_left_hit_group, *rt, *fusions);
1545    
1546     update_coverage(best_right_hit_group, *coverage);
1547     update_junctions(best_right_hit_group, *junctions);
1548     update_insertions_and_deletions(best_right_hit_group, *insertions, *deletions);
1549     update_fusions(best_right_hit_group, *rt, *fusions);
1550 gpertea 166
1551 gpertea 154 l_hs.next_read_hits(curr_left_hit_group);
1552     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
1553 gpertea 166
1554 gpertea 154 r_hs.next_read_hits(curr_right_hit_group);
1555     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
1556     }
1557     }
1558 gpertea 29
1559 gpertea 154 for (size_t i = 0; i < hit_factories.size(); ++i)
1560     delete hit_factories[i];
1561 gpertea 166
1562 gpertea 154 hit_factories.clear();
1563     }
1564 gpertea 29
1565 gpertea 154 string left_map_fname;
1566     string right_map_fname;
1567    
1568     RefSequenceTable* rt;
1569    
1570 gpertea 200 JunctionSet* gtf_junctions;
1571    
1572 gpertea 154 uint64_t begin_id;
1573     uint64_t end_id;
1574     int64_t left_map_offset;
1575     int64_t right_map_offset;
1576 gpertea 200
1577     JunctionSet* junctions;
1578     InsertionSet* insertions;
1579     DeletionSet* deletions;
1580     FusionSet* fusions;
1581    
1582     Coverage* coverage;
1583 gpertea 154 };
1584    
1585     struct ReportWorker
1586     {
1587 gpertea 256 void write_singleton_alignments(uint64_t curr_obs_order,
1588     HitsForRead& curr_hit_group,
1589     ReadStream& reads_file,
1590     GBamWriter& bam_writer,
1591     GBamWriter& um_out,
1592     FragmentType fragment_type)
1593     {
1594     HitsForRead best_hits;
1595     best_hits.insert_id = curr_obs_order;
1596    
1597     realign_reads(curr_hit_group, *rt, *junctions, *rev_junctions,
1598     *insertions, *deletions, *rev_deletions, *fusions);
1599    
1600     exclude_hits_on_filtered_junctions(*junctions, curr_hit_group);
1601    
1602     // Process hits for singleton, select best alignments
1603     const bool final_report = true;
1604     read_best_alignments(curr_hit_group, best_hits, *gtf_junctions,
1605     *junctions, *insertions, *deletions, *fusions, *coverage,
1606     final_report);
1607    
1608     if (best_hits.hits.size() > 0)
1609     {
1610     Read read;
1611     bool got_read = reads_file.getRead(curr_obs_order, read,
1612     reads_format, false, begin_id, end_id,
1613     &um_out, false);
1614     assert (got_read);
1615     if (got_read)
1616     {
1617     update_junctions(best_hits, *final_junctions);
1618     update_insertions_and_deletions(best_hits, *final_insertions, *final_deletions);
1619     update_fusions(best_hits, *rt, *final_fusions, *fusions);
1620    
1621     print_sam_for_single(*rt,
1622     best_hits,
1623     fragment_type,
1624     read.alt_name,
1625     bam_writer,
1626     rng);
1627     }
1628     }
1629     }
1630    
1631 gpertea 154 void operator()()
1632     {
1633 gpertea 236 rng.seed(1);
1634    
1635 gpertea 154 ReadTable it;
1636     GBamWriter bam_writer(bam_output_fname.c_str(), sam_header.c_str());
1637    
1638     ReadStream left_reads_file(left_reads_fname);
1639     if (left_reads_file.file() == NULL)
1640     err_die("Error: cannot open %s for reading\n", left_reads_fname.c_str());
1641 gpertea 219 if (left_reads_file.isBam()) {
1642     left_reads_file.use_alt_name();
1643     left_reads_file.ignoreQC();
1644     }
1645 gpertea 154 if (left_reads_offset > 0)
1646     left_reads_file.seek(left_reads_offset);
1647 gpertea 166
1648 gpertea 219 //if (!zpacker.empty()) left_um_fname+=".z";
1649     GBamWriter* left_um_out=new GBamWriter(left_um_fname.c_str(), sam_header.c_str());
1650     GBamWriter* right_um_out=NULL;
1651 gpertea 222
1652 gpertea 154 ReadStream right_reads_file(right_reads_fname);
1653     if (right_reads_offset > 0)
1654     right_reads_file.seek(right_reads_offset);
1655 gpertea 166
1656 gpertea 219 //FZPipe right_um_out;
1657 gpertea 218 if (!right_reads_fname.empty())
1658 gpertea 154 {
1659 gpertea 219 if (right_reads_file.isBam()) {
1660     right_reads_file.use_alt_name();
1661     right_reads_file.ignoreQC();
1662     right_um_out=new GBamWriter(right_um_fname.c_str(), sam_header.c_str());
1663 gpertea 154 }
1664 gpertea 219 //if (!zpacker.empty()) right_um_fname+=".z";
1665     //if (right_um_out.openWrite(right_um_fname.c_str(), zpacker)==NULL)
1666     // err_die("Error: cannot open file %s for writing!\n",right_um_fname.c_str());
1667     }
1668 gpertea 154
1669     vector<BAMHitFactory*> hit_factories;
1670     hit_factories.push_back(new BAMHitFactory(it, *rt));
1671     HitStream left_hs(left_map_fname, hit_factories.back(), false, true, true, true);
1672     if (left_map_offset > 0)
1673     left_hs.seek(left_map_offset);
1674    
1675     hit_factories.push_back(new BAMHitFactory(it, *rt));
1676     HitStream right_hs(right_map_fname, hit_factories.back(), false, true, true, true);
1677     if (right_map_offset > 0)
1678     right_hs.seek(right_map_offset);
1679 gpertea 166
1680 gpertea 154 HitsForRead curr_left_hit_group;
1681     HitsForRead curr_right_hit_group;
1682 gpertea 166
1683 gpertea 154 left_hs.next_read_hits(curr_left_hit_group);
1684     right_hs.next_read_hits(curr_right_hit_group);
1685 gpertea 166
1686 gpertea 154 uint64_t curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
1687     uint64_t curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
1688    
1689 gpertea 200 const bool final_report = true;
1690    
1691 gpertea 154 // While we still have unreported hits...
1692     Read l_read;
1693     Read r_read;
1694     while((curr_left_obs_order != VMAXINT32 || curr_right_obs_order != VMAXINT32) &&
1695     (curr_left_obs_order < end_id || curr_right_obs_order < end_id))
1696     {
1697     // Chew up left singletons (pairs with right reads unmapped)
1698     while (curr_left_obs_order < curr_right_obs_order &&
1699     curr_left_obs_order < end_id &&
1700     curr_left_obs_order != VMAXINT32)
1701     {
1702 gpertea 256 write_singleton_alignments(curr_left_obs_order,
1703     curr_left_hit_group,
1704     left_reads_file,
1705     bam_writer,
1706     *left_um_out,
1707     right_map_fname.empty() ? FRAG_UNPAIRED : FRAG_LEFT);
1708 gpertea 245
1709 gpertea 154 // Get next hit group
1710     left_hs.next_read_hits(curr_left_hit_group);
1711     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
1712 gpertea 166 } //left singletons
1713    
1714 gpertea 154 // Chew up right singletons
1715     while (curr_left_obs_order > curr_right_obs_order &&
1716     curr_right_obs_order < end_id &&
1717     curr_right_obs_order != VMAXINT32)
1718     {
1719 gpertea 256 write_singleton_alignments(curr_right_obs_order,
1720     curr_right_hit_group,
1721     right_reads_file,
1722     bam_writer,
1723     *right_um_out,
1724     FRAG_RIGHT);
1725 gpertea 154
1726     // Get next hit group
1727     right_hs.next_read_hits(curr_right_hit_group);
1728     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
1729     }
1730 gpertea 166
1731 gpertea 154 // Since we have both left hits and right hits for this insert,
1732     // Find the best pairing and print both
1733     while (curr_left_obs_order == curr_right_obs_order &&
1734     curr_left_obs_order < end_id &&
1735     curr_left_obs_order != VMAXINT32)
1736     {
1737 gpertea 245 realign_reads(curr_left_hit_group, *rt, *junctions, *rev_junctions,
1738     *insertions, *deletions, *rev_deletions, *fusions);
1739 gpertea 154 exclude_hits_on_filtered_junctions(*junctions, curr_left_hit_group);
1740 gpertea 256
1741 gpertea 245 realign_reads(curr_right_hit_group, *rt, *junctions, *rev_junctions,
1742     *insertions, *deletions, *rev_deletions, *fusions);
1743 gpertea 154 exclude_hits_on_filtered_junctions(*junctions, curr_right_hit_group);
1744 gpertea 237
1745 gpertea 256 vector<pair<BowtieHit, BowtieHit> > best_hits;
1746 gpertea 163
1747 gpertea 256 bool paired_alignments = curr_left_hit_group.hits.size() > 0 && curr_right_hit_group.hits.size() > 0;
1748     InsertAlignmentGrade grade;
1749 gpertea 237
1750 gpertea 256 if (paired_alignments)
1751 gpertea 154 {
1752     pair_best_alignments(curr_left_hit_group,
1753     curr_right_hit_group,
1754 gpertea 200 grade, best_hits, *gtf_junctions,
1755     *junctions, *insertions, *deletions, *fusions, *coverage,
1756     final_report);
1757 gpertea 163
1758 gpertea 256 if (best_hits.size() <= 0 ||
1759     (grade.fusion && !fusion_search && !report_discordant_pair_alignments))
1760     paired_alignments = false;
1761     }
1762    
1763     if (paired_alignments)
1764     {
1765 gpertea 200 HitsForRead best_left_hit_group; best_left_hit_group.insert_id = curr_left_obs_order;
1766     HitsForRead best_right_hit_group; best_right_hit_group.insert_id = curr_left_obs_order;
1767    
1768     for (size_t i = 0; i < best_hits.size(); ++i)
1769 gpertea 154 {
1770 gpertea 200 best_left_hit_group.hits.push_back(best_hits[i].first);
1771     best_right_hit_group.hits.push_back(best_hits[i].second);
1772 gpertea 166 }
1773 gpertea 256
1774 gpertea 200 if (best_hits.size() > 0)
1775 gpertea 166 {
1776 gpertea 237 bool got_left_read = left_reads_file.getRead(best_hits[0].first.insert_id(), l_read,
1777     reads_format, false, begin_id, end_id,
1778     left_um_out, false);
1779    
1780     bool got_right_read = right_reads_file.getRead(best_hits[0].first.insert_id(), r_read,
1781     reads_format, false, begin_id, end_id,
1782     right_um_out, false);
1783 gpertea 256
1784 gpertea 237 assert (got_left_read && got_right_read);
1785 gpertea 256
1786 gpertea 237 if (got_left_read && got_right_read)
1787     {
1788     update_junctions(best_left_hit_group, *final_junctions);
1789     update_insertions_and_deletions(best_left_hit_group, *final_insertions, *final_deletions);
1790     update_fusions(best_left_hit_group, *rt, *final_fusions, *fusions);
1791    
1792     update_junctions(best_right_hit_group, *final_junctions);
1793     update_insertions_and_deletions(best_right_hit_group, *final_insertions, *final_deletions);
1794     update_fusions(best_right_hit_group, *rt, *final_fusions, *fusions);
1795    
1796     pair_support(best_hits, *final_fusions, *fusions);
1797    
1798     print_sam_for_pair(*rt,
1799     best_hits,
1800     grade,
1801     bam_writer,
1802     l_read.alt_name,
1803     r_read.alt_name,
1804     rng,
1805     begin_id,
1806     end_id);
1807     }
1808 gpertea 154 }
1809     }
1810 gpertea 256 else
1811     {
1812     if (curr_left_hit_group.hits.size() > 0)
1813     {
1814     write_singleton_alignments(curr_left_obs_order,
1815     curr_left_hit_group,
1816     left_reads_file,
1817     bam_writer,
1818     *left_um_out,
1819     (right_map_fname.empty() ? FRAG_UNPAIRED : FRAG_LEFT));
1820     }
1821    
1822     if (curr_right_hit_group.hits.size() > 0)
1823     { //only right read mapped
1824     //write it in the mapped file with the #MAPPED# flag
1825     write_singleton_alignments(curr_right_obs_order,
1826     curr_right_hit_group,
1827     right_reads_file,
1828     bam_writer,
1829     *right_um_out,
1830     FRAG_RIGHT);
1831     }
1832     }
1833 gpertea 166
1834 gpertea 154 left_hs.next_read_hits(curr_left_hit_group);
1835     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
1836 gpertea 166
1837 gpertea 154 right_hs.next_read_hits(curr_right_hit_group);
1838     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
1839     }
1840     } //while we still have unreported hits..
1841     //print the remaining unmapped reads at the end of each reads' stream
1842 gpertea 200
1843 gpertea 154 left_reads_file.getRead(VMAXINT32, l_read,
1844 gpertea 159 reads_format, false, begin_id, end_id,
1845 gpertea 219 left_um_out, false);
1846 gpertea 154 if (right_reads_file.file())
1847     right_reads_file.getRead(VMAXINT32, r_read,
1848 gpertea 159 reads_format, false, begin_id, end_id,
1849 gpertea 219 right_um_out, false);
1850 gpertea 154
1851 gpertea 200
1852 gpertea 222 // pclose (pipe close), which waits for a process to end, seems to conflict with boost::thread::join somehow,
1853     // resulting in deadlock like behavior.
1854 gpertea 219 delete left_um_out;
1855     delete right_um_out;
1856 gpertea 154
1857 gpertea 219
1858 gpertea 154 for (size_t i = 0; i < hit_factories.size(); ++i)
1859     delete hit_factories[i];
1860    
1861     hit_factories.clear();
1862     }
1863    
1864     string bam_output_fname;
1865     string sam_header_fname;
1866    
1867     string left_reads_fname;
1868     string left_map_fname;
1869     string right_reads_fname;
1870     string right_map_fname;
1871    
1872     string left_um_fname;
1873     string right_um_fname;
1874    
1875     JunctionSet* gtf_junctions;
1876 gpertea 200
1877 gpertea 154 JunctionSet* junctions;
1878 gpertea 245 JunctionSet* rev_junctions;
1879 gpertea 200 InsertionSet* insertions;
1880     DeletionSet* deletions;
1881 gpertea 245 DeletionSet* rev_deletions;
1882 gpertea 154 FusionSet* fusions;
1883 gpertea 200 Coverage* coverage;
1884    
1885 gpertea 154 JunctionSet* final_junctions;
1886     InsertionSet* final_insertions;
1887     DeletionSet* final_deletions;
1888     FusionSet* final_fusions;
1889    
1890     RefSequenceTable* rt;
1891    
1892     uint64_t begin_id;
1893     uint64_t end_id;
1894     int64_t left_reads_offset;
1895     int64_t left_map_offset;
1896     int64_t right_reads_offset;
1897     int64_t right_map_offset;
1898 gpertea 236
1899     boost::random::mt19937 rng;
1900 gpertea 154 };
1901    
1902     void driver(const string& bam_output_fname,
1903     istream& ref_stream,
1904     const string& left_map_fname,
1905     const string& left_reads_fname,
1906     const string& right_map_fname,
1907     const string& right_reads_fname,
1908 gpertea 29 FILE* junctions_out,
1909     FILE* insertions_out,
1910 gpertea 135 FILE* deletions_out,
1911 gpertea 154 FILE* fusions_out)
1912 gpertea 29 {
1913 gpertea 154 if (!parallel)
1914     num_threads = 1;
1915    
1916 gpertea 29 RefSequenceTable rt(sam_header, true);
1917 gpertea 245 get_seqs(ref_stream, rt, true);
1918 gpertea 154
1919     srandom(1);
1920 gpertea 166
1921 gpertea 29 JunctionSet gtf_junctions;
1922 gpertea 135 if (!gtf_juncs.empty())
1923 gpertea 29 {
1924 gpertea 200 char splice_buf[4096];
1925 gpertea 29 FILE* splice_coords = fopen(gtf_juncs.c_str(), "r");
1926     if (splice_coords)
1927 gpertea 135 {
1928     while (fgets(splice_buf, sizeof(splice_buf), splice_coords))
1929     {
1930     char* nl = strrchr(splice_buf, '\n');
1931     char* buf = splice_buf;
1932     if (nl) *nl = 0;
1933 gpertea 29
1934 gpertea 135 char* ref_name = get_token((char**)&buf, "\t");
1935     char* scan_left_coord = get_token((char**)&buf, "\t");
1936     char* scan_right_coord = get_token((char**)&buf, "\t");
1937     char* orientation = get_token((char**)&buf, "\t");
1938    
1939     if (!scan_left_coord || !scan_right_coord || !orientation)
1940     {
1941     fprintf(stderr,"Error: malformed splice coordinate record in %s\n:%s\n",
1942     gtf_juncs.c_str(), buf);
1943     exit(1);
1944     }
1945    
1946     uint32_t ref_id = rt.get_id(ref_name, NULL, 0);
1947     uint32_t left_coord = atoi(scan_left_coord);
1948     uint32_t right_coord = atoi(scan_right_coord);
1949     bool antisense = *orientation == '-';
1950    
1951 gpertea 254 JunctionStats junction_stat;
1952     junction_stat.gtf_match = true;
1953     junction_stat.accepted = true;
1954    
1955     gtf_junctions.insert(make_pair<Junction, JunctionStats>(Junction(ref_id, left_coord, right_coord, antisense), junction_stat));
1956 gpertea 135 }
1957     }
1958     fprintf(stderr, "Loaded %d GFF junctions from %s.\n", (int)(gtf_junctions.size()), gtf_juncs.c_str());
1959 gpertea 29 }
1960    
1961 gpertea 154 vector<uint64_t> read_ids;
1962     vector<vector<int64_t> > offsets;
1963     if (num_threads > 1)
1964     {
1965     vector<string> fnames;
1966     if (right_map_fname != "")
1967 gpertea 29 {
1968 gpertea 154 fnames.push_back(right_reads_fname);
1969     fnames.push_back(right_map_fname);
1970     }
1971     fnames.push_back(left_reads_fname);
1972     fnames.push_back(left_map_fname);
1973     bool enough_data = calculate_offsets(fnames, read_ids, offsets);
1974     if (!enough_data)
1975     num_threads = 1;
1976     }
1977 gpertea 29
1978 gpertea 154 JunctionSet vjunctions[num_threads];
1979 gpertea 200 InsertionSet vinsertions[num_threads];
1980     DeletionSet vdeletions[num_threads];
1981 gpertea 154 FusionSet vfusions[num_threads];
1982 gpertea 200 Coverage vcoverages[num_threads];
1983    
1984 gpertea 154 vector<boost::thread*> threads;
1985     for (int i = 0; i < num_threads; ++i)
1986     {
1987     ConsensusEventsWorker worker;
1988 gpertea 29
1989 gpertea 154 worker.left_map_fname = left_map_fname;
1990     worker.right_map_fname = right_map_fname;
1991 gpertea 200 worker.rt = &rt;
1992     worker.gtf_junctions = &gtf_junctions;
1993    
1994 gpertea 154 worker.junctions = &vjunctions[i];
1995 gpertea 200 worker.insertions = &vinsertions[i];
1996     worker.deletions = &vdeletions[i];
1997 gpertea 154 worker.fusions = &vfusions[i];
1998 gpertea 200 worker.coverage = &vcoverages[i];
1999 gpertea 29
2000 gpertea 154 worker.right_map_offset = 0;
2001 gpertea 166
2002 gpertea 154 if (i == 0)
2003     {
2004     worker.begin_id = 0;
2005     worker.left_map_offset = 0;
2006     }
2007     else
2008     {
2009     size_t offsets_size = offsets[i-1].size();
2010 gpertea 29
2011 gpertea 154 worker.begin_id = read_ids[i-1];
2012     worker.left_map_offset = offsets[i-1].back();
2013     if (offsets_size == 4)
2014     worker.right_map_offset = offsets[i-1][1];
2015     }
2016 gpertea 166
2017 gpertea 154 worker.end_id = (i+1 < num_threads) ? read_ids[i] : std::numeric_limits<uint64_t>::max();
2018    
2019     if (num_threads > 1)
2020     threads.push_back(new boost::thread(worker));
2021     else
2022     worker();
2023     }
2024    
2025     for (size_t i = 0; i < threads.size(); ++i)
2026 gpertea 254 {
2027 gpertea 154 threads[i]->join();
2028     delete threads[i];
2029     threads[i] = NULL;
2030     }
2031     threads.clear();
2032    
2033 gpertea 200 JunctionSet& junctions = vjunctions[0];
2034     InsertionSet& insertions = vinsertions[0];
2035     DeletionSet& deletions = vdeletions[0];
2036     FusionSet& fusions = vfusions[0];
2037     Coverage& coverage = vcoverages[0];
2038 gpertea 245 for (int i = 1; i < num_threads; ++i)
2039 gpertea 154 {
2040     merge_with(junctions, vjunctions[i]);
2041     vjunctions[i].clear();
2042    
2043 gpertea 200 merge_with(insertions, vinsertions[i]);
2044     vinsertions[i].clear();
2045    
2046     merge_with(deletions, vdeletions[i]);
2047     vdeletions[i].clear();
2048    
2049 gpertea 154 merge_with(fusions, vfusions[i]);
2050     vfusions[i].clear();
2051 gpertea 200
2052     coverage.merge_with(vcoverages[i]);
2053     vcoverages[i].clear();
2054 gpertea 154 }
2055    
2056 gpertea 254 merge_with(junctions, gtf_junctions);
2057    
2058 gpertea 200 coverage.calculate_coverage();
2059 gpertea 245
2060     JunctionSet rev_junctions;
2061     JunctionSet::const_iterator junction_iter = junctions.begin();
2062     for (; junction_iter != junctions.end(); ++junction_iter)
2063     {
2064     const Junction& junction = junction_iter->first;
2065     Junction rev_junction = junction;
2066     rev_junction.left = junction.right;
2067     rev_junction.right = junction.left;
2068     rev_junctions[rev_junction] = junction_iter->second;
2069     }
2070 gpertea 200
2071 gpertea 245 DeletionSet rev_deletions;
2072     #if 0
2073     DeletionSet::const_iterator deletion_iter = deletions.begin();
2074     for (; deletion_iter != deletions.end(); ++deletion_iter)
2075     {
2076     const Deletion& deletion = deletion_iter->first;
2077     Deletion rev_deletion = deletion;
2078     rev_deletion.left = deletion.right;
2079     rev_deletion.right = deletion.left;
2080     rev_deletions[rev_deletion] = deletion_iter->second;
2081     }
2082     #endif
2083    
2084 gpertea 154 size_t num_unfiltered_juncs = junctions.size();
2085     fprintf(stderr, "Loaded %lu junctions\n", (long unsigned int) num_unfiltered_juncs);
2086    
2087     // Read hits, extract junctions, and toss the ones that arent strongly enough supported.
2088     filter_junctions(junctions, gtf_junctions);
2089 gpertea 166
2090 gpertea 154 //size_t num_juncs_after_filter = junctions.size();
2091     //fprintf(stderr, "Filtered %lu junctions\n",
2092     // num_unfiltered_juncs - num_juncs_after_filter);
2093    
2094     /*
2095 gpertea 200 size_t small_overhangs = 0;
2096     for (JunctionSet::iterator i = junctions.begin(); i != junctions.end(); ++i)
2097 gpertea 29 {
2098 gpertea 200 if (i->second.accepted &&
2099     (i->second.left_extent < min_anchor_len || i->second.right_extent < min_anchor_len))
2100     {
2101     small_overhangs++;
2102 gpertea 29 }
2103 gpertea 200 }
2104 gpertea 166
2105 gpertea 200 if (small_overhangs >0)
2106     fprintf(stderr, "Warning: %lu small overhang junctions!\n", (long unsigned int)small_overhangs);
2107 gpertea 135 */
2108 gpertea 154
2109     JunctionSet vfinal_junctions[num_threads];
2110     InsertionSet vfinal_insertions[num_threads];
2111     DeletionSet vfinal_deletions[num_threads];
2112     FusionSet vfinal_fusions[num_threads];
2113    
2114     for (int i = 0; i < num_threads; ++i)
2115 gpertea 29 {
2116 gpertea 154 ReportWorker worker;
2117 gpertea 135
2118 gpertea 220 worker.sam_header_fname = sam_header;
2119 gpertea 154 char filename[1024] = {0};
2120     sprintf(filename, "%s%d.bam", bam_output_fname.c_str(), i);
2121     worker.bam_output_fname = filename;
2122 gpertea 220 string tmpoutdir=getFdir(worker.bam_output_fname);
2123     worker.left_um_fname = tmpoutdir;
2124     sprintf(filename, "unmapped_left_%d.bam", i);
2125     worker.left_um_fname+=filename;
2126 gpertea 166
2127 gpertea 154 if (right_reads_fname != "")
2128     {
2129 gpertea 220 sprintf(filename, "unmapped_right_%d.bam", i);
2130     worker.right_um_fname = tmpoutdir;
2131     worker.right_um_fname += filename;
2132 gpertea 154 }
2133 gpertea 166
2134 gpertea 154 worker.left_reads_fname = left_reads_fname;
2135     worker.left_map_fname = left_map_fname;
2136     worker.right_reads_fname = right_reads_fname;
2137     worker.right_map_fname = right_map_fname;
2138 gpertea 135
2139 gpertea 154 worker.gtf_junctions = &gtf_junctions;
2140     worker.junctions = &junctions;
2141 gpertea 245 worker.rev_junctions = &rev_junctions;
2142 gpertea 200 worker.insertions = &insertions;
2143     worker.deletions = &deletions;
2144 gpertea 245 worker.rev_deletions = &rev_deletions;
2145 gpertea 154 worker.fusions = &fusions;
2146 gpertea 200 worker.coverage = &coverage;
2147    
2148 gpertea 154 worker.final_junctions = &vfinal_junctions[i];
2149     worker.final_insertions = &vfinal_insertions[i];
2150     worker.final_deletions = &vfinal_deletions[i];
2151     worker.final_fusions = &vfinal_fusions[i];
2152     worker.rt = &rt;
2153 gpertea 29
2154 gpertea 154 worker.right_reads_offset = 0;
2155     worker.right_map_offset = 0;
2156 gpertea 166
2157 gpertea 154 if (i == 0)
2158     {
2159     worker.begin_id = 0;
2160     worker.left_reads_offset = 0;
2161     worker.left_map_offset = 0;
2162     }
2163     else
2164     {
2165     size_t offsets_size = offsets[i-1].size();
2166 gpertea 166
2167 gpertea 154 worker.begin_id = read_ids[i-1];
2168     worker.left_reads_offset = offsets[i-1][offsets_size - 2];
2169     worker.left_map_offset = offsets[i-1].back();
2170     if (offsets_size == 4)
2171     {
2172     worker.right_reads_offset = offsets[i-1][0];
2173     worker.right_map_offset = offsets[i-1][1];
2174     }
2175     }
2176 gpertea 166
2177 gpertea 154 worker.end_id = (i+1 < num_threads) ? read_ids[i] : std::numeric_limits<uint64_t>::max();
2178 gpertea 29
2179 gpertea 154 if (num_threads > 1)
2180     threads.push_back(new boost::thread(worker));
2181     else
2182     worker();
2183     }
2184    
2185     for (size_t i = 0; i < threads.size(); ++i)
2186     {
2187     threads[i]->join();
2188     delete threads[i];
2189     threads[i] = NULL;
2190     }
2191     threads.clear();
2192    
2193     JunctionSet final_junctions = vfinal_junctions[0];
2194     InsertionSet final_insertions = vfinal_insertions[0];
2195     DeletionSet final_deletions = vfinal_deletions[0];
2196     FusionSet final_fusions = vfinal_fusions[0];
2197 gpertea 245 for (int i = 1; i < num_threads; ++i)
2198 gpertea 154 {
2199     merge_with(final_junctions, vfinal_junctions[i]);
2200     vfinal_junctions[i].clear();
2201    
2202     merge_with(final_insertions, vfinal_insertions[i]);
2203     vfinal_insertions[i].clear();
2204 gpertea 166
2205 gpertea 154 merge_with(final_deletions, vfinal_deletions[i]);
2206     vfinal_deletions[i].clear();
2207    
2208     merge_with(final_fusions, vfinal_fusions[i]);
2209     vfinal_fusions[i].clear();
2210     }
2211    
2212     fprintf (stderr, "Reporting final accepted alignments...");
2213     fprintf (stderr, "done.\n");
2214 gpertea 166
2215 gpertea 154 //small_overhangs = 0;
2216     for (JunctionSet::iterator i = final_junctions.begin(); i != final_junctions.end();)
2217     {
2218     if (i->second.supporting_hits == 0 || i->second.left_extent < 8 || i->second.right_extent < 8)
2219     {
2220     final_junctions.erase(i++);
2221     }
2222     else
2223     {
2224     ++i;
2225     }
2226     }
2227 gpertea 166
2228 gpertea 154 // if (small_overhangs > 0)
2229     // fprintf(stderr, "Warning: %lu small overhang junctions!\n", small_overhangs);
2230    
2231     fprintf (stderr, "Printing junction BED track...");
2232     print_junctions(junctions_out, final_junctions, rt);
2233     fprintf (stderr, "done\n");
2234 gpertea 166
2235 gpertea 154 fprintf (stderr, "Printing insertions...");
2236     print_insertions(insertions_out, final_insertions,rt);
2237     fclose(insertions_out);
2238     fprintf (stderr, "done\n");
2239 gpertea 166
2240 gpertea 154 fprintf (stderr, "Printing deletions...");
2241     print_deletions(deletions_out, final_deletions, rt);
2242     fclose(deletions_out);
2243     fprintf (stderr, "done\n");
2244    
2245     if (fusion_search)
2246     {
2247     fprintf (stderr, "Printing fusions...");
2248     print_fusions(fusions_out, final_fusions, rt);
2249     fclose(fusions_out);
2250     fprintf (stderr, "done\n");
2251     }
2252 gpertea 166
2253 gpertea 154 fprintf(stderr, "Found %lu junctions from happy spliced reads\n", (long unsigned int)final_junctions.size());
2254 gpertea 29 }
2255    
2256     void print_usage()
2257     {
2258     fprintf(stderr, "Usage: tophat_reports <junctions.bed> <insertions.vcf> <deletions.vcf> <accepted_hits.sam> <left_map1,...,left_mapN> <left_reads.fq> [right_map1,...,right_mapN] [right_reads.fq]\n");
2259 gpertea 166
2260 gpertea 29 // fprintf(stderr, "Usage: tophat_reports <coverage.wig> <junctions.bed> <accepted_hits.sam> <map1.bwtout> [splice_map1.sbwtout]\n");
2261     }
2262    
2263     int main(int argc, char** argv)
2264     {
2265 gpertea 154 fprintf(stderr, "tophat_reports v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION);
2266     fprintf(stderr, "---------------------------------------\n");
2267 gpertea 166
2268 gpertea 154 reads_format = FASTQ;
2269 gpertea 166
2270 gpertea 154 int parse_ret = parse_options(argc, argv, print_usage);
2271     if (parse_ret)
2272     return parse_ret;
2273 gpertea 166
2274 gpertea 154 if(optind >= argc)
2275     {
2276     print_usage();
2277     return 1;
2278     }
2279 gpertea 166
2280 gpertea 154 string ref_file_name = argv[optind++];
2281 gpertea 166
2282 gpertea 154 if(optind >= argc)
2283     {
2284     print_usage();
2285     return 1;
2286     }
2287 gpertea 166
2288 gpertea 154 string junctions_file_name = argv[optind++];
2289 gpertea 166
2290 gpertea 154 if(optind >= argc)
2291     {
2292     print_usage();
2293     return 1;
2294     }
2295 gpertea 166
2296 gpertea 154 string insertions_file_name = argv[optind++];
2297     if(optind >= argc)
2298     {
2299     print_usage();
2300     return 1;
2301     }
2302 gpertea 166
2303 gpertea 154 string deletions_file_name = argv[optind++];
2304     if(optind >= argc)
2305     {
2306     print_usage();
2307     return 1;
2308     }
2309 gpertea 166
2310 gpertea 154 string fusions_file_name = argv[optind++];
2311     if(optind >= argc)
2312     {
2313     print_usage();
2314     return 1;
2315     }
2316 gpertea 166
2317 gpertea 154 string accepted_hits_file_name = argv[optind++];
2318     if(optind >= argc)
2319     {
2320     print_usage();
2321     return 1;
2322     }
2323 gpertea 166
2324 gpertea 154 string left_map_filename = argv[optind++];
2325     if(optind >= argc)
2326     {
2327     print_usage();
2328     return 1;
2329     }
2330 gpertea 166
2331 gpertea 154 string left_reads_filename = argv[optind++];
2332     string unzcmd=getUnpackCmd(left_reads_filename, false);
2333     string right_map_filename;
2334     string right_reads_filename;
2335 gpertea 166
2336 gpertea 154 if (optind < argc)
2337     {
2338     right_map_filename = argv[optind++];
2339     if(optind >= argc) {
2340     print_usage();
2341     return 1;
2342     }
2343     right_reads_filename=argv[optind++];
2344     }
2345 gpertea 135
2346 gpertea 154 ifstream ref_stream(ref_file_name.c_str(), ifstream::in);
2347     if (!ref_stream.good())
2348     {
2349     fprintf(stderr, "Error: cannot open %s for reading\n",
2350     ref_file_name.c_str());
2351     exit(1);
2352     }
2353 gpertea 166
2354 gpertea 154 FILE* junctions_file = fopen(junctions_file_name.c_str(), "w");
2355     if (junctions_file == NULL)
2356     {
2357     fprintf(stderr, "Error: cannot open BED file %s for writing\n",
2358     junctions_file_name.c_str());
2359     exit(1);
2360     }
2361 gpertea 166
2362 gpertea 154 FILE* insertions_file = fopen(insertions_file_name.c_str(), "w");
2363     if (insertions_file == NULL)
2364     {
2365     fprintf(stderr, "Error: cannot open VCF file %s for writing\n",
2366     insertions_file_name.c_str());
2367     exit(1);
2368     }
2369 gpertea 166
2370 gpertea 154 FILE* deletions_file = fopen(deletions_file_name.c_str(), "w");
2371     if (deletions_file == NULL)
2372     {
2373     fprintf(stderr, "Error: cannot open VCF file %s for writing\n",
2374     deletions_file_name.c_str());
2375     exit(1);
2376     }
2377 gpertea 166
2378 gpertea 154 FILE* fusions_file = NULL;
2379     if (fusion_search)
2380     {
2381     fusions_file = fopen(fusions_file_name.c_str(), "w");
2382     if (fusions_file == NULL)
2383 gpertea 29 {
2384 gpertea 154 fprintf(stderr, "Error: cannot open VCF file %s for writing\n",
2385     fusions_file_name.c_str());
2386     exit(1);
2387 gpertea 29 }
2388 gpertea 154 }
2389 gpertea 166
2390 gpertea 154 driver(accepted_hits_file_name,
2391     ref_stream,
2392     left_map_filename,
2393     left_reads_filename,
2394     right_map_filename,
2395     right_reads_filename,
2396     junctions_file,
2397     insertions_file,
2398     deletions_file,
2399     fusions_file);
2400 gpertea 166
2401 gpertea 154 return 0;
2402 gpertea 29 }