ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/bwt2sam.cpp
Revision: 29
Committed: Tue Aug 2 21:24:54 2011 UTC (13 years, 1 month ago) by gpertea
File size: 3893 byte(s)
Log Message:
adding tophat source work

Line File contents
1 #ifdef HAVE_CONFIG_H
2 #include <config.h>
3 #endif
4
5 /*
6 * long_spanning_reads.cpp
7 * TopHat
8 *
9 * Created by Cole Trapnell on 2/5/09.
10 * Copyright 2009 Cole Trapnell. All rights reserved.
11 *
12 */
13
14 #include <cassert>
15 #include <cstdio>
16 #include <vector>
17 #include <string>
18 #include <map>
19 #include <algorithm>
20 #include <set>
21 #include <cstdlib>
22 #include <cstring>
23 #include <bitset>
24
25 #include <iostream>
26 #include <fstream>
27 #include <sstream>
28 #include <getopt.h>
29
30 #include "common.h"
31 #include "bwt_map.h"
32 #include "tokenize.h"
33 #include "reads.h"
34
35 using namespace seqan;
36 using namespace std;
37
38 void print_usage()
39 {
40 fprintf(stderr, "Usage: bwt2sam --sam-header <sam_header.sam> <bwt_file>\n");
41 }
42
43 int multi_closure = 0;
44 int anchor_too_short = 0;
45 int gap_too_short = 0;
46 bool from_bowtie=false;
47
48 void print_segment_hits(string fname, HitStream& hitstream,
49 ReadTable& unmapped_reads,
50 RefSequenceTable& rt) {
51 HitsForRead curr_hit_group;
52 fname+=".bam";
53 GBamWriter bam_writer(fname.c_str(), sam_header.c_str());
54 while (true) {
55 hitstream.next_read_hits(curr_hit_group);
56 if (curr_hit_group.hits.size()==0) break;
57 for (size_t i = 0; i < curr_hit_group.hits.size(); i++) {
58 BowtieHit& bh = curr_hit_group.hits[i];
59 const char* ref_name = rt.get_name(bh.ref_id());
60 string read_name(""); //stoopid STL
61 str_appendInt(read_name, bh.insert_id());
62 print_hit(stdout, read_name.c_str(), curr_hit_group.hits[i], ref_name,
63 bh.seq().c_str(),bh.qual().c_str(),
64 hitstream.fromBowtie());
65 print_bamhit(bam_writer, read_name.c_str(), curr_hit_group.hits[i], ref_name,
66 bh.seq().c_str(),bh.qual().c_str(),hitstream.fromBowtie());
67 }
68 }
69 }
70
71
72 //void driver(GBamWriter& bam_writer, vector<FZPipe>& seg_files)
73 //void driver(vector<FZPipe>& seg_files)
74 void driver(string& seg_file_name)
75 {
76
77 RefSequenceTable rt(sam_header, true);
78 //fprintf (stderr, "Loading reference sequences...\n");
79 //get_seqs(ref_stream, rt, true, false);
80 // fprintf (stderr, " reference sequences loaded.\n");
81 ReadTable it;
82
83 bool need_seq = true;
84 bool need_qual = true;
85
86 HitFactory* fac = NULL;
87 string fext=getFext(seg_file_name);
88 from_bowtie=true;
89 if (fext=="sam")
90 fac = new SAMHitFactory(it, rt);
91 else if (fext=="bam")
92 fac = new BAMHitFactory(it, rt);
93 else {
94 fac= new BowtieHitFactory(it, rt);
95 //from_bowtie=true;
96 }
97 //HitStream hs(seg_files[i].file, fac, false, false, false, need_seq, need_qual);
98 HitStream hs(seg_file_name, fac, false, false, false, need_seq, need_qual, from_bowtie);
99
100 print_segment_hits(seg_file_name, hs, it, rt);
101
102 delete fac;
103
104 } //driver
105
106 int main(int argc, char** argv)
107 {
108 fprintf(stderr, "bwt2sam v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION);
109 fprintf(stderr, "--------------------------------------------\n");
110
111 int parse_ret = parse_options(argc, argv, print_usage);
112 if (parse_ret)
113 return parse_ret;
114
115 if(optind >= argc)
116 {
117 print_usage();
118 return 1;
119 }
120
121 string segment_file_name = argv[optind++];
122
123 string unzcmd=getUnpackCmd(segment_file_name,false);
124 /*
125 vector<FZPipe> segment_files;
126 for (size_t i = 0; i < segment_file_names.size(); ++i)
127 {
128 fprintf(stderr, "Opening %s for reading\n",
129 segment_file_names[i].c_str());
130 FZPipe seg_file(segment_file_names[i], unzcmd);
131 if (seg_file.file == NULL)
132 {
133 err_die("Error: cannot open %s for reading\n",
134 segment_file_names[i].c_str());
135 }
136 segment_files.push_back(seg_file);
137 }
138
139 //writeSamHeader(stdout);
140 //GBamWriter bam_writer("-", sam_header.c_str());
141 //driver(bam_writer, ref_stream, juncs_files, insertions_files, deletions_files, spliced_segment_files, segment_files, reads_file);
142 driver(segment_files);
143 */
144 driver(segment_file_name);
145 return 0;
146 }