ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/reads.h
Revision: 200
Committed: Fri Mar 9 22:25:25 2012 UTC (12 years, 5 months ago) by gpertea
File size: 5578 byte(s)
Log Message:
Line User Rev File contents
1 gpertea 29 #ifndef READS_H
2     #define READS_H
3     /*
4     * reads.h
5     * TopHat
6     *
7     * Created by Cole Trapnell on 9/2/08.
8     * Copyright 2008 Cole Trapnell. All rights reserved.
9     *
10     */
11    
12     #include <string>
13     #include <sstream>
14 gpertea 154 #include <queue>
15     #include <limits>
16 gpertea 29 #include <seqan/sequence.h>
17     #include "common.h"
18    
19 gpertea 154
20 gpertea 29 using std::string;
21    
22    
23     // Note: qualities are not currently used by TopHat
24     struct Read
25     {
26     Read()
27     {
28 gpertea 200 //seq.reserve(MAX_READ_LEN);
29     //qual.reserve(MAX_READ_LEN);
30 gpertea 29 }
31    
32     string name;
33     string seq;
34     string alt_name;
35     string qual;
36    
37     bool lengths_equal() { return seq.length() == qual.length(); }
38     void clear()
39     {
40     name.clear();
41     seq.clear();
42     qual.clear();
43     alt_name.clear();
44     }
45     };
46    
47     void reverse_complement(string& seq);
48     string convert_color_to_bp(const string& color);
49     seqan::String<char> convert_color_to_bp(char base, const seqan::String<char>& color);
50    
51     string convert_bp_to_color(const string& bp, bool remove_primer = false);
52     seqan::String<char> convert_bp_to_color(const seqan::String<char>& bp, bool remove_primer = false);
53    
54     /*
55     This is a dynamic programming to decode a colorspace read, which is from BWA paper.
56    
57     Heng Li and Richard Durbin
58     Fast and accurate short read alignment with Burrows-Wheeler transform
59     */
60     void BWA_decode(const string& color, const string& qual, const string& ref, string& decode);
61    
62    
63     template <class Type>
64     string DnaString_to_string(const Type& dnaString)
65     {
66     std::string result;
67     std::stringstream ss(std::stringstream::in | std::stringstream::out);
68     ss << dnaString >> result;
69     return result;
70     }
71    
72     class ReadTable;
73    
74     class FLineReader { //simple text line reader class, buffering last line read
75     int len;
76     int allocated;
77     char* buf;
78     bool isEOF;
79     FILE* file;
80     bool is_pipe;
81     bool pushed; //pushed back
82     int lcount; //counting all lines read by the object
83 gpertea 154
84     public:
85     // daehwan - this is not a good place to store the last read ...
86     Read last_read;
87     bool pushed_read;
88    
89 gpertea 29 public:
90     char* chars() { return buf; }
91     char* line() { return buf; }
92     int readcount() { return lcount; } //number of lines read
93     int length() { return len; } //length of the last line read
94     bool isEof() {return isEOF; }
95     char* nextLine();
96     FILE* fhandle() { return file; }
97     void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
98     // so the next call will in fact return the same line
99 gpertea 154 void pushBack_read() { if(!last_read.name.empty()) pushed_read=true;}
100 gpertea 29 FLineReader(FILE* stream=NULL) {
101     len=0;
102     isEOF=false;
103     is_pipe=false;
104     allocated=512;
105     buf=(char*)malloc(allocated);
106     lcount=0;
107     buf[0]=0;
108     file=stream;
109     pushed=false;
110 gpertea 154 pushed_read=false;
111 gpertea 29 }
112     FLineReader(FZPipe& fzpipe) {
113     len=0;
114     isEOF=false;
115     allocated=512;
116     buf=(char*)malloc(allocated);
117     lcount=0;
118     buf[0]=0;
119     file=fzpipe.file;
120     is_pipe=!fzpipe.pipecmd.empty();
121     pushed=false;
122 gpertea 154 pushed_read=false;
123 gpertea 29 }
124     void close() {
125     if (file==NULL) return;
126     if (is_pipe) pclose(file);
127     else fclose(file);
128     }
129 gpertea 154
130 gpertea 29 ~FLineReader() {
131     free(buf); //does not call close() -- we might reuse the file handle
132     }
133     };
134    
135 gpertea 154 bool get_read_from_stream(uint64_t insert_id,
136     FLineReader& fr,
137     ReadFormat reads_format,
138     bool strip_slash,
139     Read& read,
140     FILE* um_out=NULL, //unmapped reads output
141     bool um_write_found=false);
142 gpertea 135
143 gpertea 29 void skip_lines(FLineReader& fr);
144     bool next_fasta_record(FLineReader& fr, string& defline, string& seq, ReadFormat reads_format);
145     bool next_fastq_record(FLineReader& fr, const string& seq, string& alt_name, string& qual, ReadFormat reads_format);
146 gpertea 135 bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format=FASTQ,
147 gpertea 29 FLineReader* frq=NULL);
148    
149 gpertea 135 class ReadStream {
150     protected:
151     struct ReadOrdering
152     {
153     bool operator()(std::pair<uint64_t, Read>& lhs, std::pair<uint64_t, Read>& rhs)
154     {
155     return (lhs.first > rhs.first);
156     }
157     };
158     FZPipe fstream;
159     std::priority_queue< std::pair<uint64_t, Read>,
160     std::vector<std::pair<uint64_t, Read> >,
161     ReadOrdering > read_pq;
162     uint64_t last_id; //keep track of last requested ID, for consistency check
163     bool r_eof;
164     bool next_read(Read& read, ReadFormat read_format); //get top read from the queue
165    
166     public:
167     ReadStream():fstream(), read_pq(), last_id(0), r_eof(false) { }
168    
169 gpertea 154 ReadStream(const string& fname):fstream(fname, false),
170 gpertea 135 read_pq(), last_id(0), r_eof(false) { }
171    
172     void init(string& fname) {
173     fstream.openRead(fname, false);
174     }
175     const char* filename() {
176     return fstream.filename.c_str();
177     }
178     //read_ids must ALWAYS be requested in increasing order
179     bool getRead(uint64_t read_id, Read& read,
180 gpertea 154 ReadFormat read_format=FASTQ,
181     bool strip_slash=false,
182 gpertea 159 uint64_t begin_id = 0,
183     uint64_t end_id=std::numeric_limits<uint64_t>::max(),
184 gpertea 154 FILE* um_out=NULL, //unmapped reads output
185 gpertea 159 bool um_write_found=false
186     );
187 gpertea 135
188     void rewind() {
189     fstream.rewind();
190     clear();
191     }
192 gpertea 154 void seek(int64_t offset) {
193     clear();
194     fstream.seek(offset);
195     }
196 gpertea 135 FILE* file() {
197     return fstream.file;
198     }
199     void clear() {
200     /* while (read_pq.size()) {
201     const std::pair<uint64_t, Read>& t = read_pq.top();
202     //free(t.second);
203     read_pq.pop();
204     } */
205     read_pq=std::priority_queue< std::pair<uint64_t, Read>,
206     std::vector<std::pair<uint64_t, Read> >,
207     ReadOrdering > ();
208     }
209     void close() {
210     clear();
211     fstream.close();
212     }
213     ~ReadStream() {
214     close();
215     }
216     };
217 gpertea 29 #endif