ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/reads.h
Revision: 154
Committed: Tue Jan 24 02:29:21 2012 UTC (12 years, 7 months ago) by gpertea
File size: 5604 byte(s)
Log Message:
massive update with Daehwan's work

Line File contents
1 #ifndef READS_H
2 #define READS_H
3 /*
4 * reads.h
5 * TopHat
6 *
7 * Created by Cole Trapnell on 9/2/08.
8 * Copyright 2008 Cole Trapnell. All rights reserved.
9 *
10 */
11
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <new>
#include <queue>
#include <sstream>
#include <string>
#include <seqan/sequence.h>
#include "common.h"
18
19
20 using std::string;
21
// Capacity (in characters) pre-reserved for the sequence and quality strings
// of every Read.
static const int max_read_bp = 256;

// Note: qualities are not currently used by TopHat
//
// Plain record holding the four text fields of one read. All fields are
// public and carry no invariant beyond what callers put into them.
struct Read
{
  // Reserves max_read_bp characters up front for `seq` and `qual`; `name` and
  // `alt_name` are left at their default capacity.
  Read()
  {
    seq.reserve(max_read_bp);
    qual.reserve(max_read_bp);
  }

  std::string name;
  std::string seq;
  std::string alt_name;
  std::string qual;

  // True when `seq` and `qual` hold the same number of characters.
  // Const-qualified so it can be called through a const Read / const Read&.
  bool lengths_equal() const { return seq.length() == qual.length(); }

  // Empties all four fields. std::string::clear() is used, so the object can
  // be refilled and reused for the next record.
  void clear()
  {
    name.clear();
    seq.clear();
    qual.clear();
    alt_name.clear();
  }
};
47
48 void reverse_complement(string& seq);
49 string convert_color_to_bp(const string& color);
50 seqan::String<char> convert_color_to_bp(char base, const seqan::String<char>& color);
51
52 string convert_bp_to_color(const string& bp, bool remove_primer = false);
53 seqan::String<char> convert_bp_to_color(const seqan::String<char>& bp, bool remove_primer = false);
54
/*
 * Dynamic-programming decoder for a colorspace read, taken from the BWA
 * paper:
 *
 *   Heng Li and Richard Durbin
 *   Fast and accurate short read alignment with Burrows-Wheeler transform
 *
 * `color`, `qual` and `ref` are inputs (const references); the decoded
 * sequence is returned through the `decode` out-parameter.
 */
void BWA_decode(const std::string& color,
                const std::string& qual,
                const std::string& ref,
                std::string& decode);
62
63
// Converts any stream-insertable value to a std::string by writing it into a
// string stream and reading it back with operator>>.
//
// Because the value is read back with formatted extraction, the result is the
// first whitespace-delimited token of the streamed text (an empty string when
// nothing was written).
//
// The insertion and the extraction are separate statements on purpose: a
// chained `ss << value >> result` only compiles when operator<< returns the
// concrete stream type, which is not the case for the standard operator<<
// overloads (they return std::ostream&).
template <class Type>
std::string DnaString_to_string(const Type& dnaString)
{
  std::string result;
  std::stringstream ss(std::stringstream::in | std::stringstream::out);
  ss << dnaString;
  ss >> result;
  return result;
}
72
73 class ReadTable;
74
75 class FLineReader { //simple text line reader class, buffering last line read
76 int len;
77 int allocated;
78 char* buf;
79 bool isEOF;
80 FILE* file;
81 bool is_pipe;
82 bool pushed; //pushed back
83 int lcount; //counting all lines read by the object
84
85 public:
86 // daehwan - this is not a good place to store the last read ...
87 Read last_read;
88 bool pushed_read;
89
90 public:
91 char* chars() { return buf; }
92 char* line() { return buf; }
93 int readcount() { return lcount; } //number of lines read
94 int length() { return len; } //length of the last line read
95 bool isEof() {return isEOF; }
96 char* nextLine();
97 FILE* fhandle() { return file; }
98 void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
99 // so the next call will in fact return the same line
100 void pushBack_read() { if(!last_read.name.empty()) pushed_read=true;}
101 FLineReader(FILE* stream=NULL) {
102 len=0;
103 isEOF=false;
104 is_pipe=false;
105 allocated=512;
106 buf=(char*)malloc(allocated);
107 lcount=0;
108 buf[0]=0;
109 file=stream;
110 pushed=false;
111 pushed_read=false;
112 }
113 FLineReader(FZPipe& fzpipe) {
114 len=0;
115 isEOF=false;
116 allocated=512;
117 buf=(char*)malloc(allocated);
118 lcount=0;
119 buf[0]=0;
120 file=fzpipe.file;
121 is_pipe=!fzpipe.pipecmd.empty();
122 pushed=false;
123 pushed_read=false;
124 }
125 void close() {
126 if (file==NULL) return;
127 if (is_pipe) pclose(file);
128 else fclose(file);
129 }
130
131 ~FLineReader() {
132 free(buf); //does not call close() -- we might reuse the file handle
133 }
134 };
135
136 bool get_read_from_stream(uint64_t insert_id,
137 FLineReader& fr,
138 ReadFormat reads_format,
139 bool strip_slash,
140 Read& read,
141 FILE* um_out=NULL, //unmapped reads output
142 bool um_write_found=false);
143
144 void skip_lines(FLineReader& fr);
145 bool next_fasta_record(FLineReader& fr, string& defline, string& seq, ReadFormat reads_format);
146 bool next_fastq_record(FLineReader& fr, const string& seq, string& alt_name, string& qual, ReadFormat reads_format);
147 bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format=FASTQ,
148 FLineReader* frq=NULL);
149
150 class ReadStream {
151 protected:
152 struct ReadOrdering
153 {
154 bool operator()(std::pair<uint64_t, Read>& lhs, std::pair<uint64_t, Read>& rhs)
155 {
156 return (lhs.first > rhs.first);
157 }
158 };
159 FZPipe fstream;
160 std::priority_queue< std::pair<uint64_t, Read>,
161 std::vector<std::pair<uint64_t, Read> >,
162 ReadOrdering > read_pq;
163 uint64_t last_id; //keep track of last requested ID, for consistency check
164 bool r_eof;
165 bool next_read(Read& read, ReadFormat read_format); //get top read from the queue
166
167 public:
168 ReadStream():fstream(), read_pq(), last_id(0), r_eof(false) { }
169
170 ReadStream(const string& fname):fstream(fname, false),
171 read_pq(), last_id(0), r_eof(false) { }
172
173 void init(string& fname) {
174 fstream.openRead(fname, false);
175 }
176 const char* filename() {
177 return fstream.filename.c_str();
178 }
179 //read_ids must ALWAYS be requested in increasing order
180 bool getRead(uint64_t read_id, Read& read,
181 ReadFormat read_format=FASTQ,
182 bool strip_slash=false,
183 FILE* um_out=NULL, //unmapped reads output
184 bool um_write_found=false,
185 uint64_t begin_id = 0,
186 uint64_t end_id=std::numeric_limits<uint64_t>::max());
187
188 void rewind() {
189 fstream.rewind();
190 clear();
191 }
192 void seek(int64_t offset) {
193 clear();
194 fstream.seek(offset);
195 }
196 FILE* file() {
197 return fstream.file;
198 }
199 void clear() {
200 /* while (read_pq.size()) {
201 const std::pair<uint64_t, Read>& t = read_pq.top();
202 //free(t.second);
203 read_pq.pop();
204 } */
205 read_pq=std::priority_queue< std::pair<uint64_t, Read>,
206 std::vector<std::pair<uint64_t, Read> >,
207 ReadOrdering > ();
208 }
209 void close() {
210 clear();
211 fstream.close();
212 }
213 ~ReadStream() {
214 close();
215 }
216 };
217 #endif