gclib/tophat_cpp/prep_reads.cpp

/*
 *  polyA_reads.cpp
 *  TopHat
 *
 *  Created by Cole Trapnell on 9/2/08.
 *  Copyright 2008 Cole Trapnell. All rights reserved.
 *      Derived from maq "catfilter", by Heng Li at Sanger
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <cassert>
#include <vector>
#include <cstring>
#include <cstdlib>

#include "common.h"
#include "reads.h"
#include "tokenize.h"
#include "qual.h"

//bool fastq_db = true;

using namespace std;

void format_qual_string(string& qual_str)
{
  for (size_t i = 0; i < qual_str.size(); ++i)
    {
      qual_str[i] = charToPhred33(qual_str[i], 
                                      solexa_quals, 
                                      phred64_quals);
    }
}

bool readmap_loaded=false;
vector<bool> readmap; //for filter_multihits

void load_readmap() {
  readmap_loaded=false;
  if (flt_reads.empty()) return;
  //FZPipe(filter_multihits, false);
  FILE* rfile=fopen(flt_reads.c_str(),"r");
  if (rfile==NULL)
     err_die("Error: cannot read file %s\n", flt_reads.c_str());
  FLineReader fr(rfile);
  skip_lines(fr); //should not be needed, bowtie doesn't add extra header lines for this
  Read read;
  while (next_fastx_read(fr, read, reads_format)) {
    uint32_t rnum=(uint32_t)atol(read.name.c_str());
    if (rnum>=(uint32_t) readmap.size())
         readmap.resize(rnum+1, false);
    readmap[rnum] = true;
    }
  readmap_loaded=true;
  fclose(rfile);
}

bool check_readmap(uint32_t read_id) {
 if (read_id>=readmap.size()) return false;
    else return readmap[read_id];
}
void flt_reads_and_hits(vector<FZPipe>& reads_files) {
  if (!readmap_loaded)
      err_die("Error: filtering reads not enabled, aborting.");
  if (aux_outfile.empty())
       err_die("Error: auxiliary output file not provided.");
  if (index_outfile.empty())
    err_die("Error: index output file not provided.");
  
  // -- filter mappings:
  string fext=getFext(flt_mappings);
  if (fext=="bam") {
    //TODO
    //WRITEME: use samtools C API to write a quick bam filter
    err_die("Error: cannot handle BAM files yet![WRITEME]");
    return;
    }
  bool is_sam=false;
  string s(flt_mappings);
  for(size_t i=0; i!=s.length(); ++i)
          s[i] = std::tolower(s[i]);
  if (fext=="sam" || s.rfind(".sam.")+fext.length()+5==s.length()) is_sam=true;
  string unzcmd=getUnpackCmd(flt_mappings);
  FZPipe hitsfile(flt_mappings, unzcmd);
  FLineReader fh(hitsfile);
  FZPipe outfile;
  outfile.openWrite(aux_outfile.c_str(), zpacker);
  if (outfile.file==NULL) err_die("Error: cannot create file %s", aux_outfile.c_str());
  const char* line;
  while ((line=fh.nextLine())) {
    if (is_sam && line[0]=='@') {
        fprintf(outfile.file, "%s\n", line);
        continue;
        }
    char* tab=strchr((char*)line, '\t');
    if (tab==NULL)
        err_die("Error: cannot find tab character in %s mappings line:\n%s",
            flt_mappings.c_str(),line);
    *tab=0;
    uint32_t rid = (uint32_t) atol(line);
    if (rid==0)
      err_die("Error: invalid read ID (%s) parsed from mapping file %s",
            line, flt_mappings.c_str());
    *tab='\t';
    if (check_readmap(rid)) {
       fprintf(outfile.file, "%s\n", line);
       }
    }
  outfile.close();
  hitsfile.close();
  // -- now filter reads
  for (size_t fi = 0; fi < reads_files.size(); ++fi) {
      //only one file expected here, this is not the initial prep_reads
      Read read;
      FLineReader fr(reads_files[fi]);
      //skip_lines(fr);
      while (next_fastx_read(fr, read)) {
        uint32_t rnum=(uint32_t)atol(read.name.c_str());
        if (check_readmap(rnum))
          printf("@%s\n%s\n+%s\n%s\n",
             read.name.c_str(),
             read.seq.c_str(),
             read.alt_name.c_str(),
             read.qual.c_str());
        }
    }
}


void process_reads(vector<FZPipe>& reads_files, vector<FZPipe>& quals_files)
{       
   //TODO: add the option to write the garbage reads into separate file(s)
  int num_reads_chucked = 0;
  int multimap_chucked = 0;
  int min_read_len = 20000000;
  int max_read_len = 0;
  uint32_t next_id = 0;
  uint64_t file_offset = 0;
  FILE* fw=NULL;
  if (!aux_outfile.empty()) {
    fw=fopen(aux_outfile.c_str(), "w");
    if (fw==NULL)
       err_die("Error: cannot create file %s\n",aux_outfile.c_str());
    }

  FILE* findex = NULL;
  if (!index_outfile.empty()) {
    findex = fopen(index_outfile.c_str(), "w");
    if (findex == NULL)
       err_die("Error: cannot create file %s\n", index_outfile.c_str());
    }

  for (size_t fi = 0; fi < reads_files.size(); ++fi)
    {
      Read read;
      FLineReader fr(reads_files[fi]);
      skip_lines(fr);
      FZPipe fq;
      if (quals)
         fq = quals_files[fi];

      FLineReader frq(fq);
      skip_lines(frq);
      
      while (!fr.isEof()) {
            //read.clear();
            // Get the next read from the file
        if (!next_fastx_read(fr, read, reads_format, ((quals) ? &frq : NULL)))
            break;

        ++next_id;
        //IMPORTANT: to keep paired reads in sync, this must be
        //incremented BEFORE any reads are chucked !

        if (read.seq.length()<12) {
            ++num_reads_chucked;
            continue;
            }
        if ((int)read.seq.length()<min_read_len)
             min_read_len=read.seq.length();
        if ((int)read.seq.length()>max_read_len)
             max_read_len=read.seq.length();

        // daehwan - check this later, it's due to bowtie
        if (color && read.seq[1] == '4') {
          ++num_reads_chucked;
          continue;
          }

        if (readmap_loaded && check_readmap(next_id)) {
          ++num_reads_chucked;
          ++multimap_chucked;
          continue;
          }
        format_qual_string(read.qual);
        std::transform(read.seq.begin(), read.seq.end(), read.seq.begin(), ::toupper);
        char counts[256];
        memset(counts, 0, sizeof(counts));
        // Count up the bad characters
        for (unsigned int i = 0; i != read.seq.length(); ++i)
          {
            char c = (char)toupper(read.seq[i]);
            counts[(size_t)c]++;
          }

        double percent_A = (double)(counts[(size_t)'A']) / read.seq.length();
        double percent_C = (double)(counts[(size_t)'C']) / read.seq.length();
        double percent_G = (double)(counts[(size_t)'G']) / read.seq.length();
        double percent_T = (double)(counts[(size_t)'T']) / read.seq.length();
        double percent_N = (double)(counts[(size_t)'N']) / read.seq.length();
        double percent_4 = (double)(counts[(size_t)'4']) / read.seq.length();

        // Chuck the read if there are at least 5 'N's or if it's mostly
        // (>90%) 'N's and 'A's

        if (percent_A > 0.9 ||
            percent_C > 0.9 ||
            percent_G > 0.9 ||
            percent_T > 0.9 ||
            percent_N >= 0.1 ||
            percent_4 >= 0.1)
          {
            ++num_reads_chucked;
          }
        else
          {
            // daehwan - we should not use buf in printf function
            // because it may contain some control characters such as "\" from quality values.
            // Here, buf is only used for calculating the file offset
            char buf[2048] = {0};
            if (reads_format == FASTQ or (reads_format == FASTA && quals))
              {
                sprintf(buf, "@%u\n%s\n+%s\n%s\n",
                        next_id,
                        read.seq.c_str(),
                        read.name.c_str(),
                        read.qual.c_str());

                printf("@%u\n%s\n+%s\n%s\n",
                       next_id,
                       read.seq.c_str(),
                       read.name.c_str(),
                       read.qual.c_str());
              }
            else if (reads_format == FASTA)
              {
                string qual;
                if (color)
                  qual = string(read.seq.length()-1, 'I').c_str();
                else
                  qual = string(read.seq.length(), 'I').c_str();

                sprintf(buf, "@%u\n%s\n+%s\n%s\n",
                        next_id,
                        read.seq.c_str(),
                        read.name.c_str(),
                        qual.c_str());

                printf("@%u\n%s\n+%s\n%s\n",
                        next_id,
                        read.seq.c_str(),
                        read.name.c_str(),
                        qual.c_str());
              }
            else
              {
                assert(0);
              }

            if (findex != NULL)
              {
                if ((next_id - num_reads_chucked) % 1000 == 0)
                  fprintf(findex, "%d\t%lu\n", next_id, file_offset);
              }

            file_offset += strlen(buf);
          }
      } //while !fr.isEof()
    fr.close();
    frq.close();
    } //for each input file
  fprintf(stderr, "%u out of %u reads have been filtered out\n",
          num_reads_chucked, next_id);
  if (readmap_loaded)
    fprintf(stderr, "\t(%u filtered out due to %s)\n",
        multimap_chucked, flt_reads.c_str());
  if (fw!=NULL) {
    fprintf(fw, "min_read_len=%d\n",min_read_len - (color ? 1 : 0));
    fprintf(fw, "max_read_len=%d\n",max_read_len - (color ? 1 : 0));
    fprintf(fw, "reads_in =%d\n",next_id);
    fprintf(fw, "reads_out=%d\n",next_id-num_reads_chucked);
    fclose(fw);
    }

  if (findex != NULL)
    fclose(findex);
}

void print_usage()
{
  fprintf(stderr, "Usage:\n prep_reads [--filter-multi <multi.fq>] <reads1.fa/fq,...,readsN.fa/fq>\n");
  //alternate usage (--ctv_to_num) : doesn't filter out any reads,
  //but simply convert read names to numeric ids
}


int main(int argc, char *argv[])
{
  fprintf(stderr, "prep_reads v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION); 
  fprintf(stderr, "---------------------------\n");
  
  int parse_ret = parse_options(argc, argv, print_usage);
  if (parse_ret)
    return parse_ret;
  
  if(optind >= argc)
    {
      print_usage();
      return 1;
    }
  
  string reads_file_list = argv[optind++];
  
  vector<string> reads_file_names;
  vector<FZPipe> reads_files;
  tokenize(reads_file_list, ",",reads_file_names);
  for (size_t i = 0; i < reads_file_names.size(); ++i)
    {
      string fname=reads_file_names[i];
      string pipecmd=guess_packer(fname, true);
      if (!pipecmd.empty()) pipecmd.append(" -cd");
      FZPipe seg_file(fname,pipecmd);
      if (seg_file.file == NULL) {
          fprintf(stderr, "Error: cannot open reads file %s for reading\n",
              reads_file_names[i].c_str());
              exit(1);
          }
      reads_files.push_back(seg_file);
    }
  
  vector<string> quals_file_names;
  vector<FZPipe> quals_files;
  if (quals)
    {
      string quals_file_list = argv[optind++];
      tokenize(quals_file_list, ",", quals_file_names);
      for (size_t i = 0; i < quals_file_names.size(); ++i)
        {
      string fname(quals_file_names[i]);
      string pipecmd=guess_packer(fname, true);
      FZPipe seg_file(fname, pipecmd);
          if (seg_file.file == NULL)
            {
              fprintf(stderr, "Error: cannot open reads file %s for reading\n",
                      quals_file_names[i].c_str());
              exit(1);
              }
          quals_files.push_back(seg_file);
        }
    }
  load_readmap();
  // Only print to standard out the good reads
  //TODO: a better, more generic read filtering protocol
  if (flt_mappings.empty())
     process_reads(reads_files, quals_files);
  else //special use case: filter previous bowtie results
     flt_reads_and_hits(reads_files);
    
  return 0;
}
Revision:	206
Committed:	Mon Mar 12 18:02:35 2012 UTC (12 years, 5 months ago) by gpertea
File size:	10965 byte(s)
Log Message:	updated
Line	File contents
1	/*
2	* polyA_reads.cpp
3	* TopHat
4	*
5	* Created by Cole Trapnell on 9/2/08.
6	* Copyright 2008 Cole Trapnell. All rights reserved.
7	* Derived from maq "catfilter", by Heng Li at Sanger
8	*/
9
10	#ifdef HAVE_CONFIG_H
11	#include <config.h>
12	#endif
13
14	#include <stdio.h>
15	#include <cassert>
16	#include <vector>
17	#include <cstring>
18	#include <cstdlib>
19
20	#include "common.h"
21	#include "reads.h"
22	#include "tokenize.h"
23	#include "qual.h"
24
25	//bool fastq_db = true;
26
27	using namespace std;
28
29	void format_qual_string(string& qual_str)
30	{
31	for (size_t i = 0; i < qual_str.size(); ++i)
32	{
33	qual_str[i] = charToPhred33(qual_str[i],
34	solexa_quals,
35	phred64_quals);
36	}
37	}
38
39	bool readmap_loaded=false;
40	vector<bool> readmap; //for filter_multihits
41
42	void load_readmap() {
43	readmap_loaded=false;
44	if (flt_reads.empty()) return;
45	//FZPipe(filter_multihits, false);
46	FILE* rfile=fopen(flt_reads.c_str(),"r");
47	if (rfile==NULL)
48	err_die("Error: cannot read file %s\n", flt_reads.c_str());
49	FLineReader fr(rfile);
50	skip_lines(fr); //should not be needed, bowtie doesn't add extra header lines for this
51	Read read;
52	while (next_fastx_read(fr, read, reads_format)) {
53	uint32_t rnum=(uint32_t)atol(read.name.c_str());
54	if (rnum>=(uint32_t) readmap.size())
55	readmap.resize(rnum+1, false);
56	readmap[rnum] = true;
57	}
58	readmap_loaded=true;
59	fclose(rfile);
60	}
61
62	bool check_readmap(uint32_t read_id) {
63	if (read_id>=readmap.size()) return false;
64	else return readmap[read_id];
65	}
66	void flt_reads_and_hits(vector<FZPipe>& reads_files) {
67	if (!readmap_loaded)
68	err_die("Error: filtering reads not enabled, aborting.");
69	if (aux_outfile.empty())
70	err_die("Error: auxiliary output file not provided.");
71	if (index_outfile.empty())
72	err_die("Error: index output file not provided.");
73
74	// -- filter mappings:
75	string fext=getFext(flt_mappings);
76	if (fext=="bam") {
77	//TODO
78	//WRITEME: use samtools C API to write a quick bam filter
79	err_die("Error: cannot handle BAM files yet![WRITEME]");
80	return;
81	}
82	bool is_sam=false;
83	string s(flt_mappings);
84	for(size_t i=0; i!=s.length(); ++i)
85	s[i] = std::tolower(s[i]);
86	if (fext=="sam" \|\| s.rfind(".sam.")+fext.length()+5==s.length()) is_sam=true;
87	string unzcmd=getUnpackCmd(flt_mappings);
88	FZPipe hitsfile(flt_mappings, unzcmd);
89	FLineReader fh(hitsfile);
90	FZPipe outfile;
91	outfile.openWrite(aux_outfile.c_str(), zpacker);
92	if (outfile.file==NULL) err_die("Error: cannot create file %s", aux_outfile.c_str());
93	const char* line;
94	while ((line=fh.nextLine())) {
95	if (is_sam && line[0]=='@') {
96	fprintf(outfile.file, "%s\n", line);
97	continue;
98	}
99	char* tab=strchr((char*)line, '\t');
100	if (tab==NULL)
101	err_die("Error: cannot find tab character in %s mappings line:\n%s",
102	flt_mappings.c_str(),line);
103	*tab=0;
104	uint32_t rid = (uint32_t) atol(line);
105	if (rid==0)
106	err_die("Error: invalid read ID (%s) parsed from mapping file %s",
107	line, flt_mappings.c_str());
108	*tab='\t';
109	if (check_readmap(rid)) {
110	fprintf(outfile.file, "%s\n", line);
111	}
112	}
113	outfile.close();
114	hitsfile.close();
115	// -- now filter reads
116	for (size_t fi = 0; fi < reads_files.size(); ++fi) {
117	//only one file expected here, this is not the initial prep_reads
118	Read read;
119	FLineReader fr(reads_files[fi]);
120	//skip_lines(fr);
121	while (next_fastx_read(fr, read)) {
122	uint32_t rnum=(uint32_t)atol(read.name.c_str());
123	if (check_readmap(rnum))
124	printf("@%s\n%s\n+%s\n%s\n",
125	read.name.c_str(),
126	read.seq.c_str(),
127	read.alt_name.c_str(),
128	read.qual.c_str());
129	}
130	}
131	}
132
133
134	void process_reads(vector<FZPipe>& reads_files, vector<FZPipe>& quals_files)
135	{
136	//TODO: add the option to write the garbage reads into separate file(s)
137	int num_reads_chucked = 0;
138	int multimap_chucked = 0;
139	int min_read_len = 20000000;
140	int max_read_len = 0;
141	uint32_t next_id = 0;
142	uint64_t file_offset = 0;
143	FILE* fw=NULL;
144	if (!aux_outfile.empty()) {
145	fw=fopen(aux_outfile.c_str(), "w");
146	if (fw==NULL)
147	err_die("Error: cannot create file %s\n",aux_outfile.c_str());
148	}
149
150	FILE* findex = NULL;
151	if (!index_outfile.empty()) {
152	findex = fopen(index_outfile.c_str(), "w");
153	if (findex == NULL)
154	err_die("Error: cannot create file %s\n", index_outfile.c_str());
155	}
156
157	for (size_t fi = 0; fi < reads_files.size(); ++fi)
158	{
159	Read read;
160	FLineReader fr(reads_files[fi]);
161	skip_lines(fr);
162	FZPipe fq;
163	if (quals)
164	fq = quals_files[fi];
165
166	FLineReader frq(fq);
167	skip_lines(frq);
168
169	while (!fr.isEof()) {
170	//read.clear();
171	// Get the next read from the file
172	if (!next_fastx_read(fr, read, reads_format, ((quals) ? &frq : NULL)))
173	break;
174
175	++next_id;
176	//IMPORTANT: to keep paired reads in sync, this must be
177	//incremented BEFORE any reads are chucked !
178
179	if (read.seq.length()<12) {
180	++num_reads_chucked;
181	continue;
182	}
183	if ((int)read.seq.length()<min_read_len)
184	min_read_len=read.seq.length();
185	if ((int)read.seq.length()>max_read_len)
186	max_read_len=read.seq.length();
187
188	// daehwan - check this later, it's due to bowtie
189	if (color && read.seq[1] == '4') {
190	++num_reads_chucked;
191	continue;
192	}
193
194	if (readmap_loaded && check_readmap(next_id)) {
195	++num_reads_chucked;
196	++multimap_chucked;
197	continue;
198	}
199	format_qual_string(read.qual);
200	std::transform(read.seq.begin(), read.seq.end(), read.seq.begin(), ::toupper);
201	char counts[256];
202	memset(counts, 0, sizeof(counts));
203	// Count up the bad characters
204	for (unsigned int i = 0; i != read.seq.length(); ++i)
205	{
206	char c = (char)toupper(read.seq[i]);
207	counts[(size_t)c]++;
208	}
209
210	double percent_A = (double)(counts[(size_t)'A']) / read.seq.length();
211	double percent_C = (double)(counts[(size_t)'C']) / read.seq.length();
212	double percent_G = (double)(counts[(size_t)'G']) / read.seq.length();
213	double percent_T = (double)(counts[(size_t)'T']) / read.seq.length();
214	double percent_N = (double)(counts[(size_t)'N']) / read.seq.length();
215	double percent_4 = (double)(counts[(size_t)'4']) / read.seq.length();
216
217	// Chuck the read if there are at least 5 'N's or if it's mostly
218	// (>90%) 'N's and 'A's
219
220	if (percent_A > 0.9 \|\|
221	percent_C > 0.9 \|\|
222	percent_G > 0.9 \|\|
223	percent_T > 0.9 \|\|
224	percent_N >= 0.1 \|\|
225	percent_4 >= 0.1)
226	{
227	++num_reads_chucked;
228	}
229	else
230	{
231	// daehwan - we should not use buf in printf function
232	// because it may contain some control characters such as "\" from quality values.
233	// Here, buf is only used for calculating the file offset
234	char buf[2048] = {0};
235	if (reads_format == FASTQ or (reads_format == FASTA && quals))
236	{
237	sprintf(buf, "@%u\n%s\n+%s\n%s\n",
238	next_id,
239	read.seq.c_str(),
240	read.name.c_str(),
241	read.qual.c_str());
242
243	printf("@%u\n%s\n+%s\n%s\n",
244	next_id,
245	read.seq.c_str(),
246	read.name.c_str(),
247	read.qual.c_str());
248	}
249	else if (reads_format == FASTA)
250	{
251	string qual;
252	if (color)
253	qual = string(read.seq.length()-1, 'I').c_str();
254	else
255	qual = string(read.seq.length(), 'I').c_str();
256
257	sprintf(buf, "@%u\n%s\n+%s\n%s\n",
258	next_id,
259	read.seq.c_str(),
260	read.name.c_str(),
261	qual.c_str());
262
263	printf("@%u\n%s\n+%s\n%s\n",
264	next_id,
265	read.seq.c_str(),
266	read.name.c_str(),
267	qual.c_str());
268	}
269	else
270	{
271	assert(0);
272	}
273
274	if (findex != NULL)
275	{
276	if ((next_id - num_reads_chucked) % 1000 == 0)
277	fprintf(findex, "%d\t%lu\n", next_id, file_offset);
278	}
279
280	file_offset += strlen(buf);
281	}
282	} //while !fr.isEof()
283	fr.close();
284	frq.close();
285	} //for each input file
286	fprintf(stderr, "%u out of %u reads have been filtered out\n",
287	num_reads_chucked, next_id);
288	if (readmap_loaded)
289	fprintf(stderr, "\t(%u filtered out due to %s)\n",
290	multimap_chucked, flt_reads.c_str());
291	if (fw!=NULL) {
292	fprintf(fw, "min_read_len=%d\n",min_read_len - (color ? 1 : 0));
293	fprintf(fw, "max_read_len=%d\n",max_read_len - (color ? 1 : 0));
294	fprintf(fw, "reads_in =%d\n",next_id);
295	fprintf(fw, "reads_out=%d\n",next_id-num_reads_chucked);
296	fclose(fw);
297	}
298
299	if (findex != NULL)
300	fclose(findex);
301	}
302
303	void print_usage()
304	{
305	fprintf(stderr, "Usage:\n prep_reads [--filter-multi <multi.fq>] <reads1.fa/fq,...,readsN.fa/fq>\n");
306	//alternate usage (--ctv_to_num) : doesn't filter out any reads,
307	//but simply convert read names to numeric ids
308	}
309
310
311	int main(int argc, char *argv[])
312	{
313	fprintf(stderr, "prep_reads v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION);
314	fprintf(stderr, "---------------------------\n");
315
316	int parse_ret = parse_options(argc, argv, print_usage);
317	if (parse_ret)
318	return parse_ret;
319
320	if(optind >= argc)
321	{
322	print_usage();
323	return 1;
324	}
325
326	string reads_file_list = argv[optind++];
327
328	vector<string> reads_file_names;
329	vector<FZPipe> reads_files;
330	tokenize(reads_file_list, ",",reads_file_names);
331	for (size_t i = 0; i < reads_file_names.size(); ++i)
332	{
333	string fname=reads_file_names[i];
334	string pipecmd=guess_packer(fname, true);
335	if (!pipecmd.empty()) pipecmd.append(" -cd");
336	FZPipe seg_file(fname,pipecmd);
337	if (seg_file.file == NULL) {
338	fprintf(stderr, "Error: cannot open reads file %s for reading\n",
339	reads_file_names[i].c_str());
340	exit(1);
341	}
342	reads_files.push_back(seg_file);
343	}
344
345	vector<string> quals_file_names;
346	vector<FZPipe> quals_files;
347	if (quals)
348	{
349	string quals_file_list = argv[optind++];
350	tokenize(quals_file_list, ",", quals_file_names);
351	for (size_t i = 0; i < quals_file_names.size(); ++i)
352	{
353	string fname(quals_file_names[i]);
354	string pipecmd=guess_packer(fname, true);
355	FZPipe seg_file(fname, pipecmd);
356	if (seg_file.file == NULL)
357	{
358	fprintf(stderr, "Error: cannot open reads file %s for reading\n",
359	quals_file_names[i].c_str());
360	exit(1);
361	}
362	quals_files.push_back(seg_file);
363	}
364	}
365	load_readmap();
366	// Only print to standard out the good reads
367	//TODO: a better, more generic read filtering protocol
368	if (flt_mappings.empty())
369	process_reads(reads_files, quals_files);
370	else //special use case: filter previous bowtie results
371	flt_reads_and_hits(reads_files);
372
373	return 0;
374	}