ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/reads.cpp
(Generate patch)
# Line 120 | Line 120
120      if (fline==NULL) return false;
121      }
122    //must be on '+' line here
123 <  if (fline==NULL || (reads_format == FASTQ && fline[0] != '+') || (reads_format == FASTA && quals && fline[0] != '>')) {
123 >  if (fline==NULL || (reads_format == FASTQ && fline[0] != '+') ||
124 >      (reads_format == FASTA && quals && fline[0] != '>')) {
125       err_exit("Error: '+' not found for fastq record %s\n",fline);
126       return false;
127       }
# Line 131 | Line 132
132    while ((fline=fr.nextLine())!=NULL) {
133      if (integer_quals)
134        {
135 <        vector<string> integer_qual_values;
136 <        tokenize(string(fline), " ", integer_qual_values);
135 >      vector<string> integer_qual_values;
136 >      tokenize(string(fline), " ", integer_qual_values);
137  
138 <        string temp_qual;
139 <        for (size_t i = 0; i < integer_qual_values.size(); ++i)
140 <          {
141 <            int qual_value = atoi(integer_qual_values[i].c_str());
142 <            if (qual_value < 0) qual_value = 0;
143 <            temp_qual.push_back((char)(qual_value + 33));
144 <          }
138 >      string temp_qual;
139 >      for (size_t i = 0; i < integer_qual_values.size(); ++i)
140 >        {
141 >          int qual_value = atoi(integer_qual_values[i].c_str());
142 >          if (qual_value < 0) qual_value = 0;
143 >          temp_qual.push_back((char)(qual_value + 33));
144 >        }
145  
146 <        qual.append(temp_qual);
146 >      qual.append(temp_qual);
147        }
148      else
149        qual.append(fline);
150 <    
150 <    if ((!color && qual.length()>=seq.length()) || (color && qual.length()+1>=seq.length())) break;
150 >      if (qual.length()>=seq.length()-1) break;
151       }
152    // final check
153 <  if ((!color && seq.length()!=qual.length()) || (color && seq.length()!=qual.length()+1)) {
154 <           err_exit("Error: qual length (%d) differs from seq length (%d) for fastq record %s!\n",
153 >  if (color) {
154 >     if (seq.length()==qual.length()) {
155 >        //discard first qv
156 >        qual=qual.substr(1);
157 >        }
158 >     if (seq.length()!=qual.length()+1) {
159 >        err_exit("Error: length of quality string does not match seq length (%d) for color read %s!\n",
160 >           seq.length(), alt_name.c_str());
161 >        }
162 >     }
163 >  else {
164 >    if (seq.length()!=qual.length()) {
165 >           err_exit("Error: qual string length (%d) differs from seq length (%d) for read %s!\n",
166                 qual.length(), seq.length(), alt_name.c_str());
167 <           return false;
167 >           //return false;
168             }
169 +    }
170    //
159
171    return !(qual.empty());
172   }
173  
174   bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format,
175                          FLineReader* frq) {
176 +  if (fr.pushed_read)
177 +    {
178 +      read = fr.last_read;
179 +      fr.pushed_read = false;
180 +      return true;
181 +    }
182 +  
183    read.clear();
184    char* buf=NULL;
185    while ((buf=fr.nextLine())!=NULL) {
# Line 219 | Line 237
237          read.qual.append(temp_qual);
238        }
239      else {
240 <      // if (color && read.qual.length()==0 && buf[0]=='!')
223 <      //   read.qual.append(&(buf[1])); //some color qual strings start with '!' for the adaptor
224 <      //  else
225 <         read.qual.append(buf);
240 >        read.qual.append(buf);
241        }
242 <    if ((!color && read.qual.length()>=read.seq.length())
243 <          || (color && read.qual.length()+1>=read.seq.length())) break;
242 >    if (read.qual.length()>=read.seq.length()-1)
243 >          break;
244      } //while qv lines
245    
246    // final check
247 <  if ((!color && read.seq.length()!=read.qual.length()) || (color && read.seq.length()!=read.qual.length()+1)) {
247 >  if (color) {
248 >     if (read.seq.length()==read.qual.length()) {
249 >        //discard first qv
250 >        read.qual=read.qual.substr(1);
251 >        }
252 >     if (read.seq.length()!=read.qual.length()+1) {
253 >        err_exit("Error: length of quality string does not match sequence length (%d) for color read %s!\n",
254 >            read.seq.length(), read.alt_name.c_str());
255 >        }
256 >     }
257 >  else {
258 >   if (read.seq.length()!=read.qual.length()) {
259             err_exit("Error: qual length (%d) differs from seq length (%d) for fastq record %s!\n",
260                 read.qual.length(), read.seq.length(), read.alt_name.c_str());
261             return false;
262             }
263 <  //
264 <  return !(read.qual.empty());
263 >    }
264 >
265 >  fr.last_read = read;  
266 >  return !(read.seq.empty());
267   }
268  
269   // This could be faster.
# Line 565 | Line 593
593      }
594   }
595  
596 +
597 + bool ReadStream::next_read(Read& r, ReadFormat read_format) {
598 +  FLineReader fr(fstream.file);
599 +  while (read_pq.size()<100000 && !r_eof) {
600 +    //keep the queue topped off
601 +    Read rf;
602 +    if (!next_fastx_read(fr, rf, read_format)) {
603 +        r_eof=true;
604 +        break;
605 +        }
606 +    //Read read=Read(rf);
607 +    uint64_t id = (uint64_t)atol(rf.name.c_str());
608 +    read_pq.push(make_pair(id, rf));
609 +    }
610 +  if (read_pq.size()==0)
611 +     return false;
612 +  const pair<uint64_t, Read>& t = read_pq.top();
613 +  r=t.second; //copy strings
614 +  //free(t.second);
615 +  read_pq.pop();
616 +  return true;
617 + }
618 +
619 + // reads must ALWAYS requested in increasing order of their ID
620 + bool ReadStream::getRead(uint64_t r_id,
621 +                         Read& read,
622 +                         ReadFormat read_format,
623 +                         bool strip_slash,
624 +                         FILE* um_out, //unmapped reads output
625 +                         bool um_write_found,
626 +                         uint64_t begin_id,
627 +                         uint64_t end_id) {
628 +  if (!fstream.file)
629 +       err_die("Error: calling ReadStream::getRead() with no file handle!");
630 +  if (r_id<last_id)
631 +      err_die("Error: ReadStream::getRead() called with out-of-order id#!");
632 +  last_id=r_id;
633 +  bool found=false;
634 +  while (!found) {
635 +    read.clear();
636 +      // Get the next read from the file
637 +    if (!next_read(read, read_format))
638 +        break;
639 +
640 +    if (strip_slash) {
641 +       string::size_type slash = read.name.rfind("/");
642 +       if (slash != string::npos)
643 +          read.name.resize(slash);
644 +       }
645 +    uint64_t id = (uint64_t)atol(read.name.c_str());
646 +    if (id >= end_id)
647 +      return false;
648 +
649 +    if (id < begin_id)
650 +      continue;
651 +
652 +    if (id == r_id)
653 +      {
654 +        found=true;
655 +      }
656 +    else if (id > r_id)
657 +      {
658 +        read_pq.push(make_pair(id, read));
659 +        break;
660 +      }
661 +
662 +    if (um_out && (um_write_found || !found)) {
663 +     //write unmapped reads
664 +      fprintf(um_out, "@%s\n%s\n+\n%s\n", read.alt_name.c_str(),
665 +                              read.seq.c_str(), read.qual.c_str());
666 +      }
667 +    } //while reads
668 +  return found;
669 + }
670 +
671 +
672   bool get_read_from_stream(uint64_t insert_id,
673 <                          FILE* reads_file,
673 >                          FLineReader& fr,
674                            ReadFormat reads_format,
675                            bool strip_slash,
676 <                          char read_name [],
677 <                          char read_seq  [],
678 <                          char read_alt_name [],
575 <                          char read_qual [])
676 >                          Read& read,
677 >                          FILE* um_out,
678 >                          bool um_write_found)
679   {
680 <  Read read;
681 <  FLineReader fr(reads_file);
682 <  while(!fr.isEof())
683 <        {
581 <        read.clear();
582 <          
583 <          // Get the next read from the file
584 <        if (!next_fastx_read(fr, read, reads_format))
585 <            break;
680 >  bool found=false;
681 >  while(!found && !fr.isEof())
682 >    {
683 >    read.clear();
684  
685 <        if (strip_slash)
686 <        {
687 <          string::size_type slash = read.name.rfind("/");
688 <          if (slash != string::npos)
689 <            read.name.resize(slash);
690 <        }
691 <      
692 <        if ((uint64_t)atoi(read.name.c_str()) == insert_id)
693 <        {
694 <          if (read_name) strcpy(read_name, read.name.c_str());
695 <          if (read_seq) strcpy(read_seq, read.seq.c_str());
696 <          if (read_alt_name) strcpy(read_alt_name, read.alt_name.c_str());
697 <          if (read_qual) strcpy(read_qual, read.qual.c_str());
698 <          return true;
699 <        }
700 <                
701 <      //rt.get_id(read.name, ref_str);
702 <    }  
703 <  
704 <  return false;
685 >      // Get the next read from the file
686 >    if (!next_fastx_read(fr, read, reads_format))
687 >        break;
688 >    if (strip_slash)
689 >      {
690 >        string::size_type slash = read.name.rfind("/");
691 >        if (slash != string::npos)
692 >          read.name.resize(slash);
693 >      }
694 >    uint64_t read_id = (uint64_t)atoi(read.name.c_str());
695 >    if (read_id == insert_id)
696 >      {
697 >        found=true;
698 >      }
699 >    else if (read_id > insert_id)
700 >      {
701 >        fr.pushBack_read();
702 >        break;
703 >      }
704 >
705 >    if (um_out && (um_write_found || !found)) {
706 >     //write unmapped reads
707 >      fprintf(um_out, "@%s\n%s\n+\n%s\n", read.alt_name.c_str(),
708 >                              read.seq.c_str(), read.qual.c_str());
709 >      }
710 >    //rt.get_id(read.name, ref_str);
711 >    } //while reads
712 >  return found;
713   }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines