ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/reads.cpp
(Generate patch)
# Line 120 | Line 120
120      if (fline==NULL) return false;
121      }
122    //must be on '+' line here
123 <  if (fline==NULL || (reads_format == FASTQ && fline[0] != '+') || (reads_format == FASTA && quals && fline[0] != '>')) {
123 >  if (fline==NULL || (reads_format == FASTQ && fline[0] != '+') ||
124 >      (reads_format == FASTA && quals && fline[0] != '>')) {
125       err_exit("Error: '+' not found for fastq record %s\n",fline);
126       return false;
127       }
# Line 131 | Line 132
132    while ((fline=fr.nextLine())!=NULL) {
133      if (integer_quals)
134        {
135 <        vector<string> integer_qual_values;
136 <        tokenize(string(fline), " ", integer_qual_values);
135 >      vector<string> integer_qual_values;
136 >      tokenize(string(fline), " ", integer_qual_values);
137  
138 <        string temp_qual;
139 <        for (size_t i = 0; i < integer_qual_values.size(); ++i)
140 <          {
141 <            int qual_value = atoi(integer_qual_values[i].c_str());
142 <            if (qual_value < 0) qual_value = 0;
143 <            temp_qual.push_back((char)(qual_value + 33));
144 <          }
138 >      string temp_qual;
139 >      for (size_t i = 0; i < integer_qual_values.size(); ++i)
140 >        {
141 >          int qual_value = atoi(integer_qual_values[i].c_str());
142 >          if (qual_value < 0) qual_value = 0;
143 >          temp_qual.push_back((char)(qual_value + 33));
144 >        }
145  
146 <        qual.append(temp_qual);
146 >      qual.append(temp_qual);
147        }
148      else
149        qual.append(fline);
150 <    
150 <    if ((!color && qual.length()>=seq.length()) || (color && qual.length()+1>=seq.length())) break;
150 >      if (qual.length()>=seq.length()-1) break;
151       }
152    // final check
153 <  if ((!color && seq.length()!=qual.length()) || (color && seq.length()!=qual.length()+1)) {
154 <           err_exit("Error: qual length (%d) differs from seq length (%d) for fastq record %s!\n",
153 >  if (color) {
154 >     if (seq.length()==qual.length()) {
155 >        //discard first qv
156 >        qual=qual.substr(1);
157 >        }
158 >     if (seq.length()!=qual.length()+1) {
159 >        err_exit("Error: length of quality string does not match seq length (%d) for color read %s!\n",
160 >           seq.length(), alt_name.c_str());
161 >        }
162 >     }
163 >  else {
164 >    if (seq.length()!=qual.length()) {
165 >           err_exit("Error: qual string length (%d) differs from seq length (%d) for read %s!\n",
166                 qual.length(), seq.length(), alt_name.c_str());
167 <           return false;
167 >           //return false;
168             }
169 +    }
170    //
159
171    return !(qual.empty());
172   }
173  
174   bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format,
175                          FLineReader* frq) {
176 +  if (fr.pushed_read)
177 +    {
178 +      read = fr.last_read;
179 +      fr.pushed_read = false;
180 +      return true;
181 +    }
182 +  
183    read.clear();
184    char* buf=NULL;
185    while ((buf=fr.nextLine())!=NULL) {
# Line 219 | Line 237
237          read.qual.append(temp_qual);
238        }
239      else {
240 <      // if (color && read.qual.length()==0 && buf[0]=='!')
223 <      //   read.qual.append(&(buf[1])); //some color qual strings start with '!' for the adaptor
224 <      //  else
225 <         read.qual.append(buf);
240 >        read.qual.append(buf);
241        }
242 <    if ((!color && read.qual.length()>=read.seq.length())
243 <          || (color && read.qual.length()+1>=read.seq.length())) break;
242 >    if (read.qual.length()>=read.seq.length()-1)
243 >          break;
244      } //while qv lines
245    
246    // final check
247 <  if ((!color && read.seq.length()!=read.qual.length()) || (color && read.seq.length()!=read.qual.length()+1)) {
247 >  if (color) {
248 >     if (read.seq.length()==read.qual.length()) {
249 >        //discard first qv
250 >        read.qual=read.qual.substr(1);
251 >        }
252 >     if (read.seq.length()!=read.qual.length()+1) {
253 >        err_exit("Error: length of quality string does not match sequence length (%d) for color read %s!\n",
254 >            read.seq.length(), read.alt_name.c_str());
255 >        }
256 >     }
257 >  else {
258 >   if (read.seq.length()!=read.qual.length()) {
259             err_exit("Error: qual length (%d) differs from seq length (%d) for fastq record %s!\n",
260                 read.qual.length(), read.seq.length(), read.alt_name.c_str());
261             return false;
262             }
263 <  //
264 <  return !(read.qual.empty());
263 >    }
264 >
265 >  fr.last_read = read;  
266 >  return !(read.seq.empty());
267   }
268  
269   // This could be faster.
# Line 476 | Line 504
504   {
505    assert(color.length() == ref.length() - 1);
506    
507 <  const size_t max_length = 256;
507 >  static const size_t max_length = MAX_READ_LEN;
508    const unsigned int max_value = max_length * 0xff;
509    size_t length = color.length();
510    if (length < 1 || length + 1 > max_length)
# Line 565 | Line 593
593      }
594   }
595  
596 +
597 + bool ReadStream::next_read(Read& r, ReadFormat read_format) {
598 +  FLineReader fr(fstream.file);
599 +  while (read_pq.size()<500000 && !r_eof) {
600 +    //keep the queue topped off
601 +    Read rf;
602 +    if (!next_fastx_read(fr, rf, read_format)) {
603 +        r_eof=true;
604 +        break;
605 +        }
606 +    //Read read=Read(rf);
607 +    uint64_t id = (uint64_t)atol(rf.name.c_str());
608 +    read_pq.push(make_pair(id, rf));
609 +    }
610 +  if (read_pq.size()==0)
611 +     return false;
612 +  const pair<uint64_t, Read>& t = read_pq.top();
613 +  r=t.second; //copy strings
614 +  //free(t.second);
615 +  read_pq.pop();
616 +  return true;
617 + }
618 +
619 + // reads must ALWAYS requested in increasing order of their ID
620 + bool ReadStream::getRead(uint64_t r_id,
621 +                         Read& read,
622 +                         ReadFormat read_format,
623 +                         bool strip_slash,
624 +                         uint64_t begin_id,
625 +                         uint64_t end_id,
626 +                         FILE* um_out, //unmapped reads output
627 +                         bool um_write_found //write the found ones
628 +                         ) {
629 +  if (!fstream.file)
630 +       err_die("Error: calling ReadStream::getRead() with no file handle!");
631 +  if (r_id<last_id)
632 +      err_die("Error: ReadStream::getRead() called with out-of-order id#!");
633 +  last_id=r_id;
634 +  bool found=false;
635 +  while (!found) {
636 +    read.clear();
637 +      // Get the next read from the file
638 +    if (!next_read(read, read_format))
639 +        break;
640 +
641 +    if (strip_slash) {
642 +       string::size_type slash = read.name.rfind("/");
643 +       if (slash != string::npos)
644 +          read.name.resize(slash);
645 +       }
646 +    uint64_t id = (uint64_t)atol(read.name.c_str());
647 +    if (id >= end_id)
648 +      return false;
649 +
650 +    if (id < begin_id)
651 +      continue;
652 +
653 +    if (id == r_id)
654 +      {
655 +          found=true;
656 +      }
657 +    else if (id > r_id)
658 +      { //can't find it, went too far
659 +      //only happens when reads [mates] were removed for some reason
660 +      read_pq.push(make_pair(id, read));
661 +      break;
662 +      }
663 +    if (um_out && ((um_write_found && found) ||
664 +                   (!um_write_found && !found))) {
665 +     //write unmapped reads
666 +      fprintf(um_out, "@%s\n%s\n+\n%s\n", read.alt_name.c_str(),
667 +                              read.seq.c_str(), read.qual.c_str());
668 +      }
669 +    } //while reads
670 +  return found;
671 + }
672 +
673 +
674   bool get_read_from_stream(uint64_t insert_id,
675 <                          FILE* reads_file,
675 >                          FLineReader& fr,
676                            ReadFormat reads_format,
677                            bool strip_slash,
678 <                          char read_name [],
679 <                          char read_seq  [],
680 <                          char read_alt_name [],
575 <                          char read_qual [])
678 >                          Read& read,
679 >                          FILE* um_out,
680 >                          bool um_write_found)
681   {
682 <  Read read;
683 <  FLineReader fr(reads_file);
684 <  while(!fr.isEof())
685 <        {
581 <        read.clear();
582 <          
583 <          // Get the next read from the file
584 <        if (!next_fastx_read(fr, read, reads_format))
585 <            break;
682 >  bool found=false;
683 >  while(!found && !fr.isEof())
684 >    {
685 >    read.clear();
686  
687 <        if (strip_slash)
688 <        {
689 <          string::size_type slash = read.name.rfind("/");
690 <          if (slash != string::npos)
691 <            read.name.resize(slash);
692 <        }
693 <      
694 <        if ((uint64_t)atoi(read.name.c_str()) == insert_id)
695 <        {
696 <          if (read_name) strcpy(read_name, read.name.c_str());
697 <          if (read_seq) strcpy(read_seq, read.seq.c_str());
698 <          if (read_alt_name) strcpy(read_alt_name, read.alt_name.c_str());
699 <          if (read_qual) strcpy(read_qual, read.qual.c_str());
700 <          return true;
701 <        }
702 <                
703 <      //rt.get_id(read.name, ref_str);
704 <    }  
705 <  
706 <  return false;
687 >      // Get the next read from the file
688 >    if (!next_fastx_read(fr, read, reads_format))
689 >        break;
690 >    if (strip_slash)
691 >      {
692 >        string::size_type slash = read.name.rfind("/");
693 >        if (slash != string::npos)
694 >          read.name.resize(slash);
695 >      }
696 >    uint64_t id = (uint64_t)atoi(read.name.c_str());
697 >    if (id == insert_id)
698 >      {
699 >        found=true;
700 >      }
701 >    else if (id > insert_id)
702 >      {
703 >          fr.pushBack_read();
704 >          break;
705 >      }
706 >
707 >    if (um_out && ((um_write_found && found) ||
708 >                   (!um_write_found && !found))) {
709 >     //write unmapped reads
710 >      fprintf(um_out, "@%s\n%s\n+\n%s\n", read.alt_name.c_str(),
711 >                              read.seq.c_str(), read.qual.c_str());
712 >      }
713 >    //rt.get_id(read.name, ref_str);
714 >    } //while reads
715 >  return found;
716   }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines