ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/reads.cpp
(Generate patch)
# Line 173 | Line 173
173  
174   bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format,
175                          FLineReader* frq) {
176 +  /*
177    if (fr.pushed_read)
178      {
179        read = fr.last_read;
180        fr.pushed_read = false;
181        return true;
182      }
183 <  
183 >  */
184    read.clear();
185    char* buf=NULL;
186    while ((buf=fr.nextLine())!=NULL) {
# Line 262 | Line 263
263             }
264      }
265  
266 <  fr.last_read = read;  
266 >  //fr.last_read = read;
267    return !(read.seq.empty());
268   }
269  
# Line 594 | Line 595
595   }
596  
597  
598 < bool ReadStream::next_read(Read& r, ReadFormat read_format) {
599 <  FLineReader fr(fstream.file);
600 <  while (read_pq.size()<500000 && !r_eof) {
598 > void bam2Read(bam1_t *b, Read& rd, bool alt_name=false) {
599 >  GBamRecord bamrec(b);
600 >  rd.clear();
601 >  rd.seq=bamrec.seqData(&rd.qual);
602 >  rd.name=bam1_qname(b);
603 >  if (alt_name)
604 >    rd.alt_name=bamrec.tag_str("ZN");
605 > }
606 >
607 >
608 > bool ReadStream::next_read(QReadData& rdata, ReadFormat read_format) {
609 >  while (read_pq.size()<ReadBufSize && !r_eof) {
610      //keep the queue topped off
611      Read rf;
612 <    if (!next_fastx_read(fr, rf, read_format)) {
613 <        r_eof=true;
614 <        break;
615 <        }
616 <    //Read read=Read(rf);
607 <    uint64_t id = (uint64_t)atol(rf.name.c_str());
608 <    read_pq.push(make_pair(id, rf));
612 >    if (get_direct(rf, read_format)) {
613 >      uint64_t id = (uint64_t)atol(rf.name.c_str());
614 >      QReadData rdata(id, rf, last_b());
615 >      read_pq.push(rdata);
616 >      }
617      }
618    if (read_pq.size()==0)
619       return false;
620 <  const pair<uint64_t, Read>& t = read_pq.top();
621 <  r=t.second; //copy strings
614 <  //free(t.second);
620 >  const QReadData& t = read_pq.top();
621 >  rdata=t; //copy strings and duplicate b pointer!
622    read_pq.pop();
623    return true;
624   }
625  
626 < // reads must ALWAYS requested in increasing order of their ID
626 > bool ReadStream::get_direct(Read& r, ReadFormat read_format) {
627 >  if (fstream.file==NULL) return false;
628 >  if (fstream.is_bam) {
629 >         bool got_read=false;
630 >         while (!got_read) {
631 >       if (samread(fstream.bam_file, b) < 0) {
632 >          r_eof=true;
633 >          return false;
634 >       }
635 >       else {
636 >        if (bam_ignoreQC || (b->core.flag & BAM_FQCFAIL)==0)
637 >            got_read=true;
638 >       }
639 >         }
640 >     bam2Read(b, r, bam_alt_name);
641 >     return true;
642 >   }
643 >   if (!next_fastx_read(*flseqs, r, read_format, flquals)) {
644 >        r_eof=true;
645 >        return false;
646 >        }
647 >  return true;
648 > }
649 >
650 > // reads must ALWAYS be requested in increasing order of their ID
651   bool ReadStream::getRead(uint64_t r_id,
652                           Read& read,
653                           ReadFormat read_format,
654                           bool strip_slash,
655                           uint64_t begin_id,
656                           uint64_t end_id,
657 <                         FILE* um_out, //unmapped reads output
657 >                         GBamWriter* um_out, //unmapped reads output
658                           bool um_write_found //write the found ones
659                           ) {
660    if (!fstream.file)
# Line 632 | Line 663
663        err_die("Error: ReadStream::getRead() called with out-of-order id#!");
664    last_id=r_id;
665    bool found=false;
666 +  read.clear();
667    while (!found) {
668 <    read.clear();
669 <      // Get the next read from the file
638 <    if (!next_read(read, read_format))
668 >    QReadData rdata;
669 >    if (!next_read(rdata, read_format))
670          break;
671 <
671 >    /*
672      if (strip_slash) {
673 <       string::size_type slash = read.name.rfind("/");
673 >       string::size_type slash = rdata.read.name.rfind("/");
674         if (slash != string::npos)
675 <          read.name.resize(slash);
675 >          rdata.read.name.resize(slash);
676         }
677      uint64_t id = (uint64_t)atol(read.name.c_str());
678 <    if (id >= end_id)
678 >    */
679 >    if (rdata.id >= end_id)
680        return false;
681  
682 <    if (id < begin_id)
682 >    if (rdata.id < begin_id)
683        continue;
684  
685 <    if (id == r_id)
685 >    if (rdata.id == r_id)
686        {
687 +      read=rdata.read;
688            found=true;
689        }
690 <    else if (id > r_id)
690 >    else if (rdata.id > r_id)
691        { //can't find it, went too far
692        //only happens when reads [mates] were removed for some reason
693 <      read_pq.push(make_pair(id, read));
693 >      //read_pq.push(make_pair(id, read));
694 >      read_pq.push(rdata);
695        break;
696        }
697      if (um_out && ((um_write_found && found) ||
698                     (!um_write_found && !found))) {
699 <     //write unmapped reads
700 <      fprintf(um_out, "@%s\n%s\n+\n%s\n", read.alt_name.c_str(),
701 <                              read.seq.c_str(), read.qual.c_str());
702 <      }
703 <    } //while reads
704 <  return found;
705 < }
706 <
707 <
708 < bool get_read_from_stream(uint64_t insert_id,
709 <                          FLineReader& fr,
710 <                          ReadFormat reads_format,
711 <                          bool strip_slash,
712 <                          Read& read,
713 <                          FILE* um_out,
714 <                          bool um_write_found)
715 < {
716 <  bool found=false;
717 <  while(!found && !fr.isEof())
718 <    {
719 <    read.clear();
720 <
721 <      // Get the next read from the file
722 <    if (!next_fastx_read(fr, read, reads_format))
723 <        break;
724 <    if (strip_slash)
725 <      {
726 <        string::size_type slash = read.name.rfind("/");
727 <        if (slash != string::npos)
694 <          read.name.resize(slash);
695 <      }
696 <    uint64_t id = (uint64_t)atoi(read.name.c_str());
697 <    if (id == insert_id)
698 <      {
699 <        found=true;
700 <      }
701 <    else if (id > insert_id)
702 <      {
703 <          fr.pushBack_read();
704 <          break;
705 <      }
706 <
707 <    if (um_out && ((um_write_found && found) ||
708 <                   (!um_write_found && !found))) {
709 <     //write unmapped reads
710 <      fprintf(um_out, "@%s\n%s\n+\n%s\n", read.alt_name.c_str(),
711 <                              read.seq.c_str(), read.qual.c_str());
699 >       //write unmapped reads
700 >       //fprintf(um_out, "@%s\n%s\n+\n%s\n", read.alt_name.c_str(),
701 >       //                        read.seq.c_str(), read.qual.c_str());
702 >        string rname(rdata.read.alt_name);
703 >        int matenum=0;
704 >        size_t pl=rname.length()-1;
705 >        if (rname[pl-2]=='/') {
706 >                if (rname[pl]=='1')
707 >                        matenum=1;
708 >                else if (rname[pl]=='2') {
709 >                        matenum=2;
710 >                }
711 >                if (matenum) rname.resize(pl-1);
712 >        }
713 >
714 >        GBamRecord bamrec(rname.c_str(), -1, 0, false, rdata.read.seq.c_str(),
715 >                NULL, rdata.read.qual.c_str());
716 >        if (matenum) {
717 >          if (matenum==1) bamrec.set_flag(BAM_FREAD1);
718 >          else bamrec.set_flag(BAM_FREAD2);
719 >        }
720 >        if (rdata.trashCode) {
721 >          if (rdata.trashCode!='M') {
722 >                   //multi-mapped reads did not really QC-fail
723 >                       bamrec.set_flag(BAM_FQCFAIL);
724 >          }
725 >          bamrec.add_aux("ZT", 'A', 1, (uint8_t*)&rdata.trashCode);
726 >        }
727 >        um_out->write(&bamrec);
728        }
713    //rt.get_id(read.name, ref_str);
729      } //while reads
730    return found;
731   }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines