ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/map2gtf.h
Revision: 200
Committed: Fri Mar 9 22:25:25 2012 UTC (12 years, 5 months ago) by gpertea
File size: 2872 byte(s)
Log Message:
Line File contents
1 /*
2 * Author: Harold Pimentel
3 * Contact: http://cs.berkeley.edu/~pimentel
4 * Date: June 10, 2011
5 */
6
7 #ifndef _MAP2GTF_H_
8 #define _MAP2GTF_H_
9
10 #ifdef HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #include <cassert>
15 #include <cstdlib>
16 #include <cstring>
17 #include <fstream>
18 #include <iostream>
19 #include <string>
20 #include <sstream>
21 #include <vector>
22
23 #include <bam/bam.h>
24 #include <bam/sam.h>
25
26 #include <seqan/sequence.h>
27
28 #include <getopt.h>
29 #include <unistd.h>
30
31 #include "bwt_map.h"
32 #include "common.h"
33 #include "gff.h"
34
35 #define MAX_READ_NAME_LEN 2048
36
37 /*
38 * XXX: This class currently assumes someone used the script in TopHat to map
39 * the reads already. It also depends on that same format.
40 */
41 class TranscriptomeHit;
42 class Map2GTF
43 {
44 public:
45 Map2GTF(const std::string& gtf_fname, const std::string& sam_fname);
46 ~Map2GTF();
47 // Write out to a BAM file
48 bool next_read_hits(std::vector<bam1_t*>& hits, size_t& num_hits);
49 void convert_coords(const std::string& out_fname, const std::string& sam_header);
50 void trans_to_genomic_coords(TranscriptomeHit& hit);
51
52 private:
53 GffReader gtfReader_;
54
55 std::string gtf_fname_;
56 std::string in_fname_;
57
58 FILE* gtf_fhandle_;
59 samfile_t* in_fhandle_;
60 bam_header_t* in_sam_header_;
61
62 map<string, int> ref_to_id_;
63 bam_header_t* out_sam_header_;
64
65 ReadTable readTable_;
66 RefSequenceTable refSeqTable_;
67
68 Map2GTF(); // Don't want anyone calling the constructor w/o options
69 };
70
71 class TranscriptomeHit
72 {
73 public:
74 bam1_t* hit;
75 GffObj* trans;
76 TranscriptomeHit(bam1_t* h = NULL, GffObj* t = NULL)
77 {
78 hit = h;
79 trans = t;
80 }
81 bool operator==(const TranscriptomeHit& th) const
82 {
83 if (hit->core.tid != th.hit->core.tid)
84 return false;
85
86 if (hit->core.pos != th.hit->core.pos)
87 return false;
88
89 if (hit->core.n_cigar != th.hit->core.n_cigar)
90 return false;
91
92 for (int i = 0; i < hit->core.n_cigar; ++i)
93 {
94 if (bam1_cigar(hit)[i] != bam1_cigar(th.hit)[i])
95 return false;
96 }
97
98 return true;
99 }
100 bool operator<(const TranscriptomeHit& th) const
101 {
102 if (hit->core.tid != th.hit->core.tid)
103 return hit->core.tid < th.hit->core.tid;
104
105 if (hit->core.pos != th.hit->core.pos)
106 return hit->core.pos < th.hit->core.pos;
107
108 if (hit->core.n_cigar != th.hit->core.n_cigar)
109 return hit->core.n_cigar < th.hit->core.n_cigar;
110
111 for (int i = 0; i < hit->core.n_cigar; ++i)
112 {
113 if (bam1_cigar(hit)[i] != bam1_cigar(th.hit)[i])
114 return bam1_cigar(hit)[i] < bam1_cigar(th.hit)[i];
115 }
116
117 return false;
118 }
119 };
120
121 bool get_read_start(GList<GffExon>* exon_list, size_t gtf_start,
122 size_t& genome_start, int& exon_idx);
123
124 void print_trans(GffObj* trans, const bam1_t* in, size_t rem_len,
125 size_t match_len, size_t cur_pos, size_t start_pos);
126
127 #endif /* _MAP2GTF_H_ */

Properties

Name Value
svn:executable *