ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/insertions.h
Revision: 236
Committed: Fri May 4 22:20:02 2012 UTC (12 years, 4 months ago) by gpertea
File size: 2859 byte(s)
Log Message:
sync, wip prep_reads

Line File contents
1 #ifndef INSERTIONS_H
2 #define INSERTIONS_H
3 /*
4 * insertions.h
5 * TopHat
6 *
7 * Adapted from junctions.h
8 */
9
10 #include <cstdio>
11 #include <vector>
12 #include <string>
13 #include <set>
14 #include <iostream>
15 #include <fstream>
16 #include <cstring>
17 #include <seqan/sequence.h>
18 #include <seqan/find.h>
19 #include <seqan/file.h>
20
21 #include "bwt_map.h"
22
23 using namespace std;
24
25
26 /**
27 * Data structure to represent an insertion.
28 * Need to keep track of the actual position of the insertion
29 * as well the actual inserted sequence.
30 */
31 struct Insertion
32 {
33 Insertion (uint32_t ref, uint32_t l, const std::string& seq)
34 : refid(ref), left(l), sequence(seq){}
35 Insertion() : refid(0), left(0), sequence("") {}
36 /**
37 * The ID of the assoicated reference sequence (eg, chr22). In order to actually turn this into
38 * a string, need to use the map associated with the reference table
39 */
40 uint32_t refid;
41 /**
42 * The position of the insertion.
43 * This is the 0-based position of the last genomic nucleotide before the insertion
44 */
45 uint32_t left;
46
47 /**
48 * The actual inserted sequence.
49 */
50 std::string sequence;
51
52 bool operator<(const Insertion& rhs) const
53 {
54 if (refid < rhs.refid)
55 return true;
56 else if (refid > rhs.refid)
57 return false;
58
59 if (left < rhs.left)
60 return true;
61 else if (left > rhs.left)
62 return false;
63
64 if (sequence.length() < rhs.sequence.length())
65 return true;
66 return false;
67 }
68
69
70 bool operator==(const Insertion& rhs) const
71 {
72 return (refid == rhs.refid && left == rhs.left && sequence == rhs.sequence);
73 }
74 };
75
76 struct InsertionStats
77 {
78 InsertionStats() : left_extent(0), right_extent(0), supporting_hits(0) {}
79
80 InsertionStats& merge_with(const InsertionStats& other)
81 {
82 if (this == &other)
83 return *this;
84
85 left_extent = max(left_extent, other.left_extent);
86 right_extent = max(right_extent, other.right_extent);
87 supporting_hits += other.supporting_hits;
88
89 return *this;
90 }
91
92 int left_extent;
93 int right_extent;
94 int supporting_hits;
95 };
96
97
98 /**
99 * A function used to compare Insertions, specifically for use
100 * with std::sets. My C++ is a little weak, but I imagine
101 * there should be someway to directly address the overloaded
102 * operator function and ditch this code.
103 */
104 struct insertion_comparison
105 {
106 bool operator()(const Insertion& lhs, const Insertion& rhs)
107 {
108 return lhs < rhs;
109 }
110 };
111
112 typedef std::map<Insertion, InsertionStats> InsertionSet;
113
114 void insertions_from_alignment(const BowtieHit& bh, InsertionSet& insertions);
115 void print_insertions(FILE* insertions_out, const InsertionSet& insertions, RefSequenceTable& ref_sequences);
116 void insertions_from_spliced_hit(const BowtieHit& bh, vector<pair<Insertion, InsertionStats> >& insertions);
117 void merge_with(InsertionSet& insertions, const InsertionSet& other);
118
119 #endif