ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/alphabet.h
Revision: 29
Committed: Tue Aug 2 21:24:54 2011 UTC (13 years, 1 month ago) by gpertea
File size: 3019 byte(s)
Log Message:
adding tophat source work

Line File contents
1 #ifndef ALPHABETS_H_
2 #define ALPHABETS_H_
3
4 #include <stdexcept>
5 #include <string>
6 #include <seqan/sequence.h>
7 #include <sstream>
8 #include "assert_helpers.h"
9
10 using namespace std;
11 using namespace seqan;
12
/**
 * Helper function to render a uint32_t as a DNA string where each 2-bit
 * stretch is one character (0=A, 1=C, 2=G, 3=T) and more significant
 * bits appear to the left of less significant bits.
 *
 * a   - the packed value; only its low 2*len bits contribute.
 * len - number of characters to produce.  A value <= 0 yields an empty
 *       string.  A uint32_t holds at most 16 two-bit characters, so any
 *       requested positions beyond the 16th come out as leading 'A's.
 *
 * Returns the decoded string by value.
 */
static inline std::string u32ToDna(uint32_t a, int len) {
	if(len <= 0) {
		return std::string();
	}
	// Build the result directly in a string of the requested length
	// rather than in a fixed-size stack buffer, so an unexpected len can
	// never write out of bounds.
	std::string dna(static_cast<std::string::size_type>(len), 'A');
	// Fill from the rightmost (least significant) character leftwards.
	// Once no set bits remain, every remaining position is 'A', which is
	// what the string was pre-filled with.
	for(int i = len - 1; i >= 0 && a != 0; i--) {
		dna[i] = "ACGT"[a & 3];
		a >>= 2;
	}
	return dna;
}
28
29 /**
30 * Return a new TStr containing the reverse-complement of s.
31 */
32 template<typename TStr>
33 static inline TStr reverseComplement(const TStr& s) {
34 typedef typename Value<TStr>::Type TVal;
35 TStr s_rc;
36 size_t slen = length(s);
37 resize(s_rc, slen);
38 for(size_t i = 0; i < slen; i++) {
39 int sv = (int)s[slen-i-1];
40 if(sv == 4) {
41 s_rc[i] = (TVal)4;
42 } else {
43 s_rc[i] = (TVal)(sv ^ 3);
44 }
45 }
46 return s_rc;
47 }
48
49 /// Reverse a string in-place
50 template <typename TStr>
51 static inline void reverseInPlace(TStr& s) {
52 typedef typename Value<TStr>::Type TVal;
53 size_t len = length(s);
54 for(size_t i = 0; i < (len>>1); i++) {
55 TVal tmp = s[i];
56 s[i] = s[len-i-1];
57 s[len-i-1] = tmp;
58 }
59 }
60
61 /**
62 * Return the reverse-complement of s.
63 */
64 template<typename TStr>
65 static inline TStr reverseCopy(const TStr& s) {
66 typedef typename Value<TStr>::Type TVal;
67 TStr s_rc;
68 size_t slen = length(s);
69 resize(s_rc, slen);
70 for(size_t i = 0; i < slen; i++) {
71 s_rc[i] = (TVal)((int)s[slen-i-1]);
72 }
73 return s_rc;
74 }
75
76 /**
77 * Return the reverse-complement of s.
78 */
79 static inline bool isReverseComplement(const String<Dna5>& s1,
80 const String<Dna5>& s2)
81 {
82 if(length(s1) != length(s2)) return false;
83 size_t slen = length(s1);
84 for(size_t i = 0; i < slen; i++) {
85 int i1 = (int)s1[i];
86 int i2 = (int)s2[slen - i - 1];
87 if(i1 == 4) {
88 if(i2 != 4) return false;
89 }
90 else if(i1 != (i2 ^ 3)) return false;
91 }
92 return true;
93 }
94
/**
 * Return true iff the first string is dollar-less-than the second,
 * i.e. less-than under the pretence that both strings end with a
 * 'dollar sign' character that is lexicographically larger than all
 * other characters.
 *
 * The answer is true when isPrefix(r, l) holds, or when l < r holds
 * while isPrefix(l, r) does not.
 * NOTE(review): presumably isPrefix(x, y) means "x is a prefix of y";
 * confirm against SeqAn's argument order.
 */
template <typename TStr>
static inline bool
dollarLt(const TStr& l, const TStr& r) {
	if(isPrefix(r, l)) {
		return true;
	}
	if(!(l < r)) {
		return false;
	}
	return !isPrefix(l, r);
}
106
/**
 * Return true iff the first string is dollar-greater-than the second,
 * where both strings are treated as ending with a 'dollar sign'
 * character that is lexicographically larger than all other
 * characters.  Computed as the logical negation of dollarLt(l, r).
 */
template <typename TStr>
static inline bool
dollarGt(const TStr& l, const TStr& r) {
	const bool isLess = dollarLt(l, r);
	return !isLess;
}
118
119
// Byte lookup tables whose definitions live outside this header; they
// are only declared here, once each.
// NOTE(review): the per-table descriptions below are inferred from the
// names alone -- confirm against the file that defines the tables.
extern uint8_t dna4Cat[];      // presumably: character -> DNA category
extern uint8_t charToDna5[];   // presumably: character -> Dna5 code
extern uint8_t rcCharToDna5[]; // presumably: character -> Dna5 code of its complement
127 #endif /*ALPHABETS_H_*/