1 |
gpertea |
29 |
#ifndef ALPHABETS_H_ |
2 |
|
|
#define ALPHABETS_H_ |
3 |
|
|
|
4 |
|
|
#include <stdexcept> |
5 |
|
|
#include <string> |
6 |
|
|
#include <seqan/sequence.h> |
7 |
|
|
#include <sstream> |
8 |
|
|
#include "assert_helpers.h" |
9 |
|
|
|
10 |
|
|
using namespace std; |
11 |
|
|
using namespace seqan; |
12 |
|
|
|
13 |
|
|
/** |
14 |
|
|
* Helper function to print a uint32_t as a DNA string where each 2-bit |
15 |
|
|
* stretch is a character and more significiant bits appear to the left |
16 |
|
|
* of less singificant bits. |
17 |
|
|
*/ |
18 |
|
|
static inline std::string u32ToDna(uint32_t a, int len) { |
19 |
|
|
char buf[17]; // TODO: return a new string; by value I guess |
20 |
|
|
assert_leq(len, 16); |
21 |
|
|
for(int i = 0; i < len; i++) { |
22 |
|
|
buf[len-i-1] = "ACGT"[a & 3]; |
23 |
|
|
a >>= 2; |
24 |
|
|
} |
25 |
|
|
buf[len] = '\0'; |
26 |
|
|
return std::string(buf); |
27 |
|
|
} |
28 |
|
|
|
29 |
|
|
/** |
30 |
|
|
* Return a new TStr containing the reverse-complement of s. |
31 |
|
|
*/ |
32 |
|
|
template<typename TStr> |
33 |
|
|
static inline TStr reverseComplement(const TStr& s) { |
34 |
|
|
typedef typename Value<TStr>::Type TVal; |
35 |
|
|
TStr s_rc; |
36 |
|
|
size_t slen = length(s); |
37 |
|
|
resize(s_rc, slen); |
38 |
|
|
for(size_t i = 0; i < slen; i++) { |
39 |
|
|
int sv = (int)s[slen-i-1]; |
40 |
|
|
if(sv == 4) { |
41 |
|
|
s_rc[i] = (TVal)4; |
42 |
|
|
} else { |
43 |
|
|
s_rc[i] = (TVal)(sv ^ 3); |
44 |
|
|
} |
45 |
|
|
} |
46 |
|
|
return s_rc; |
47 |
|
|
} |
48 |
|
|
|
49 |
|
|
/// Reverse a string in-place |
50 |
|
|
template <typename TStr> |
51 |
|
|
static inline void reverseInPlace(TStr& s) { |
52 |
|
|
typedef typename Value<TStr>::Type TVal; |
53 |
|
|
size_t len = length(s); |
54 |
|
|
for(size_t i = 0; i < (len>>1); i++) { |
55 |
|
|
TVal tmp = s[i]; |
56 |
|
|
s[i] = s[len-i-1]; |
57 |
|
|
s[len-i-1] = tmp; |
58 |
|
|
} |
59 |
|
|
} |
60 |
|
|
|
61 |
|
|
/** |
62 |
|
|
* Return the reverse-complement of s. |
63 |
|
|
*/ |
64 |
|
|
template<typename TStr> |
65 |
|
|
static inline TStr reverseCopy(const TStr& s) { |
66 |
|
|
typedef typename Value<TStr>::Type TVal; |
67 |
|
|
TStr s_rc; |
68 |
|
|
size_t slen = length(s); |
69 |
|
|
resize(s_rc, slen); |
70 |
|
|
for(size_t i = 0; i < slen; i++) { |
71 |
|
|
s_rc[i] = (TVal)((int)s[slen-i-1]); |
72 |
|
|
} |
73 |
|
|
return s_rc; |
74 |
|
|
} |
75 |
|
|
|
76 |
|
|
/** |
77 |
|
|
* Return the reverse-complement of s. |
78 |
|
|
*/ |
79 |
|
|
static inline bool isReverseComplement(const String<Dna5>& s1, |
80 |
|
|
const String<Dna5>& s2) |
81 |
|
|
{ |
82 |
|
|
if(length(s1) != length(s2)) return false; |
83 |
|
|
size_t slen = length(s1); |
84 |
|
|
for(size_t i = 0; i < slen; i++) { |
85 |
|
|
int i1 = (int)s1[i]; |
86 |
|
|
int i2 = (int)s2[slen - i - 1]; |
87 |
|
|
if(i1 == 4) { |
88 |
|
|
if(i2 != 4) return false; |
89 |
|
|
} |
90 |
|
|
else if(i1 != (i2 ^ 3)) return false; |
91 |
|
|
} |
92 |
|
|
return true; |
93 |
|
|
} |
94 |
|
|
|
95 |
|
|
/**
 * Return true iff l is dollar-less-than r, i.e. l sorts before r when an
 * imaginary 'dollar sign' character, lexicographically larger than every
 * other character, is appended to both strings.
 *
 * The prefix relationship decides first: if isPrefix(r, l) holds the
 * answer is true; otherwise l must compare less than r without
 * isPrefix(l, r) holding.
 * NOTE(review): the result for equal strings depends on how isPrefix
 * treats equal arguments -- confirm against the SeqAn version in use.
 */
template <typename TStr>
static inline bool
dollarLt(const TStr& l, const TStr& r) {
    if(isPrefix(r, l)) return true;
    if(!(l < r)) return false;
    return !isPrefix(l, r);
}
106 |
|
|
|
107 |
|
|
/**
 * Return true iff l is dollar-greater-than r, where an imaginary 'dollar
 * sign' character, lexicographically larger than every other character,
 * is treated as terminating both strings.
 *
 * Implemented as the exact negation of dollarLt, so any pair for which
 * dollarLt is false (including a tie, if dollarLt reports one as false)
 * yields true here.
 */
template <typename TStr>
static inline bool
dollarGt(const TStr& l, const TStr& r) {
    const bool isLess = dollarLt(l, r);
    return !isLess;
}
118 |
|
|
|
119 |
|
|
|
120 |
|
|
// Byte tables shared across translation units. Only the declarations are
// given here; the definitions (and with them the sizes and contents) live
// elsewhere. Each table is declared exactly once.
// NOTE(review): judging by the names, charToDna5 and rcCharToDna5 map a
// character to a Dna5 code (the latter to its complement) and dna4Cat
// categorises characters -- confirm against the defining source file.
extern uint8_t dna4Cat[];
extern uint8_t charToDna5[];
extern uint8_t rcCharToDna5[];
127 |
|
|
#endif /*ALPHABETS_H_*/ |