ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/alphabet.h
Revision: 29
Committed: Tue Aug 2 21:24:54 2011 UTC (13 years, 2 months ago) by gpertea
File size: 3019 byte(s)
Log Message:
adding tophat source work

Line User Rev File contents
1 gpertea 29 #ifndef ALPHABETS_H_
2     #define ALPHABETS_H_
3    
4     #include <stdexcept>
5     #include <string>
6     #include <seqan/sequence.h>
7     #include <sstream>
8     #include "assert_helpers.h"
9    
10     using namespace std;
11     using namespace seqan;
12    
/**
 * Render the low 2*len bits of a 32-bit word as a DNA string.
 *
 * Each 2-bit group encodes one character (0=A, 1=C, 2=G, 3=T).  More
 * significant groups appear to the left of less significant ones, so the
 * lowest two bits of `a` become the last character of the result.
 *
 * @param a    packed 2-bit-per-base value
 * @param len  number of characters to produce; values <= 0 yield an
 *             empty string
 * @return     a string of exactly max(len, 0) characters
 *
 * The result is built directly in a std::string rather than in a
 * fixed-size stack buffer, so no value of len can overrun a buffer.
 * Because a uint32_t holds only 16 two-bit groups, any characters beyond
 * the 16th (counting from the right) come from zero bits and are 'A'.
 */
static inline std::string u32ToDna(uint32_t a, int len) {
    if(len <= 0) {
        return std::string();
    }
    // Pre-fill with 'A' (code 0); the loop overwrites every position.
    std::string dna(static_cast<std::string::size_type>(len), 'A');
    for(int i = len - 1; i >= 0; i--) {
        dna[i] = "ACGT"[a & 3];
        a >>= 2;
    }
    return dna;
}
28    
29     /**
30     * Return a new TStr containing the reverse-complement of s.
31     */
32     template<typename TStr>
33     static inline TStr reverseComplement(const TStr& s) {
34     typedef typename Value<TStr>::Type TVal;
35     TStr s_rc;
36     size_t slen = length(s);
37     resize(s_rc, slen);
38     for(size_t i = 0; i < slen; i++) {
39     int sv = (int)s[slen-i-1];
40     if(sv == 4) {
41     s_rc[i] = (TVal)4;
42     } else {
43     s_rc[i] = (TVal)(sv ^ 3);
44     }
45     }
46     return s_rc;
47     }
48    
49     /// Reverse a string in-place
50     template <typename TStr>
51     static inline void reverseInPlace(TStr& s) {
52     typedef typename Value<TStr>::Type TVal;
53     size_t len = length(s);
54     for(size_t i = 0; i < (len>>1); i++) {
55     TVal tmp = s[i];
56     s[i] = s[len-i-1];
57     s[len-i-1] = tmp;
58     }
59     }
60    
61     /**
62     * Return the reverse-complement of s.
63     */
64     template<typename TStr>
65     static inline TStr reverseCopy(const TStr& s) {
66     typedef typename Value<TStr>::Type TVal;
67     TStr s_rc;
68     size_t slen = length(s);
69     resize(s_rc, slen);
70     for(size_t i = 0; i < slen; i++) {
71     s_rc[i] = (TVal)((int)s[slen-i-1]);
72     }
73     return s_rc;
74     }
75    
76     /**
77     * Return the reverse-complement of s.
78     */
79     static inline bool isReverseComplement(const String<Dna5>& s1,
80     const String<Dna5>& s2)
81     {
82     if(length(s1) != length(s2)) return false;
83     size_t slen = length(s1);
84     for(size_t i = 0; i < slen; i++) {
85     int i1 = (int)s1[i];
86     int i2 = (int)s2[slen - i - 1];
87     if(i1 == 4) {
88     if(i2 != 4) return false;
89     }
90     else if(i1 != (i2 ^ 3)) return false;
91     }
92     return true;
93     }
94    
/**
 * Return true iff l is "dollar-less-than" r.
 *
 * The comparison pretends that both strings end in a 'dollar sign'
 * character that is lexicographically larger than every other
 * character.  The result is true when isPrefix(r, l) holds, or when
 * l < r and isPrefix(l, r) does not hold.
 * NOTE(review): the exact argument convention of isPrefix (and how it
 * treats equal strings) is defined by SeqAn -- confirm there.
 */
template <typename TStr>
static inline bool
dollarLt(const TStr& l, const TStr& r) {
    if(isPrefix(r, l)) {
        return true;
    }
    if(!(l < r)) {
        return false;
    }
    return !isPrefix(l, r);
}
106    
/**
 * Return true iff l is "dollar-greater-than" r, i.e. when both strings
 * are imagined to end in a 'dollar sign' character that is
 * lexicographically larger than every other character.
 *
 * Implemented as the exact negation of dollarLt(l, r), so it is true
 * for every pair for which dollarLt is false.
 */
template <typename TStr>
static inline bool
dollarGt(const TStr& l, const TStr& r) {
    const bool isLess = dollarLt(l, r);
    return !isLess;
}
118    
119    
/*
 * Lookup tables defined in another translation unit (their definitions
 * and sizes are not visible from this header).
 * NOTE(review): judging by the names, charToDna5 and rcCharToDna5
 * presumably map a character to its Dna5 code and to the code of its
 * complement, and dna4Cat presumably categorises characters -- confirm
 * against the definitions.
 */
extern uint8_t dna4Cat[];
extern uint8_t charToDna5[];
extern uint8_t rcCharToDna5[];
127     #endif /*ALPHABETS_H_*/