1 |
gpertea |
29 |
#include "codons.h" |
2 |
|
|
|
3 |
|
|
// Lookup table for FASTA codon decoding (32K entries).
// Codons are encoded as triplets of 5-bit-encoded nucleotides, so any codon
// can be encoded/decoded as a unique 15-bit value, which is the index into
// this table.
static char codonTable[32768];
6 |
|
|
|
7 |
|
|
// Flat list of 4-character records: 3 nucleotide letters (the codon, possibly
// containing ambiguity letters) followed by 1 translation letter.
// '.' is used as the translation of TAA/TAG/TAR/TGA/TRA, 'X' for NNN and XXX.
// The table is only ever read, so it is declared const.
static const char codonData[]={
  'A','A','A','K', 'A','A','C','N', 'A','A','G','K', 'A','A','R','K', 'A','A','T','N',
  'A','A','Y','N', 'A','C','A','T', 'A','C','B','T', 'A','C','C','T', 'A','C','D','T',
  'A','C','G','T', 'A','C','H','T', 'A','C','K','T', 'A','C','M','T', 'A','C','N','T',
  'A','C','R','T', 'A','C','S','T', 'A','C','T','T', 'A','C','V','T', 'A','C','W','T',
  'A','C','Y','T', 'A','G','A','R', 'A','G','C','S', 'A','G','G','R', 'A','G','R','R',
  'A','G','T','S', 'A','G','Y','S', 'A','T','A','I', 'A','T','C','I', 'A','T','G','M',
  'A','T','H','I', 'A','T','M','I', 'A','T','T','I', 'A','T','W','I', 'A','T','Y','I',
  'C','A','A','Q', 'C','A','C','H', 'C','A','G','Q', 'C','A','R','Q', 'C','A','T','H',
  'C','A','Y','H', 'C','C','A','P', 'C','C','B','P', 'C','C','C','P', 'C','C','D','P',
  'C','C','G','P', 'C','C','H','P', 'C','C','K','P', 'C','C','M','P', 'C','C','N','P',
  'C','C','R','P', 'C','C','S','P', 'C','C','T','P', 'C','C','V','P', 'C','C','W','P',
  'C','C','Y','P', 'C','G','A','R', 'C','G','B','R', 'C','G','C','R', 'C','G','D','R',
  'C','G','G','R', 'C','G','H','R', 'C','G','K','R', 'C','G','M','R', 'C','G','N','R',
  'C','G','R','R', 'C','G','S','R', 'C','G','T','R', 'C','G','V','R', 'C','G','W','R',
  'C','G','Y','R', 'C','T','A','L', 'C','T','B','L', 'C','T','C','L', 'C','T','D','L',
  'C','T','G','L', 'C','T','H','L', 'C','T','K','L', 'C','T','M','L', 'C','T','N','L',
  'C','T','R','L', 'C','T','S','L', 'C','T','T','L', 'C','T','V','L', 'C','T','W','L',
  'C','T','Y','L', 'G','A','A','E', 'G','A','C','D', 'G','A','G','E', 'G','A','R','E',
  'G','A','T','D', 'G','A','Y','D', 'G','C','A','A', 'G','C','B','A', 'G','C','C','A',
  'G','C','D','A', 'G','C','G','A', 'G','C','H','A', 'G','C','K','A', 'G','C','M','A',
  'G','C','N','A', 'G','C','R','A', 'G','C','S','A', 'G','C','T','A', 'G','C','V','A',
  'G','C','W','A', 'G','C','Y','A', 'G','G','A','G', 'G','G','B','G', 'G','G','C','G',
  'G','G','D','G', 'G','G','G','G', 'G','G','H','G', 'G','G','K','G', 'G','G','M','G',
  'G','G','N','G', 'G','G','R','G', 'G','G','S','G', 'G','G','T','G', 'G','G','V','G',
  'G','G','W','G', 'G','G','Y','G', 'G','T','A','V', 'G','T','B','V', 'G','T','C','V',
  'G','T','D','V', 'G','T','G','V', 'G','T','H','V', 'G','T','K','V', 'G','T','M','V',
  'G','T','N','V', 'G','T','R','V', 'G','T','S','V', 'G','T','T','V', 'G','T','V','V',
  'G','T','W','V', 'G','T','Y','V', 'M','G','A','R', 'M','G','G','R', 'M','G','R','R',
  'N','N','N','X', 'R','A','Y','B', 'S','A','R','Z', 'T','A','A','.', 'T','A','C','Y',
  'T','A','G','.', 'T','A','R','.', 'T','A','T','Y', 'T','A','Y','Y', 'T','C','A','S',
  'T','C','B','S', 'T','C','C','S', 'T','C','D','S', 'T','C','G','S', 'T','C','H','S',
  'T','C','K','S', 'T','C','M','S', 'T','C','N','S', 'T','C','R','S', 'T','C','S','S',
  'T','C','T','S', 'T','C','V','S', 'T','C','W','S', 'T','C','Y','S', 'T','G','A','.',
  'T','G','C','C', 'T','G','G','W', 'T','G','T','C', 'T','G','Y','C', 'T','R','A','.',
  'T','T','A','L', 'T','T','C','F', 'T','T','G','L', 'T','T','R','L', 'T','T','T','F',
  'T','T','Y','F', 'X','X','X','X', 'Y','T','A','L', 'Y','T','G','L', 'Y','T','R','L'
};
45 |
|
|
|
46 |
|
|
|
47 |
|
|
// Runs codonTableInit() from this file-scope initializer so that codonTable
// gets populated without an explicit call; the flag stores its return value.
static bool isCodonTableReady = codonTableInit();
48 |
|
|
|
49 |
|
|
// Packs three nucleotide letters into a 15-bit codon code:
//   bits 0-4 = n1-'A', bits 5-9 = n2-'A', bits 10-14 = n3-'A'.
// The letters are assumed to be uppercase already; no case folding is done here.
// A character whose offset from 'A' does not fit in 5 bits (gap characters,
// digits, lowercase letters, ...) is mapped to the code 31, which no record in
// codonData uses. This keeps the result below 32768 (the size of codonTable)
// and prevents stray bits from leaking into a neighbouring 5-bit field.
unsigned short packCodon(char n1, char n2, char n3) {
  const unsigned int kOutOfRange = 31;
  // Wrap to 0..255 first so that characters below 'A' become large values.
  unsigned int c1 = (unsigned char)(n1 - 'A');
  unsigned int c2 = (unsigned char)(n2 - 'A');
  unsigned int c3 = (unsigned char)(n3 - 'A');
  if (c1 > kOutOfRange) c1 = kOutOfRange;
  if (c2 > kOutOfRange) c2 = kOutOfRange;
  if (c3 > kOutOfRange) c3 = kOutOfRange;
  return (unsigned short)(c1 | (c2 << 5) | (c3 << 10));
}
58 |
|
|
|
59 |
|
|
bool codonTableInit() { |
60 |
|
|
memset((void*)codonTable, 'X', 32768); |
61 |
|
|
int cdsize=sizeof(codonData); |
62 |
|
|
for (int i=0;i<cdsize;i+=4) { |
63 |
|
|
unsigned short aacode=packCodon(codonData[i], codonData[i+1], codonData[i+2]); |
64 |
|
|
codonTable[aacode]=codonData[i+3]; |
65 |
|
|
} |
66 |
|
|
return true; |
67 |
|
|
} |
68 |
|
|
|
69 |
|
|
|
70 |
|
|
char Codon::translate() { |
71 |
|
|
for (byte i=0;i<3;i++) nuc[i]=toupper(nuc[i]); |
72 |
|
|
unsigned short aacode=packCodon(nuc[0], nuc[1], nuc[2]); |
73 |
|
|
return codonTable[aacode]; |
74 |
|
|
} |
75 |
|
|
|
76 |
|
|
//simple 1st frame forward translation of a given DNA string |
77 |
|
|
// allocate and returns the translation string |
78 |
|
|
char* translateDNA(const char* dnastr, int& aalen, int dnalen) { |
79 |
|
|
if (dnastr==NULL || *dnastr==0) return NULL; |
80 |
|
|
if (dnalen==0) dnalen=strlen(dnastr); |
81 |
|
|
aalen=dnalen/3; |
82 |
|
|
char* r=NULL; |
83 |
|
|
GMALLOC(r, aalen+1); |
84 |
|
|
r[aalen]=0; |
85 |
|
|
int ai=0; |
86 |
|
|
for (int i=0;i+2<dnalen;i+=3,ai++) { |
87 |
|
|
r[ai]=codonTable[packCodon(toupper(dnastr[i]),toupper(dnastr[i+1]),toupper(dnastr[i+2]))]; |
88 |
|
|
} |
89 |
|
|
return r; |
90 |
|
|
} |