ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/codons.cpp
Revision: 2
Committed: Mon Mar 22 22:03:27 2010 UTC (14 years, 5 months ago) by gpertea
File size: 4582 byte(s)
Log Message:
added my gclib source files

Line User Rev File contents
1 gpertea 2 #include "codons.h"
2    
// Lookup table mapping a packed codon to its one-letter translation.
// It is indexed by the value returned by packCodon() and filled in by
// codonTableInit(), which presets every slot to 'X' before loading the
// entries listed in codonData.
static char codonTable[32768]; //32K table for fasta codon decoding (32768 = 2^15 slots)
// codons are encoded as triplets of 5-bit-encoded nucleotides
// (so any codon can be encoded/decoded as a unique 15-bit value)
6    
// Source data for codonTable: a flat sequence of 4-character records.
// Characters 0-2 of a record are the codon, character 3 is its one-letter
// translation. Besides A/C/G/T, codon positions may hold ambiguity letters
// (B, D, H, K, M, N, R, S, V, W, Y) wherever every matching codon gives the
// same translation.
// '.' is the translation used for TAA, TAG, TGA (and the ambiguous TAR, TRA),
// i.e. the stop codons of the standard genetic code; NNN and XXX map to 'X'.
// codonTableInit() walks this array in steps of 4, so its length must stay a
// multiple of 4 (do not turn it into a string literal: the trailing NUL
// would break that).
static char codonData[]={ //long list of 3+1 characters (codon+translation)
'A','A','A','K', 'A','A','C','N', 'A','A','G','K', 'A','A','R','K', 'A','A','T','N',
'A','A','Y','N', 'A','C','A','T', 'A','C','B','T', 'A','C','C','T', 'A','C','D','T',
'A','C','G','T', 'A','C','H','T', 'A','C','K','T', 'A','C','M','T', 'A','C','N','T',
'A','C','R','T', 'A','C','S','T', 'A','C','T','T', 'A','C','V','T', 'A','C','W','T',
'A','C','Y','T', 'A','G','A','R', 'A','G','C','S', 'A','G','G','R', 'A','G','R','R',
'A','G','T','S', 'A','G','Y','S', 'A','T','A','I', 'A','T','C','I', 'A','T','G','M',
'A','T','H','I', 'A','T','M','I', 'A','T','T','I', 'A','T','W','I', 'A','T','Y','I',
'C','A','A','Q', 'C','A','C','H', 'C','A','G','Q', 'C','A','R','Q', 'C','A','T','H',
'C','A','Y','H', 'C','C','A','P', 'C','C','B','P', 'C','C','C','P', 'C','C','D','P',
'C','C','G','P', 'C','C','H','P', 'C','C','K','P', 'C','C','M','P', 'C','C','N','P',
'C','C','R','P', 'C','C','S','P', 'C','C','T','P', 'C','C','V','P', 'C','C','W','P',
'C','C','Y','P', 'C','G','A','R', 'C','G','B','R', 'C','G','C','R', 'C','G','D','R',
'C','G','G','R', 'C','G','H','R', 'C','G','K','R', 'C','G','M','R', 'C','G','N','R',
'C','G','R','R', 'C','G','S','R', 'C','G','T','R', 'C','G','V','R', 'C','G','W','R',
'C','G','Y','R', 'C','T','A','L', 'C','T','B','L', 'C','T','C','L', 'C','T','D','L',
'C','T','G','L', 'C','T','H','L', 'C','T','K','L', 'C','T','M','L', 'C','T','N','L',
'C','T','R','L', 'C','T','S','L', 'C','T','T','L', 'C','T','V','L', 'C','T','W','L',
'C','T','Y','L', 'G','A','A','E', 'G','A','C','D', 'G','A','G','E', 'G','A','R','E',
'G','A','T','D', 'G','A','Y','D', 'G','C','A','A', 'G','C','B','A', 'G','C','C','A',
'G','C','D','A', 'G','C','G','A', 'G','C','H','A', 'G','C','K','A', 'G','C','M','A',
'G','C','N','A', 'G','C','R','A', 'G','C','S','A', 'G','C','T','A', 'G','C','V','A',
'G','C','W','A', 'G','C','Y','A', 'G','G','A','G', 'G','G','B','G', 'G','G','C','G',
'G','G','D','G', 'G','G','G','G', 'G','G','H','G', 'G','G','K','G', 'G','G','M','G',
'G','G','N','G', 'G','G','R','G', 'G','G','S','G', 'G','G','T','G', 'G','G','V','G',
'G','G','W','G', 'G','G','Y','G', 'G','T','A','V', 'G','T','B','V', 'G','T','C','V',
'G','T','D','V', 'G','T','G','V', 'G','T','H','V', 'G','T','K','V', 'G','T','M','V',
'G','T','N','V', 'G','T','R','V', 'G','T','S','V', 'G','T','T','V', 'G','T','V','V',
'G','T','W','V', 'G','T','Y','V', 'M','G','A','R', 'M','G','G','R', 'M','G','R','R',
'N','N','N','X', 'R','A','Y','B', 'S','A','R','Z', 'T','A','A','.', 'T','A','C','Y',
'T','A','G','.', 'T','A','R','.', 'T','A','T','Y', 'T','A','Y','Y', 'T','C','A','S',
'T','C','B','S', 'T','C','C','S', 'T','C','D','S', 'T','C','G','S', 'T','C','H','S',
'T','C','K','S', 'T','C','M','S', 'T','C','N','S', 'T','C','R','S', 'T','C','S','S',
'T','C','T','S', 'T','C','V','S', 'T','C','W','S', 'T','C','Y','S', 'T','G','A','.',
'T','G','C','C', 'T','G','G','W', 'T','G','T','C', 'T','G','Y','C', 'T','R','A','.',
'T','T','A','L', 'T','T','C','F', 'T','T','G','L', 'T','T','R','L', 'T','T','T','F',
'T','T','Y','F', 'X','X','X','X', 'Y','T','A','L', 'Y','T','G','L', 'Y','T','R','L'
};
45    
46    
// The initializer calls codonTableInit(), so codonTable gets filled as part
// of this file's static initialization rather than by an explicit call.
// The stored value is always true (codonTableInit() returns nothing else)
// and is not read anywhere in this file.
// NOTE(review): code in other files that translates codons from its own
// static initializers may run before this one - confirm no caller does that.
static bool isCodonTableReady=codonTableInit();
48    
// Maps one nucleotide character to its 5-bit code: (n - 'A') when that value
// fits in 0..31, otherwise 31. Code 31 is not used by any codonData entry,
// so a codon containing it keeps the table's default 'X' translation.
static unsigned int nucTo5bit(char n) {
  unsigned int code = (unsigned char)(n - 'A');
  return (code < 32u) ? code : 31u;
}

// Packs three nucleotide letters into the 15-bit index used for codonTable:
//   bits 0-4 = n1, bits 5-9 = n2, bits 10-14 = n3.
// The result is always < 32768. Characters outside 'A'..'A'+31 (lowercase
// letters, '-', '*', digits, negative chars, ...) used to wrap around and
// spill into the neighbouring fields and into bit 15, which made callers
// index past the end of the 32768-entry table; they are now clamped to the
// unused code 31 instead. Results for in-range input are unchanged.
unsigned short packCodon(char n1, char n2, char n3) {
  //assumes they are uppercase already!
  return (unsigned short)(nucTo5bit(n1) |
                          (nucTo5bit(n2) << 5) |
                          (nucTo5bit(n3) << 10));
}
58    
59     bool codonTableInit() {
60     memset((void*)codonTable, 'X', 32768);
61     int cdsize=sizeof(codonData);
62     for (int i=0;i<cdsize;i+=4) {
63     unsigned short aacode=packCodon(codonData[i], codonData[i+1], codonData[i+2]);
64     codonTable[aacode]=codonData[i+3];
65     }
66     return true;
67     }
68    
69    
70     char Codon::translate() {
71     for (byte i=0;i<3;i++) nuc[i]=toupper(nuc[i]);
72     unsigned short aacode=packCodon(nuc[0], nuc[1], nuc[2]);
73     return codonTable[aacode];
74     }
75    
76     //simple 1st frame forward translation of a given DNA string
77     // allocate and returns the translation string
78     char* translateDNA(const char* dnastr, int& aalen, int dnalen) {
79     if (dnastr==NULL || *dnastr==0) return NULL;
80     if (dnalen==0) dnalen=strlen(dnastr);
81     aalen=dnalen/3;
82     char* r=NULL;
83     GMALLOC(r, aalen+1);
84     r[aalen]=0;
85     int ai=0;
86     for (int i=0;i+2<dnalen;i+=3,ai++) {
87     r[ai]=codonTable[packCodon(toupper(dnastr[i]),toupper(dnastr[i+1]),toupper(dnastr[i+2]))];
88     }
89     return r;
90     }