ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/gdna.cpp
Revision: 173
Committed: Wed Feb 15 03:34:29 2012 UTC (12 years, 6 months ago) by gpertea
File size: 2081 byte(s)
Log Message:
wip fqtrim

Line User Rev File contents
1 gpertea 2 #include "gdna.h"
2     #include <string.h>
3    
4 gpertea 171 const char* IUPAC_2BIT ="AACCTTGGTTAAAAAACCCCGGAAAAAACCAAAAAA";
5     const char* IUPAC_2BITN ="001133223300000011112200000011000000";
6     const char* IUPAC_DEFS ="AaCcTtGgUuMmRrWwSsYyKkVvHhDdBbNnXx-*";
7     const char* IUPAC_COMP ="TtGgAaCcAaKkYyWwSsRrMmBbDdHhVvNnXx-*";
8 gpertea 2
9 gpertea 171 #define A_2BIT 0 // 00
10     #define C_2BIT 1 // 01
11     #define G_2BIT 2 // 10
12     #define T_2BIT 3 // 11
13 gpertea 2
14 gpertea 171 static byte ntCompTable[256];
15     static byte nt2bit[256]; //maps any character to a 2bit base value (with N = A)
16     static char v_2bit2nt[4] = {'A','C','G','T'};
17 gpertea 2
18 gpertea 171 //----------------------
19    
20     static bool gdna_Ready=gDnaInit();
21    
22     //----------------------
23    
24     byte gdna2bit(char* &nt, int n) {
25     // Pack n bases into a byte (n can be 1..4)
26     byte out = 0;
27     while (n && *nt) {
28     n--;
29     out <<= 2;
30     out += nt2bit[(int)*nt];
31     nt++;
32     }
33 gpertea 173 #ifdef GDEBUG
34     if (n) {
35     GError("Error: attempt to read 6-mer beyond the end of the string!\n");
36     }
37     #endif
38 gpertea 171 return out;
39     }
40    
41    
42 gpertea 2 char ntComplement(char c) {
43     return ntCompTable[(int)c];
44     }
45    
46 gpertea 171 char g2bit2base(byte v2bit) {
47     return v_2bit2nt[v2bit & 0x03 ];
48     }
49    
50 gpertea 2 //in place reverse complement of nucleotide (sub)sequence
51     char* reverseComplement(char* seq, int slen) {
52     if (slen==0) slen=strlen(seq);
53     //reverseChars(seq,len);
54     int l=0;
55     int r=slen-1;
56     register char c;
57     while (l<r) {
58     c=seq[l];seq[l]=seq[r];
59 gpertea 144 seq[r]=c; //this was: Gswap(str[l],str[r]);
60 gpertea 2 l++;r--;
61     }
62     for (int i=0;i<slen;i++) seq[i]=ntComplement(seq[i]);
63     return seq;
64     }
65    
66 gpertea 171 bool gDnaInit() {
67     if (gdna_Ready) return true;
68 gpertea 2 int l=strlen(IUPAC_DEFS);
69     ntCompTable[0]=0;
70 gpertea 171 nt2bit[0]=0;
71 gpertea 2 for (int ch=1;ch<256;ch++) {
72     ntCompTable[ch]=0;
73 gpertea 171 nt2bit[ch]=0;
74 gpertea 2 for (int i=0;i<l;i++)
75 gpertea 171 if (ch==IUPAC_DEFS[i]) {
76     ntCompTable[ch]=IUPAC_COMP[i];
77     nt2bit[ch] = IUPAC_2BITN[i]-'0';
78 gpertea 2 break;
79     }
80 gpertea 171 if (ntCompTable[ch]==0) {
81 gpertea 2 ntCompTable[ch]='N';
82 gpertea 171 }
83 gpertea 2 }
84 gpertea 171 gdna_Ready=true;
85 gpertea 2 return true;
86     }