ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/cdbfasta/gcdbz.cpp
Revision: 15
Committed: Mon Jul 18 20:53:45 2011 UTC (13 years, 1 month ago) by gpertea
File size: 8926 byte(s)
Log Message:
sync with local src

Line File contents
1 #include "gcdbz.h"
2
3 GCdbz::GCdbz(FILE* azf, bool uc, int zrsize) {
4 uncompress=uc;
5 zrecsize=-1;
6 zpos=0;
7 defline_cap=1024;
8 begin_defline();
9 GMALLOC(defline, defline_cap);
10 zf=azf;
11 // FULL_FLUSH method instead of finish:
12 if (uncompress)
13 decomp_start(zrsize);
14 else
15 compress_start();
16 }
17
18 GCdbz::~GCdbz() {
19 //if (zf!=NULL && zf!=stdout && zf!=stdin) fclose(zf);
20 // FULL_FLUSH method instead of finish
21 if (uncompress) decomp_end();
22 else
23 if (!zclosed) compress_end();
24 GFREE(defline);
25 }
26
27
28
29 void GCdbz::extend_defline(int ch) {
30 if (defline_len+1 >= defline_cap) {
31 defline_cap+=(defline_cap>>2);
32 GREALLOC(defline, defline_cap);
33 }
34 defline[defline_len]= ch;
35 defline_len++;
36 }
37
38
// Text of the placeholder record that compress_start() deflates and writes
// as the very first record of the output file.
#define DUMMY_ZREC ">AA1234567890 DNA protein\n" \
                   "ACGTTGCTAGCT\n" \
                   "NRMTPYYHEIEP\n" \
                   "RTASNTSPTPNS\n" \
                   "IKSAHPAEPPKR\n"
44
45 void GCdbz::compress_start() {
46 //initialize zstream compression
47 zstream.zalloc = (alloc_func)0; //no alloc function to use
48 zstream.zfree = (free_func)0; //no free function to use
49 zstream.opaque = (voidpf)0; //no private object to pass to zalloc/zfree
50
51 int err=deflateInit(&zstream, Z_DEFAULT_COMPRESSION);
52 if (err!=Z_OK)
53 GError("GCdbz error: deflateInit failed!(err=%d)\n",err);
54 zclosed=false;
55 //write a dummy record as the first record,
56 //so we can use random access (FULL_FLUSH style) later
57 char ztag[5];strcpy(ztag, "CDBZ");
58 uint32 zsize=0;
59 zstream.next_in = (Bytef*)sbuf;
60 strcpy(sbuf, DUMMY_ZREC);
61 zstream.avail_in=strlen(sbuf);
62 zstream.next_out = (Bytef*)lbuf;
63 zstream.avail_out = GCDBZ_LBUF_LEN;
64 uLong t_out=zstream.total_out;
65 err = deflate(&zstream, Z_FULL_FLUSH);
66 zsize=zstream.total_out-t_out;
67 if ((err !=Z_OK && err!=Z_STREAM_END) || zsize<=0)
68 GError("GCdbz error: deflate 1st record failed! (err=%d)\n", err);
69 //now write the header and the dummy record
70 //in case this was not done before:
71 gcvt_endian_setup();
72 //gcvt_uint=(endian_test())? &uint32_sun : &uint32_x86;
73 uint32 zfv = gcvt_uint(&zsize);
74 if (fwrite(ztag, 1, 4, zf)<4 ||
75 fwrite(&zfv,1,sizeof(uint32), zf) < sizeof(uint32) ||
76 fwrite(lbuf, 1, zsize, zf) < zsize)
77 GError("Error writing 1st deflated record!\n");
78 zpos+=4+sizeof(uint32)+zsize;
79 }
80
81 void GCdbz::compress_end() {
82 zstream.next_out = (Bytef*)lbuf;
83 zstream.avail_out = GCDBZ_LBUF_LEN;
84 zstream.avail_in = 0;
85 uLong t_out=zstream.total_out;
86 int err = deflate(&zstream, Z_FINISH);
87 if (err != Z_STREAM_END) {
88 GError("GCdbz error: deflate/Z_FINISH() failed! (err=%d) \n", err);
89 }
90 uLong toWrite=zstream.total_out-t_out;
91 if (toWrite>0) {
92 if (fwrite(lbuf, 1, toWrite, zf)<toWrite)
93 GError("Error writing FINISH deflate chunk!\n");
94 //GError("GCdbz error: out data after Z_FINISH (%d bytes)\n",
95 // zstream.total_out-t_out);
96 }
97 err=deflateEnd(&zstream);
98 if (err!=Z_OK)
99 GError("GCdbz error: deflateEnd() failed! (err=%d) \n", err);
100 zclosed=true;
101 }
102
103 char* GCdbz::compress(GReadBuf *readbuf, char* delim) {
104 //compress everything coming from the input stream inf
105 //until \n is encountered followed by delim
106 //returns this->defline or NULL if error encountered
107
108 //-- WARNING: this subrutine assumes that inf file position
109 // is at the beginning of the record, right AFTER the delim
110 // (exactly as left after a previous call)
111 if (zf==NULL || uncompress)
112 GError("GCdbz Error: cannot use compress() method !\n");
113 unsigned int total_out=0;
114 int c=0;
115 bool in_rec=true;
116 int delimlen=strlen(delim);
117 zrecsize=0;
118 if ((c=readbuf->peekCmp(delim, delimlen))!=0) {
119 if (c<-1) return NULL; //end of file reached
120 GError("GCdbZ::compress error: delimiter '%s' expected at record start!\n",
121 delim);
122 }
123 bool bol=false; //beginning of line flag
124 int deflate_flag=0;
125 begin_defline();
126 int rec_pos=0;
127 int err=0;
128 while (in_rec) { // main read loop
129 int bytes_read=0;
130 while ((c=readbuf->getch())>=0) {
131 sbuf[bytes_read++]=c;
132 if (c=='\n' || c=='\r') { //beginning of line
133 bol = true;
134 if (in_defline) end_defline();
135 //look_ahead for record delimiter:
136 if (readbuf->peekCmp(delim, delimlen)==0) {
137 in_rec=false;
138 break;
139 }
140 }
141 else bol = false;
142 if (rec_pos>delimlen-1 && in_defline)
143 extend_defline(c);
144 rec_pos++;
145 if (bytes_read == GCDBZ_SBUF_LEN) break;
146 }//while not EOF or space in buffer
147 /*if (bytes_read==0)
148 return NULL;*/
149 if (c==EOF) {
150 in_rec=false;
151 if (in_defline) end_defline();
152 }
153 zstream.next_in = (Bytef*)sbuf;
154 zstream.avail_in = bytes_read;
155 //deflate_flag = in_rec ? 0 : Z_FINISH;
156 deflate_flag = in_rec ? 0 : Z_FULL_FLUSH;
157 do { //compression loop
158 zstream.next_out = (Bytef*)lbuf;
159 zstream.avail_out = GCDBZ_LBUF_LEN;
160 uLong t_out=zstream.total_out;
161 err = deflate(&zstream, deflate_flag);
162 if (err !=Z_OK && err!=Z_STREAM_END)
163 GError("GCdbz error: deflate failed! (err=%d)\n", err);
164 uLong toWrite=zstream.total_out-t_out;
165 if (toWrite>0) {
166 if (fwrite(lbuf, 1, toWrite, zf)<toWrite)
167 GError("Error writing deflate chunk!\n");
168 total_out+=toWrite;
169 zrecsize+=toWrite;
170 zpos+=toWrite;
171 }
172 } while (err!=Z_STREAM_END && zstream.avail_out==0);//compression loop
173 } //read loop
174 //if (deflate_flag!=Z_FINISH)
175 if (deflate_flag!=Z_FULL_FLUSH)
176 GError("Deflate flag not set to FINISH!\n");
177 return defline;
178 }
179
180
181 void GCdbz::decomp_start(int zrsize) {
182 zstream.zalloc = (alloc_func)0;
183 zstream.zfree = (free_func)0;
184 zstream.opaque = (voidpf)0;
185 zstream.next_in = (Bytef*)sbuf;
186 zstream.avail_in = 0;
187 zstream.next_out = (Bytef*)lbuf;
188 int err = inflateInit(&zstream);
189 if (err!=Z_OK)
190 GMessage("Error at inflateInit()\n");
191 //-- now read and discard the first record, so we can use random access later
192 // (needed by zlib)
193 int bytes_read=fread(sbuf, 1, zrsize, zf);
194 if (bytes_read<zrsize)
195 GError("Error reading 1st record from zrec file\n");
196 zstream.next_in = (Bytef*)sbuf;
197 zstream.avail_in = bytes_read;
198 //decompress first chunk
199 zstream.next_out = (Bytef*)lbuf;
200 zstream.avail_out = GCDBZ_LBUF_LEN;
201 err = inflate(&zstream, Z_SYNC_FLUSH);
202 if (err !=Z_OK && err!=Z_STREAM_END)
203 GError("GCdbz error: 1st record inflate failed! (err=%d)\n",err);
204 }
205
206 void GCdbz::decomp_end() {
207 int err = inflateEnd(&zstream);
208 if (err!=Z_OK)
209 GError("Error at inflateEnd() (err=%d)\n", err);
210
211 }
212
213
214 //record decompress
215 //returns: the number of bytes decompressed
216 int GCdbz::decompress(FILE* outf, int csize, int zfofs) {
217 if (zfofs>=0) {
218 if (fseeko(zf, zfofs, 0))
219 GError("GCdbz::decompress: error fseeko() to %d\n", zfofs);
220 }
221 else
222 if (feof(zf)) return 0;
223 bool in_rec=true;
224 int err=0;
225 int total_read=0;
226 int total_written=0;
227 while (in_rec) { // main read loop
228 int to_read=0;
229 int bytes_read=0;
230 if (csize<=0) { //read one byte at a time
231 to_read=1;
232 int c;
233 if ((c =fgetc(zf))!=EOF) {
234 bytes_read = 1;
235 sbuf[0]=c;
236 }
237 else {
238 //bytes_read=0;
239 return 0; //eof
240 }
241 total_read+=bytes_read;
242 }
243 else {
244 to_read = csize-total_read>GCDBZ_SBUF_LEN ?
245 GCDBZ_SBUF_LEN : csize-total_read;
246 // check for csize vs bytes_read match:
247 if (to_read==0) return 0;
248 bytes_read=fread(sbuf, 1, to_read, zf);
249 if (bytes_read!=to_read)
250 GError("Error reading from zrec file\n");
251 total_read+=bytes_read;
252 in_rec=(total_read<csize);
253 }
254 if (bytes_read==0) {
255 //GMessage("bytes_read = 0\n");
256 return 0;
257 }
258 if (in_rec && bytes_read<to_read) in_rec=false;
259 zstream.next_in = (Bytef*)sbuf;
260 zstream.avail_in = bytes_read;
261
262 do { //decompression loop
263 zstream.next_out = (Bytef*)lbuf;
264 zstream.avail_out = GCDBZ_LBUF_LEN;
265 uLong t_out=zstream.total_out;
266 err = inflate(&zstream, Z_SYNC_FLUSH);
267 uLong toWrite=zstream.total_out-t_out;
268 if (toWrite>0) {
269 if (fwrite(lbuf, 1, toWrite, outf)<toWrite) {
270 GError("Error writing inflated chunk!\n");
271 }
272 total_written+=toWrite;
273 }
274 if (err==Z_STREAM_END) {
275 in_rec=false;
276 if (total_written==0) {
277 GMessage("Z_STREAM_END found but total_written=0!\n");
278 }
279 break;
280 }
281 else if (err !=Z_OK)
282 GError("GCdbz error: inflate failed! (err=%d)\n",err);
283 } while (zstream.avail_in!=0); //decompression loop
284 } //read loop
285 /*if (err!=Z_STREAM_END) {
286 GError("decompress: Z_STREAM_END not found!\n");
287 }*/
288 return total_written;
289 }