ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/GBase.h
Revision: 135
Committed: Mon Dec 12 22:28:38 2011 UTC (12 years, 8 months ago) by gpertea
File size: 13996 byte(s)
Log Message:
wip - SplicedSAMHitFactory() still not implemented

Line File contents
1 #ifndef G_BASE_DEFINED
2 #define G_BASE_DEFINED
3 #ifndef _POSIX_SOURCE
4 //mostly for MinGW
5 #define _POSIX_SOURCE
6 #endif
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10 #include <string.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <math.h>
14 #include <limits.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <stdint.h>
18
// Platform layer: defines the path separator CHPATHSEP and provides
// fseeko/ftello (and, on Windows, off_t) for the rest of the code.
19 #if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_
// Make sure __WIN32__ itself is defined whenever any Windows macro is.
20 #ifndef __WIN32__
21 #define __WIN32__
22 #endif
23 #include <windows.h>
24 #include <io.h>
25 #define CHPATHSEP '\\'
// On Windows off_t is replaced by a macro that expands to int64_t.
26 #undef off_t
27 #define off_t int64_t
// NOTE(review): #ifdef only detects preprocessor macros. If the toolchain
// provides _fseeki64 as an ordinary function, this test is false and fseeko
// falls back to plain fseek below -- confirm that this is intended.
28 #ifdef _fseeki64
29 #define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
30 #else
31 /*
32 #define _DEFINE_WIN32_FSEEKO
33 int fseeko(FILE *stream, off_t offset, int whence);
34 */
35 #define fseeko fseek
36 #endif
// NOTE(review): same caveat as for _fseeki64 above applies to _ftelli64.
37 #ifdef _ftelli64
38 #define ftello(stream) _ftelli64(stream)
39 #else
40 /*
41 #define _DEFINE_WIN32_FTELLO
42 off_t ftello(FILE *stream);
43 */
44 #define ftello ftell
45 #endif
46 #else
// Non-Windows builds: forward slash separator and <unistd.h>.
47 #define CHPATHSEP '/'
48 #include <unistd.h>
49 #endif
50
// Fallbacks used when fseeko/ftello are not macros at this point.
// NOTE(review): these tests are true whenever the names are not macros, even
// if functions called fseeko/ftello are declared, so the fseek/ftell aliases
// may also be applied on non-Windows builds -- confirm.
51 #ifndef fseeko
52 #define fseeko fseek
53 #endif
54 #ifndef ftello
55 #define ftello ftell
56 #endif
57
// Defining DEBUG cancels NDEBUG, which re-enables the GASSERT/GTRACE helpers
// that are compiled out when NDEBUG is defined.
58 #ifdef DEBUG
59 #undef NDEBUG
60 #endif
61
// Short aliases for the 32-bit fixed-width integer types from <stdint.h>.
62 typedef int32_t int32;
63 typedef uint32_t uint32;
64
// Two names for the same unsigned 8-bit-char type.
65 typedef unsigned char uchar;
66 typedef unsigned char byte;
67
// Largest unsigned int / int values, under two spellings each.
// Each is only defined here if no macro of that name exists already.
68 #ifndef MAXUINT
69 #define MAXUINT ((unsigned int)-1)
70 #endif
71
72 #ifndef MAXINT
73 #define MAXINT INT_MAX
74 #endif
75
76 #ifndef MAX_UINT
77 #define MAX_UINT ((unsigned int)-1)
78 #endif
79
80 #ifndef MAX_INT
81 #define MAX_INT INT_MAX
82 #endif
83
// Short aliases for the 64-bit fixed-width integer types from <stdint.h>.
84 typedef int64_t int64;
85 typedef uint64_t uint64;
86
87 /****************************************************************************/
88
// Process exit codes, supplied only when no macro of that name is defined yet.
89 #ifndef EXIT_FAILURE
90 #define EXIT_FAILURE 1
91 #endif
92
93 #ifndef EXIT_SUCCESS
94 #define EXIT_SUCCESS 0
95 #endif
96
97 /****************************************************************************/
// Message passed to GError() by the GMALLOC/GCALLOC/GREALLOC macros.
98 #define ERR_ALLOC "Error allocating memory.\n"
99
100 //-------------------
101
102 // Debug helpers
// GASSERT(exp): when NDEBUG is not defined, evaluates exp and calls GAssert()
//   with the expression text, file and line if it is false; otherwise it
//   expands to a no-op and exp is not evaluated.
// GTRACE(exp): expands to `GMessage exp` only when NDEBUG is not defined and
//   TRACE is defined, so exp must carry its own parentheses,
//   e.g. GTRACE(("value=%d\n", v)); in all other builds it is a no-op.
103 #ifndef NDEBUG
104 #define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
105 #ifdef TRACE
106 #define GTRACE(exp) (GMessage exp)
107 #else
108 #define GTRACE(exp) ((void)0)
109 #endif
110 #else
111 #define GASSERT(exp) ((void)0)
112 #define GTRACE(exp) ((void)0)
113 #endif
114
// GERROR(exp): always expands to `GError exp`; like GTRACE, the argument list
// must be wrapped in its own parentheses.
115 #define GERROR(exp) (GError exp)
116 /********************************** Macros ***********************************/
// All macros below are plain textual macros: arguments are fully parenthesized
// but may be evaluated more than once, so do not pass expressions with side
// effects (e.g. i++ or function calls that modify state).
117 // Absolute value
118 #define GABS(val) (((val)>=0)?(val):-(val))
119
120 // Min and Max
// When both arguments compare equal, GMAX yields b and GMIN yields a.
121 #define GMAX(a,b) (((a)>(b))?(a):(b))
122 #define GMIN(a,b) (((a)>(b))?(b):(a))
123
124 // Min of three
125 #define GMIN3(x,y,z) ((x)<(y)?GMIN(x,z):GMIN(y,z))
126
127 // Max of three
128 #define GMAX3(x,y,z) ((x)>(y)?GMAX(x,z):GMAX(y,z))
129
130 // Return minimum and maximum of a, b
// The results are stored into lo and hi, which must therefore be assignable.
131 #define GMINMAX(lo,hi,a,b) ((a)<(b)?((lo)=(a),(hi)=(b)):((lo)=(b),(hi)=(a)))
132
133 // Clamp value x to range [lo..hi]
// The lower bound is tested first, so if lo>hi and x<lo the result is lo.
134 #define GCLAMP(lo,x,hi) ((x)<(lo)?(lo):((x)>(hi)?(hi):(x)))
135
// Generic untyped pointer and a short name for unsigned int.
136 typedef void* pointer;
137 typedef unsigned int uint;
138
// Signature of an item comparison callback returning int.
// Note: `const pointer` means `void* const` (a constant pointer), not a
// pointer to const data, so the pointed-to items are not const-protected.
// NOTE(review): presumably returns <0, 0 or >0 like strcmp -- the expected
// sign convention is not visible in this header; confirm against callers.
139 typedef int GCompareProc(const pointer item1, const pointer item2);
// Signature of an item disposal callback.
140 typedef void GFreeProc(pointer item); //usually just delete,
141 //but may also support structures with embedded dynamic members
142
// Allocation helper macros. Each one calls the matching G* routine on the
// address of ptr and reports ERR_ALLOC through GError() when that routine
// returns false.
//   ptr  - an lvalue of pointer type; it receives the new block
//   size - size argument forwarded unchanged to the G* routine
// The bodies are wrapped in do { ... } while (0) so that every macro expands
// to exactly one statement. The previous bare `if` form silently captured a
// following `else` of the caller, e.g. in
//   if (cond) GMALLOC(p,n); else other();
// Arguments are parenthesized so that any expression can be passed safely.
#define GMALLOC(ptr,size) do { \
    if (!GMalloc((pointer*)(&(ptr)),(size))) GError(ERR_ALLOC); \
  } while (0)
#define GCALLOC(ptr,size) do { \
    if (!GCalloc((pointer*)(&(ptr)),(size))) GError(ERR_ALLOC); \
  } while (0)
#define GREALLOC(ptr,size) do { \
    if (!GRealloc((pointer*)(&(ptr)),(size))) GError(ERR_ALLOC); \
  } while (0)
// Releases the block through GFree(), which receives the address of ptr.
#define GFREE(ptr) GFree((pointer*)(&(ptr)))
150
// Returns whichever of the two C strings sorts first according to strcmp().
// When both compare equal, arg2 is returned. Neither argument may be NULL.
inline char* strMin(char *arg1, char *arg2) {
  if (strcmp(arg1, arg2) < 0) {
    return arg1;
  }
  return arg2;
}
154
// Returns whichever of the two C strings sorts last according to strcmp().
// When both compare equal, arg2 is returned. Neither argument may be NULL.
inline char* strMax(char *arg1, char *arg2) {
  const int order = strcmp(arg2, arg1);
  char* larger = arg2;
  if (order < 0) {
    larger = arg1;
  }
  return larger;
}
158
// Rounds x to the nearest integer by taking floor(x + 0.5), so exact halves
// go towards positive infinity (2.5 -> 3, -2.5 -> -2).
// The result is converted to int without any range check.
inline int iround(double x) {
  const double shifted = x + 0.5;
  return (int)floor(shifted);
}
162
163 /****************************************************************************/
164
// Three-way comparison of two ints.
// Returns -1 when a<b, 0 when a==b and 1 when a>b.
// Implemented with comparisons instead of a-b: the subtraction overflows
// (undefined behavior) when the operands are far apart, e.g. INT_MAX and -1,
// and could then report the wrong sign. Callers should rely only on the sign
// of the result, not on its magnitude.
inline int Gintcmp(int a, int b) {
  return (a > b) - (a < b);
}
169
// String comparison returning an int, implemented outside this header.
// NOTE(review): how a NULL argument is ordered relative to a non-NULL one is
// not visible here -- check the implementation before relying on it.
170 int Gstrcmp(char* a, char* b);
171 //same as strcmp but doesn't crash on NULL pointers
172
// NOTE(review): presumably the case-insensitive counterpart of Gstrcmp (going
// by the name); NULL handling is not documented here -- confirm.
173 int Gstricmp(const char* a, const char* b);
174
// Overloaded helpers that exchange the values of their two arguments in place.
// One overload exists per built-in type listed below; the char* overload
// exchanges the pointers themselves, not the characters they point to.
// The temporaries no longer use the `register` storage-class specifier, which
// is deprecated since C++11 and ill-formed from C++17 on.
inline void swap(int &arg1, int &arg2) {
  const int held = arg1;
  arg1 = arg2;
  arg2 = held;
}

inline void swap(char* &arg1, char* &arg2) { //swap pointers!
  char* const held = arg1;
  arg1 = arg2;
  arg2 = held;
}

inline void swap(uint &arg1, uint &arg2) {
  const uint held = arg1;
  arg1 = arg2;
  arg2 = held;
}

inline void swap(short &arg1, short &arg2) {
  const short held = arg1;
  arg1 = arg2;
  arg2 = held;
}

inline void swap(unsigned short &arg1, unsigned short &arg2) {
  const unsigned short held = arg1;
  arg1 = arg2;
  arg2 = held;
}

inline void swap(long &arg1, long &arg2) {
  const long held = arg1;
  arg1 = arg2;
  arg2 = held;
}

inline void swap(unsigned long &arg1, unsigned long &arg2) {
  const unsigned long held = arg1;
  arg1 = arg2;
  arg2 = held;
}

inline void swap(char &arg1, char &arg2) {
  const char held = arg1;
  arg1 = arg2;
  arg2 = held;
}

inline void swap(unsigned char &arg1, unsigned char &arg2) {
  const unsigned char held = arg1;
  arg1 = arg2;
  arg2 = held;
}
223
224 /**************** Memory management ***************************/
225
// Low-level allocation routines, implemented outside this header. All of them
// work through the address of the caller's pointer. The three allocators
// return bool; the GMALLOC/GCALLOC/GREALLOC macros treat a false result as an
// allocation failure.
// NOTE(review): size is presumably a byte count -- confirm in the
// implementation.
226 bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory
227 bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory
228 bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
229 void GFree(pointer* ptr); // Free memory, resets ptr to NULL
230
231
// printf-style formatting with the result delivered through *retp.
// NOTE(review): presumably allocates the output string (asprintf-like) and
// returns its length -- ownership and return value are not visible here;
// confirm in the implementation.
232 int saprintf(char **retp, const char *fmt, ...);
233
// Both routines below take a printf-style format string followed by its
// arguments.
234 void GError(const char* format,...); // Error routine (aborts program)
235 void GMessage(const char* format,...);// Log message to stderr
236 // Assert failed routine:- usually not called directly but through GASSERT
// GASSERT passes the expression text, __FILE__ and __LINE__ as the arguments.
237 void GAssert(const char* expression, const char* filename, unsigned int lineno);
238
239 // ****************** string manipulation *************************
// NOTE(review): the functions in this section that return a newly allocated
// string do not state here how it must be released -- presumably with GFREE;
// confirm in the implementation.
240 char *Gstrdup(const char* str);
241 //duplicate a string by allocating a copy for it and returning it
242 char* Gstrdup(const char* sfrom, const char* sto);
243 //same as Gstrdup, but with an early termination (e.g. on delimiter)
// NOTE(review): whether the character at sto is included in the copy is not
// visible in this header -- confirm.
244
245 char* Gsubstr(const char* str, char* from, char* to=NULL);
246 //extracts a substring, allocating it, including boundaries (from/to)
247
// Split str into at most maxfields fields whose pointers are stored in
// fields; the overloads differ in the delimiter: a C string, a single
// character, or (no argument) tab or space.
// NOTE(review): str is non-const, so it is presumably modified in place, and
// the int result is presumably the number of fields found -- confirm.
248 int strsplit(char* str, char** fields, int maxfields, const char* delim);
249 int strsplit(char* str, char** fields, int maxfields, const char delim);
250 int strsplit(char* str, char** fields, int maxfields); //splits by tab or space
251
// NOTE(review): str is taken by reference, so the caller's pointer is
// presumably replaced by a copy of newvalue (and the old value released);
// not visible here -- confirm.
252 char* replaceStr(char* &str, char* newvalue);
253
254 //conversion: to Lower/Upper case
255 // creating a new string:
256 char* upCase(const char* str);
257 char* loCase(const char* str);
258 // changing string in place:
259 char* strlower(char * str);
260 char* strupper(char * str);
261
262 //strstr but for memory zones: scans a memory region
263 //for a substring:
// mem/len describe the region to scan, part/partlen the byte sequence sought.
// NOTE(review): presumably returns NULL when nothing is found -- confirm.
264 void* Gmemscan(void *mem, unsigned int len,
265 void *part, unsigned int partlen);
266
267 // test if a char is in a string:
268 bool chrInStr(char c, const char* str);
269
270 char* rstrchr(char* str, char ch);
271 /* returns a pointer to the rightmost
272 occurrence of ch in str - like rindex for platforms missing it*/
273
274 char* strchrs(const char* s, const char* chrs);
275 //strchr but with a set of chars instead of only one
276
277 char* rstrfind(const char* str, const char *substr);
278 // like rindex() but for strings; right side version of strstr()
279
// NOTE(review): slen defaults to 0, presumably meaning "compute the length
// with strlen" -- confirm in the implementation.
280 char* reverseChars(char* str, int slen=0); //in place reversal of string
281
282 char* rstrstr(const char* rstart, const char *lend, const char* substr);
283 /*the reversed, rightside equivalent of strstr: starts searching
284 from right end (rstart), going back to left end (lend) and returns
285 a pointer to the last (right) matching character in str */
286
287 char* strifind(const char* str, const char* substr);
288 // the case insensitive version of strstr -- finding a string within a string
// Note: several functions above take const char* but return a non-const
// char*, so the result may point into data the caller passed as const.
289
290
291 //Determines if a string begins with a given prefix
292 //(returns false when any of the params is NULL,
293 // but true when prefix is '' (empty string)!)
294 bool startsWith(const char* s, const char* prefix);
295
// Determines if a string ends with a given suffix.
296 bool endsWith(const char* s, const char* suffix);
297 //Note: returns true if suffix is empty string, but false if it's NULL
298
299
300 // ELF hash function for strings
// The hash is returned as a (signed) int.
301 int strhash(const char* str);
302
303
304
//---- generic base GSeg : genomic segment (interval) --
// Coordinates are considered 1-based (so 0 is invalid) and both ends are
// inclusive. The constructor guarantees start<=end; the fields are public, so
// code that writes them directly has to keep that ordering itself.
// All query methods are const so they can be called on const segments.
// Reference parameters are deliberately kept non-const: GSeg has an implicit
// constructor from integers, and accepting const references would let plain
// integers convert to temporary segments and make calls such as
// overlap(10, 20) ambiguous.
class GSeg {
 public:
  uint start; //start<=end always!
  uint end;

  // Builds the segment [s..e]; the bounds are exchanged if given in reverse.
  GSeg(uint s=0,uint e=0) {
    if (s>e) { start=e; end=s; }
    else { start=s; end=e; }
  }

  // Number of positions covered, both ends included.
  uint len() const { return end-start+1; }

  //check for overlap with other segment
  bool overlap(const GSeg* d) const {
    return (start<=d->end && end>=d->start);
  }

  bool overlap(GSeg& d) const {
    return (start<=d.end && end>=d.start);
  }

  // Overlap test in which fuzz is added to the end coordinate on each side
  // of the comparison, so nearby segments also count as overlapping.
  bool overlap(GSeg& d, int fuzz) const {
    return (start<=d.end+fuzz && end+fuzz>=d.start);
  }

  // Overlap test against the interval bounded by s and e (in either order).
  bool overlap(uint s, uint e) const {
    const uint lo = (s>e) ? e : s;
    const uint hi = (s>e) ? s : e;
    return (start<=hi && end>=lo);
  }

  //return the length of overlap between two segments (0 if they are disjoint)
  int overlapLen(const GSeg* r) const {
    if (start<r->start) {
      if (r->start>end) return 0;
      return (r->end>end) ? end-r->start+1 : r->end-r->start+1;
    }
    //r->start<=start
    if (start>r->end) return 0;
    return (r->end<end) ? r->end-start+1 : end-start+1;
  }

  // Same as above for the interval bounded by rstart and rend (in either order).
  int overlapLen(uint rstart, uint rend) const {
    const uint lo = (rstart>rend) ? rend : rstart;
    const uint hi = (rstart>rend) ? rstart : rend;
    if (start<lo) {
      if (lo>end) return 0;
      return (hi>end) ? end-lo+1 : hi-lo+1;
    }
    //lo<=start
    if (start>hi) return 0;
    return (hi<end) ? hi-start+1 : end-start+1;
  }

  //fuzzy coordinate matching: both ends must differ by at most fuzz
  bool coordMatch(const GSeg* s, uint fuzz=0) const {
    if (fuzz==0) return (start==s->start && end==s->end);
    const uint sd = (start>s->start) ? start-s->start : s->start-start;
    const uint ed = (end>s->end) ? end-s->end : s->end-end;
    return (sd<=fuzz && ed<=fuzz);
  }

  //comparison operators required for sorting: order by start, then by end
  bool operator==(GSeg& d) const {
    return (start==d.start && end==d.end);
  }
  bool operator>(GSeg& d) const {
    return (start==d.start) ? (end>d.end) : (start>d.start);
  }
  bool operator<(GSeg& d) const {
    return (start==d.start) ? (end<d.end) : (start<d.start);
  }
};
379
380
381
382 //--------------------------------------------------------
383 // ************** simple line reading class for text files
384
385 //GLineReader -- text line reading/buffering class
386 class GLineReader {
387 bool closeFile;
388 int len;
389 int allocated;
390 char* buf;
391 bool isEOF;
392 FILE* file;
393 off_t filepos; //current position
394 bool pushed; //pushed back
395 int lcount; //line counter (read lines)
396 public:
397 char* chars() { return buf; }
398 char* line() { return buf; }
399 int readcount() { return lcount; } //number of lines read
400 void setFile(FILE* stream) { file=stream; }
401 int length() { return len; }
402 int size() { return len; } //same as size();
403 bool isEof() {return isEOF; }
404 bool eof() { return isEOF; }
405 off_t getfpos() { return filepos; }
406 off_t getFpos() { return filepos; }
407 char* nextLine() { return getLine(); }
408 char* getLine() { if (pushed) { pushed=false; return buf; }
409 else return getLine(file); }
410 char* getLine(FILE* stream) {
411 if (pushed) { pushed=false; return buf; }
412 else return getLine(stream, filepos); }
413 char* getLine(FILE* stream, off_t& f_pos); //read a line from a stream and update
414 // the given file position
415 void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
416 // so the next call will in fact return the same line
417 GLineReader(const char* fname) {
418 FILE* f=fopen(fname, "rb");
419 if (f==NULL) GError("Error opening file '%s'!\n",fname);
420 closeFile=true;
421 init(f);
422 }
423 GLineReader(FILE* stream=NULL, off_t fpos=0) {
424 closeFile=false;
425 init(stream,fpos);
426 }
427 void init(FILE* stream, off_t fpos=0) {
428 len=0;
429 isEOF=false;
430 allocated=1024;
431 GMALLOC(buf,allocated);
432 lcount=0;
433 buf[0]=0;
434 file=stream;
435 filepos=fpos;
436 pushed=false;
437 }
438 ~GLineReader() {
439 GFREE(buf);
440 if (closeFile) fclose(file);
441 }
442 };
443
444
445 /* extended fgets() - to read one full line from a file and
446 update the file position correctly !
447 buf will be reallocated as necessary, to fit the whole line
448 */
// buf and buflen are passed by reference so that the reallocated buffer and
// its new size reach the caller; f_pos and linelen are optional outputs
// (both default to NULL).
// NOTE(review): the return value at end of file (presumably NULL) and whether
// the line terminator is kept are not visible here -- confirm.
449 char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
450
451
452 //print int/values nicely formatted in 3-digit groups
// NOTE(review): ownership of the returned string (static buffer vs. allocated)
// is not visible in this header -- confirm before storing or freeing it.
453 char* commaprint(uint64 n);
454
455 /*********************** File management functions *********************/
456
457 // removes the last part (file or directory name) of a full path
458 // WARNING: this is a destructive operation for the given string!
459 void delFileName(char* filepath);
460
461 // returns a pointer to the last file or directory name in a full path
// (the result is a const char*, like the argument; no copy is indicated)
462 const char* getFileName(const char* filepath);
463 // returns a pointer to the file "extension" part in a filename
464 const char* getFileExt(const char* filepath);
465
466
// Classifies the file system entry called fname:
467 int fileExists(const char* fname);
468 //returns 0 if file entry doesn't exist
469 // 1 if it's a directory
470 // 2 if it's a regular file
471 // 3 otherwise (?)
472
// NOTE(review): presumably the size in bytes; the value returned for a
// missing file is not visible here -- confirm.
473 int64 fileSize(const char* fpath);
474
475 //write a FASTA formatted record to fw
// linelen defaults to 60 and seqlen to 0.
// NOTE(review): linelen is presumably the number of sequence characters per
// output line, and seqlen==0 presumably means "use strlen(seq)" -- confirm.
476 void writeFasta(FILE *fw, const char* seqid, const char* descr,
477 const char* seq, int linelen=60, int seqlen=0);
478
479 //parses the next number found in a string at the current position
480 //until a non-digit (and not a '.', 'e','E','-','+') is encountered;
481 //updates the char* pointer to be after the last digit parsed
// The parsed value is stored in v.
// NOTE(review): the bool result presumably reports whether a number was
// parsed -- confirm.
482 bool parseNumber(char* &p, double& v);
483 bool parseDouble(char* &p, double& v); //just an alias for parseNumber
484
// Integer counterparts: p is likewise taken by reference and the value is
// stored in i.
// NOTE(review): overflow handling and the accepted hex syntax (e.g. a "0x"
// prefix) are not visible in this header -- confirm.
485 bool parseInt(char* &p, int& i);
486 bool parseUInt(char* &p, uint& i);
487 bool parseHex(char* &p, uint& i);
488
489 #endif /* G_BASE_DEFINED */