ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/GBase.h
Revision: 29
Committed: Tue Aug 2 21:24:54 2011 UTC (13 years, 1 month ago) by gpertea
File size: 13918 byte(s)
Log Message:
adding tophat source work

Line User Rev File contents
1 gpertea 29 #ifndef G_BASE_DEFINED
2     #define G_BASE_DEFINED
3     #ifndef _POSIX_SOURCE
4     //mostly for MinGW
5     #define _POSIX_SOURCE
6     #endif
7     #ifdef HAVE_CONFIG_H
8     #include "config.h"
9     #endif
10     #include <string.h>
11     #include <stdlib.h>
12     #include <stdio.h>
13     #include <math.h>
14     #include <limits.h>
15     #include <sys/types.h>
16     #include <sys/stat.h>
17     #include <stdint.h>
18    
// ---- Platform setup: path separator and large-file seek/tell wrappers ----
#if defined(__WIN32__) || defined(WIN32) || defined(_WIN32) || defined(_WIN32_)
  // Collapse every Windows spelling into the single __WIN32__ symbol.
  #if !defined(__WIN32__)
    #define __WIN32__
  #endif
  #include <windows.h>
  #include <io.h>
  #define CHPATHSEP '\\'
  // File offsets are forced to 64 bits on Windows.
  #undef off_t
  #define off_t int64_t
  // NOTE(review): #ifdef/defined() only sees _fseeki64/_ftelli64 when they are
  // macros; if the C runtime provides them as ordinary functions these tests
  // are false and the plain fseek/ftell fallbacks below are selected --
  // confirm that this is the intended behavior.
  #if defined(_fseeki64)
    #define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
  #else
    #define fseeko fseek
  #endif
  #if defined(_ftelli64)
    #define ftello(stream) _ftelli64(stream)
  #else
    #define ftello ftell
  #endif
#else
  // Everything else: forward-slash paths, seek/tell come from the system headers.
  #define CHPATHSEP '/'
  #include <unistd.h>
#endif
50    
51    
// A DEBUG build always keeps assertions enabled.
#if defined(DEBUG)
  #undef NDEBUG
#endif

// Short aliases for the fixed-width integer types.
typedef int32_t  int32;
typedef uint32_t uint32;
typedef int64_t  int64;
typedef uint64_t uint64;

// Two names for a raw 8-bit unsigned value.
typedef unsigned char uchar;
typedef unsigned char byte;

// Largest unsigned int / int values, under both historical spellings.
// Each one is only defined when the platform headers did not provide it.
#if !defined(MAXUINT)
  #define MAXUINT ((unsigned int)-1)
#endif
#if !defined(MAX_UINT)
  #define MAX_UINT ((unsigned int)-1)
#endif
#if !defined(MAXINT)
  #define MAXINT INT_MAX
#endif
#if !defined(MAX_INT)
  #define MAX_INT INT_MAX
#endif

/****************************************************************************/

// Process exit codes, for platforms whose headers lack them.
#if !defined(EXIT_FAILURE)
  #define EXIT_FAILURE 1
#endif
#if !defined(EXIT_SUCCESS)
  #define EXIT_SUCCESS 0
#endif

/****************************************************************************/
// Message used by the GMALLOC/GCALLOC/GREALLOC macros when allocation fails.
#define ERR_ALLOC "Error allocating memory.\n"
93    
94     //-------------------
95    
// Debug helpers
// GASSERT(exp): when NDEBUG is not defined, evaluates exp and calls GAssert()
// with the expression text, file and line if it is false; with NDEBUG it
// expands to a no-op.
#if defined(NDEBUG)
  #define GASSERT(exp) ((void)0)
#else
  #define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
#endif

// GTRACE(exp): forwards to GMessage only when TRACE is defined and NDEBUG is
// not; otherwise a no-op. exp must be a parenthesized argument list, e.g.
// GTRACE(("value=%d\n", v)).
#if !defined(NDEBUG) && defined(TRACE)
  #define GTRACE(exp) (GMessage exp)
#else
  #define GTRACE(exp) ((void)0)
#endif

// GERROR(exp): always forwards the parenthesized argument list to GError.
#define GERROR(exp) (GError exp)
/********************************** Macros ***********************************/
// These are plain text-substitution macros: an argument may be evaluated more
// than once, so do not pass expressions with side effects.

// Absolute value
#define GABS(val) ((val) >= 0 ? (val) : -(val))

// Larger / smaller of two values
#define GMAX(a,b) ((a) > (b) ? (a) : (b))
#define GMIN(a,b) ((a) > (b) ? (b) : (a))

// Smallest of three values
#define GMIN3(x,y,z) ((x) < (y) ? GMIN(x,z) : GMIN(y,z))

// Largest of three values
#define GMAX3(x,y,z) ((x) > (y) ? GMAX(x,z) : GMAX(y,z))

// Store the smaller of a, b into lo and the larger into hi
#define GMINMAX(lo,hi,a,b) ((a) < (b) ? ((lo)=(a),(hi)=(b)) : ((lo)=(b),(hi)=(a)))

// Clamp value x to the range [lo..hi]
#define GCLAMP(lo,x,hi) ((x) < (lo) ? (lo) : ((x) > (hi) ? (hi) : (x)))
129    
typedef unsigned int uint;
typedef void* pointer; // generic untyped pointer

// Function type taking two generic items and returning an int.
// Note that "const pointer" means void* const: the const applies to the
// pointer parameter itself, not to the item it points to.
typedef int GCompareProc(const pointer item1, const pointer item2);

// Function type that releases one item: usually just a delete, but it may
// also support structures with embedded dynamic members.
typedef void GFreeProc(pointer item);
136    
// Allocation helpers. GMALLOC/GCALLOC/GREALLOC call the matching G* routine on
// the address of ptr and invoke GError(ERR_ALLOC) when that routine returns
// false. Each is wrapped in do { ... } while (0) so that it behaves as one
// statement: the bare "if" of the earlier form would silently capture an
// "else" that follows the macro at the call site. Use with a trailing ';'.
// ptr is parenthesized so that any lvalue expression can be passed.
#define GMALLOC(ptr,size)  do { if (!GMalloc((pointer*)(&(ptr)),size)) \
                                  GError(ERR_ALLOC); } while (0)
#define GCALLOC(ptr,size)  do { if (!GCalloc((pointer*)(&(ptr)),size)) \
                                  GError(ERR_ALLOC); } while (0)
#define GREALLOC(ptr,size) do { if (!GRealloc((pointer*)(&(ptr)),size)) \
                                  GError(ERR_ALLOC); } while (0)
// Hands the address of ptr to GFree().
#define GFREE(ptr)         GFree((pointer*)(&(ptr)))
144    
// Returns whichever argument sorts first according to strcmp();
// arg2 is returned when the two strings compare equal.
inline char* strMin(char *arg1, char *arg2) {
  if (strcmp(arg1, arg2) < 0) return arg1;
  return arg2;
}
148    
// Returns whichever argument sorts last according to strcmp();
// arg2 is returned when the two strings compare equal.
inline char* strMax(char *arg1, char *arg2) {
  const bool firstIsGreater = (strcmp(arg2, arg1) < 0);
  if (firstIsGreater) return arg1;
  return arg2;
}
152    
// Rounds x to the nearest integer by taking floor(x + 0.5), so exact halves
// go toward positive infinity (2.5 -> 3, -2.5 -> -2).
inline int iround(double x) {
  const double shifted = x + 0.5;
  return (int)floor(shifted);
}
156    
157     /****************************************************************************/
158    
// Three-way comparison of two ints: returns a negative value, zero, or a
// positive value when a is less than, equal to, or greater than b.
// Built from comparisons instead of a-b: the subtraction overflows (undefined
// behavior, in practice the wrong sign) when the operands are far apart,
// e.g. INT_MAX and -1.
inline int Gintcmp(int a, int b) {
  return (a>b) - (a<b);
}
163    
// Same as strcmp(), but does not crash on NULL pointers.
int Gstrcmp(char *a, char *b);

// NOTE(review): undocumented here; by its name presumably the case-insensitive
// counterpart of Gstrcmp -- confirm against the implementation.
int Gstricmp(const char *a, const char *b);
168    
// Overloaded swap() helpers: each one exchanges the values of its two
// reference arguments through a temporary.
// The temporaries no longer carry the 'register' storage class, which is
// deprecated since C++11 and no longer valid from C++17 on.
inline void swap(int &arg1, int &arg2){
  int swp=arg1;
  arg1=arg2; arg2=swp;
}

inline void swap(char* &arg1, char* &arg2){ //swaps the pointers, not the characters
  char* swp=arg1;
  arg1=arg2; arg2=swp;
}

inline void swap(uint &arg1, uint &arg2) {
  uint swp=arg1;
  arg1=arg2; arg2=swp;
}

inline void swap(short &arg1, short &arg2) {
  short swp=arg1;
  arg1=arg2; arg2=swp;
}

inline void swap(unsigned short &arg1, unsigned short &arg2) {
  unsigned short swp=arg1;
  arg1=arg2; arg2=swp;
}

inline void swap(long &arg1, long &arg2) {
  long swp=arg1;
  arg1=arg2; arg2=swp;
}

inline void swap(unsigned long &arg1, unsigned long &arg2) {
  unsigned long swp=arg1;
  arg1=arg2; arg2=swp;
}


inline void swap(char &arg1, char &arg2) {
  char swp=arg1;
  arg1=arg2; arg2=swp;
}

inline void swap(unsigned char &arg1, unsigned char &arg2) {
  unsigned char swp=arg1;
  arg1=arg2; arg2=swp;
}
217    
218     /**************** Memory management ***************************/
219    
220     bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory
221     bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory
222     bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
223     void GFree(pointer* ptr); // Free memory, resets ptr to NULL
224    
225    
226     int saprintf(char **retp, const char *fmt, ...);
227    
228     void GError(const char* format,...); // Error routine (aborts program)
229     void GMessage(const char* format,...);// Log message to stderr
230     // Assert failed routine:- usually not called directly but through GASSERT
231     void GAssert(const char* expression, const char* filename, unsigned int lineno);
232    
// ****************** string manipulation *************************

// Duplicates a string: allocates a copy of str and returns it.
char *Gstrdup(const char *str);

// Same as Gstrdup(str), but with an early termination (e.g. on a delimiter).
char *Gstrdup(const char *sfrom, const char *sto);

// Extracts a substring into newly allocated memory; the boundaries (from/to)
// are included.
char *Gsubstr(const char *str, char *from, char *to=NULL);

// Split str into fields, using a delimiter string ...
int strsplit(char *str, char **fields, int maxfields, const char *delim);
// ... a single delimiter character ...
int strsplit(char *str, char **fields, int maxfields, const char delim);
// ... or, with no delimiter given, tab or space.
int strsplit(char *str, char **fields, int maxfields);

// NOTE(review): undocumented here; str is taken by reference, so the caller's
// pointer may be updated -- confirm against the implementation.
char *replaceStr(char *&str, char *newvalue);

// Case conversion, creating a new string:
char *upCase(const char *str);
char *loCase(const char *str);
// Case conversion, changing the string in place:
char *strlower(char *str);
char *strupper(char *str);

// strstr() for memory zones: scans a memory region for a substring.
void *Gmemscan(void *mem, unsigned int len,
               void *part, unsigned int partlen);

// Tests if a char is in a string.
bool chrInStr(char c, const char *str);

// Returns a pointer to the rightmost occurrence of ch in str
// (like rindex(), for platforms missing it).
char *rstrchr(char *str, char ch);

// strchr() but with a set of chars instead of only one.
char *strchrs(const char *s, const char *chrs);

// Like rindex() but for strings, i.e. the right-side version of strstr().
char *rstrfind(const char *str, const char *substr);

// Reverses a character string.
char *reverseChars(char *str, int slen=0);

// The reversed, right-side equivalent of strstr(): starts searching from the
// right end (rstart), going back to the left end (lend), and returns a pointer
// to the last (right) matching character in str.
char *rstrstr(const char *rstart, const char *lend, const char *substr);

// The case-insensitive version of strstr(): finds a string within a string.
char *strifind(const char *str, const char *substr);


// Determines if a string begins with a given prefix
// (returns false when any of the params is NULL,
//  but true when prefix is '' (empty string)!)
bool startsWith(const char *s, const char *prefix);

// Note: returns true if suffix is an empty string, but false if it is NULL.
bool endsWith(const char *s, const char *suffix);


// ELF hash function for strings
int strhash(const char *str);
297    
298    
299    
//---- generic base GSeg : genomic segment (interval) --
// coordinates are considered 1-based (so 0 is invalid)
//
// All queries and comparison operators are const-qualified and take their
// argument as const wherever possible, so they can be used on const segments.
class GSeg {
 public:
  uint start; //start<=end: the constructor stores the coordinates in order
  uint end;

  // Builds the segment [s..e]; the coordinates are exchanged when s>e.
  GSeg(uint s=0,uint e=0): start(s<=e ? s : e), end(s<=e ? e : s) { }

  // number of positions covered, both ends included
  uint len() const { return end-start+1; }

  //check for overlap with other segment
  bool overlap(const GSeg* d) const {
     return (start<=d->end && end>=d->start);
     }

  bool overlap(const GSeg& d) const {
     return (start<=d.end && end>=d.start);
     }

  // overlap test where each segment's end is first extended by fuzz positions
  // NOTE: d deliberately stays a non-const reference. With a const reference
  // an integer argument could be converted to a temporary GSeg, which would
  // make calls such as overlap(5,10) ambiguous with overlap(uint,uint).
  bool overlap(GSeg& d, int fuzz) const {
     return (start<=d.end+fuzz && end+fuzz>=d.start);
     }

  // overlap test against the interval [s..e], given in either order
  bool overlap(uint s, uint e) const {
     if (s>e) { uint t=s; s=e; e=t; }
     return (start<=e && end>=s);
     }

  //return the length of overlap between two segments (0 if they are disjoint)
  int overlapLen(const GSeg* r) const {
     if (start<r->start) {
        if (r->start>end) return 0;
        return (r->end>end) ? end-r->start+1 : r->end-r->start+1;
        }
       else { //r->start<=start
        if (start>r->end) return 0;
        return (r->end<end)? r->end-start+1 : end-start+1;
        }
     }
  // same, for an interval given by its coordinates (in either order)
  int overlapLen(uint rstart, uint rend) const {
     if (rstart>rend) { uint t=rstart; rstart=rend; rend=t; }
     if (start<rstart) {
        if (rstart>end) return 0;
        return (rend>end) ? end-rstart+1 : rend-rstart+1;
        }
       else { //rstart<=start
        if (start>rend) return 0;
        return (rend<end)? rend-start+1 : end-start+1;
        }
     }

  //fuzzy coordinate matching: both start and end may differ by at most fuzz
  bool coordMatch(const GSeg* s, uint fuzz=0) const {
     if (fuzz==0) return (start==s->start && end==s->end);
     uint sd = (start>s->start) ? start-s->start : s->start-start;
     uint ed = (end>s->end) ? end-s->end : s->end-end;
     return (sd<=fuzz && ed<=fuzz);
     }

  //comparison operators required for sorting: order by start, then by end
  bool operator==(const GSeg& d) const {
      return (start==d.start && end==d.end);
      }
  bool operator>(const GSeg& d) const {
     return (start==d.start)?(end>d.end):(start>d.start);
     }
  bool operator<(const GSeg& d) const {
     return (start==d.start)?(end<d.end):(start<d.start);
     }
};
374    
375    
376    
377     //--------------------------------------------------------
378     // ************** simple line reading class for text files
379    
380     //GLineReader -- text line reading/buffering class
381     class GLineReader {
382     bool closeFile;
383     int len;
384     int allocated;
385     char* buf;
386     bool isEOF;
387     FILE* file;
388     off_t filepos; //current position
389     bool pushed; //pushed back
390     int lcount; //line counter (read lines)
391     public:
392     char* chars() { return buf; }
393     char* line() { return buf; }
394     int readcount() { return lcount; } //number of lines read
395     void setFile(FILE* stream) { file=stream; }
396     int length() { return len; }
397     int size() { return len; } //same as size();
398     bool isEof() {return isEOF; }
399     bool eof() { return isEOF; }
400     off_t getfpos() { return filepos; }
401     off_t getFpos() { return filepos; }
402     char* nextLine() { return getLine(); }
403     char* getLine() { if (pushed) { pushed=false; return buf; }
404     else return getLine(file); }
405     char* getLine(FILE* stream) {
406     if (pushed) { pushed=false; return buf; }
407     else return getLine(stream, filepos); }
408     char* getLine(FILE* stream, off_t& f_pos); //read a line from a stream and update
409     // the given file position
410     void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
411     // so the next call will in fact return the same line
412     GLineReader(const char* fname) {
413     FILE* f=fopen(fname, "rb");
414     if (f==NULL) GError("Error opening file '%s'!\n",fname);
415     closeFile=true;
416     init(f);
417     }
418     GLineReader(FILE* stream=NULL, off_t fpos=0) {
419     closeFile=false;
420     init(stream,fpos);
421     }
422     void init(FILE* stream, off_t fpos=0) {
423     len=0;
424     isEOF=false;
425     allocated=1024;
426     GMALLOC(buf,allocated);
427     lcount=0;
428     buf[0]=0;
429     file=stream;
430     filepos=fpos;
431     pushed=false;
432     }
433     ~GLineReader() {
434     GFREE(buf);
435     if (closeFile) fclose(file);
436     }
437     };
438    
439    
440     /* extended fgets() - to read one full line from a file and
441     update the file position correctly !
442     buf will be reallocated as necessary, to fit the whole line
443     */
444     char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
445    
446    
447     //print int/values nicely formatted in 3-digit groups
448     char* commaprint(uint64 n);
449    
450     /*********************** File management functions *********************/
451    
452     // removes the last part (file or directory name) of a full path
453     // WARNING: this is a destructive operation for the given string!
454     void delFileName(char* filepath);
455    
456     // returns a pointer to the last file or directory name in a full path
457     const char* getFileName(const char* filepath);
458     // returns a pointer to the file "extension" part in a filename
459     const char* getFileExt(const char* filepath);
460    
461    
462     int fileExists(const char* fname);
463     //returns 0 if file entry doesn't exist
464     // 1 if it's a directory
465     // 2 if it's a regular file
466     // 3 otherwise (?)
467    
468     int64 fileSize(const char* fpath);
469    
470     //write a formatted fasta record, fasta formatted
471     void writeFasta(FILE *fw, const char* seqid, const char* descr,
472     const char* seq, int linelen=60, int seqlen=0);
473    
474     //parses the next number found in a string at the current position
475     //until a non-digit (and not a '.', 'e','E','-','+') is encountered;
476     //updates the char* pointer to be after the last digit parsed
477     bool parseNumber(char* &p, double& v);
478     bool parseDouble(char* &p, double& v); //just an alias for parseNumber
479    
480     bool parseInt(char* &p, int& i);
481     bool parseUInt(char* &p, uint& i);
482     bool parseHex(char* &p, uint& i);
483    
484     #endif /* G_BASE_DEFINED */