ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/bam_merge.cpp
Revision: 29
Committed: Tue Aug 2 21:24:54 2011 UTC (13 years, 1 month ago) by gpertea
File size: 3171 byte(s)
Log Message:
adding tophat source work

Line File contents
1 #include <cstdlib>
2 #include <cstdio>
3 #include <cstring>
4
5 #include "bam/bam.h"
6 #include "bam/sam.h"
7 #include "GBase.h"
8 #include "GList.hh"
9
10 #define USAGE "Usage: bam_merge <out.bam> <in1.bam> <in2.bam> [...]\n"
11
12 #define ERR_BAM_OPEN "Error: bam_merge failed to open BAM file %s\n"
13
14 samfile_t **srcfiles; //array of SAM file handles
15 samfile_t *fw; //output SAM handle
16
17 class CBamLine {
18 public:
19 int fileno;
20 long read_id;
21 bam1_t* b;
22 bool operator==(CBamLine& l){
23 return (read_id==l.read_id);
24 }
25 bool operator>(CBamLine& l){
26 return (read_id<l.read_id); //(last has lowest #)
27 }
28 bool operator<(CBamLine& l){
29 return (read_id>l.read_id);
30 }
31 CBamLine(int fno=-1, bam1_t* br=NULL) {
32 fileno=fno;
33 read_id=0;
34 b=br;
35 b_init();
36 }
37 void b_init() {
38 if (b) {
39 char *name = bam1_qname(b);
40 read_id=atol(name);
41 if (read_id<1) {
42 char* samline=bam_format1(srcfiles[0]->header, b);
43 GError("Error: invalid read Id (must be numeric) for BAM record:\n%s\n",
44 samline);
45 }
46 }
47 }
48
49 void b_free() {
50 if (b!=NULL) { bam_destroy1(b); }
51 }
52 };
53
54 GList<CBamLine> lines(true,true,false);
55
56 int main(int argc, char *argv[])
57 {
58 char* outfname=NULL;
59 if (argc<4) {
60 fprintf(stderr, USAGE);
61 if (argc>1)
62 fprintf(stderr, "Error: only %d arguments given.\n", argc-1);
63 return -1;
64 }
65 outfname=argv[1];
66 int num_src=argc-2;
67 GMALLOC(srcfiles, (num_src*sizeof(samfile_t*)));
68 for (int i=2;i<argc;i++) {
69 int fno=i-2;
70 samfile_t* fp=samopen(argv[i], "rb", 0);
71 if (fp==0) {
72 fprintf(stderr, ERR_BAM_OPEN, argv[i]);
73 return 1;
74 }
75 bam1_t *b = bam_init1();
76 if (samread(fp, b) > 0) {
77 srcfiles[fno]=fp;
78 lines.Add(new CBamLine(fno, b));
79 }
80 }
81 if (lines.Count()==0) {
82 GMessage("Warning: no input BAM records found.\n");
83 }
84 fw=samopen(outfname, "wb", srcfiles[lines[0]->fileno]->header);
85 if (fw==NULL)
86 GError("Error creating output file %s\n", outfname);
87 int last;
88 while ((last=lines.Count()-1)>=0) {
89 CBamLine* from=lines[last]; //should have the smallest read_id
90 if (from->fileno<0 || from->b==NULL)
91 GError("Invalid processTopLine() call with uninitialized value.");
92 samwrite(fw, from->b);
93 if (samread(srcfiles[from->fileno], from->b)>0) {
94 from->b_init();
95 //adjust the position in the sorted lines list
96 if (last<7) {//
97 int i=last;
98 while (i>0 && lines[i-1]->read_id<lines[i]->read_id) {
99 //swap
100 CBamLine* tmp=lines[i-1];
101 lines.Put(i-1, lines[i], false);
102 lines.Put(i,tmp, false);
103 i--;
104 }
105 }
106 else { //use quick-insert
107 lines.Pop();
108 lines.Add(from);
109 }
110 }
111 else { //no more BAM records
112 lines.Pop();
113 from->b_free();
114 }
115 }
116 samclose(fw);
117 for (int i=0;i<num_src;i++)
118 samclose(srcfiles[i]);
119 GFREE(srcfiles);
120 return 0;
121 }