ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/deletions.cpp
Revision: 29
Committed: Tue Aug 2 21:24:54 2011 UTC (13 years, 1 month ago) by gpertea
File size: 3155 byte(s)
Log Message:
adding tophat source work

Line User Rev File contents
1 gpertea 29 /*
2     * deletions.cpp
3     * TopHat
4     *
5     * Created by Ryan Kelley on 10/09/2010.
6     *
7     */
8    
9     #ifdef HAVE_CONFIG_H
10     #include <config.h>
11     #endif
12    
13     #include <cassert>
14     #include "common.h"
15     #include "deletions.h"
16    
17    
18    
19    
20    
21     /*
22     * Print deletions in BED format
23     * As per the BED-standard (http://genome.ucsc.edu/FAQ/FAQformat)
24     * -The coordinates should be 0-based
25     * -The chromEnd field should not contain the actual feature
26     * -The name will be "-"
27     * -The score will be count of supporting reads (max of 1,000)
28     *
29     * chromStart refers to the position of the first deleted based
30     * <chrom>\t<left>\t<right>\t-\t<read count>\n
31     * @param deletions_out The output file
32     * @param deletions Maps from deletions to number of supporting reads
33     * @pram ref_sequences The table of reference sequences
34     *
35     */
36     void print_deletions(FILE* deletions_out, const DeletionSet& deletions, RefSequenceTable& ref_sequences){
37     fprintf(deletions_out, "track name=deletions description=\"TopHat deletions\"\n");
38     for(DeletionSet::const_iterator i = deletions.begin(); i != deletions.end(); ++i){
39     fprintf(deletions_out, "%s\t%d\t%d\t%s\t%d\n",
40     ref_sequences.get_name(i->first.refid),
41     i->first.left + 1,
42     i->first.right,
43     "-",
44     i->second);
45     }
46     }
47    
48     /**
49     * Add deletions from an alignment to an DeletionSet.
50     * This will look for deletion in the alignment specified by bh. If the
51     * deletion is already in deletions, it will updated the count. Otherwise,
52     * it will add the deletion to the set and initialize the count to 1.
53     * @param bh The bowtie hit to be used to specify alignment infromation.
54     * @param deletions The DeletionSet that will be updated with the deletion information from teh alignment.
55     */
56     void deletions_from_alignment(const BowtieHit& bh, DeletionSet& deletions){
57     vector<Deletion> new_deletions;
58     deletions_from_spliced_hit(bh, new_deletions);
59    
60     for(size_t i = 0; i < new_deletions.size(); ++i){
61     Deletion deletion = new_deletions[i];
62     DeletionSet::iterator itr = deletions.find(deletion);
63     if (itr != deletions.end()){
64     itr->second += 1;
65     }
66     else{
67     deletions[deletion] = 1;
68     }
69     }
70     return;
71     }
72    
73    
74    
75    
76     /**
77     * Extract a list of deletions from a bowtie hit.
78     * Given a bowtie hit, extract a vector of deletions.
79     * @param bh The bowtie hit to use for alignment information.
80     * @param insertions Used to store the resultant vector of deletions.
81     */
82     void deletions_from_spliced_hit(const BowtieHit& bh, vector<Deletion>& deletions){
83     const vector<CigarOp>& cigar = bh.cigar();
84     unsigned int positionInGenome = bh.left();
85     unsigned int positionInRead = 0;
86    
87     for(size_t c = 0; c < cigar.size(); ++c){
88     Junction deletion;
89     switch(cigar[c].opcode){
90     case REF_SKIP:
91     positionInGenome += cigar[c].length;
92     break;
93     case MATCH:
94     positionInGenome += cigar[c].length;
95     positionInRead += cigar[c].length;
96     break;
97     case DEL:
98     deletion.refid = bh.ref_id();
99     deletion.left = positionInGenome - 1;
100     deletion.right = positionInGenome + cigar[c].length;
101     deletions.push_back(deletion);
102     positionInGenome += cigar[c].length;
103     break;
104     case INS:
105     positionInRead += cigar[c].length;
106     break;
107     default:
108     break;
109     }
110     }
111     return;
112     }