ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/rnaseq_mapflt.pl
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (13 years, 1 month ago) by gpertea
File size: 2974 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 use strict;
3 use Getopt::Std;
4 use FindBin;use lib $FindBin::Bin;
5
6 my $usage = q/Usage:
7 rnaseq_mapflt.pl [-x <chromosome>] [-l <seedlen>] [-n <max_seed_mismatches>] \
8 [-t <max_terminal_mismatches>] <bowtie_map_data>
9
10 Filters bowtie map output (sorted with best hits first) such that only
11 mappings with <max_mismatches> at 3' end (after <seedlen>) are
12 allowed.
13
14 Use option -x to exclude mappings from a specific chromosome or contig
15
16 Defaults are : -l16 -n1 -t3 -xchrM
17 (exclude mitochondrial chromosome)
18
19 For bowtie output created with -n and --best --strata it will also make sure
20 that only the "best strata" is kept based on the total number of mismatches.
21
22 /;
23 umask 0002;
24 getopts('x:t:l:n:o:') || die($usage."\n");
25 my $seedlen=$Getopt::Std::opt_l||16;
26 my $maxmmseed=$Getopt::Std::opt_n || 1;
27 my $maxmmend=$Getopt::Std::opt_t || 3;
28 my $nochr=$Getopt::Std::opt_x || 'chrM';
29 my $outfile=$Getopt::Std::opt_o;
30 if ($outfile) {
31 open(OUTF, '>'.$outfile) || die("Error creating output file $outfile\n");
32 select(OUTF);
33 }
34 # --
35 my ($prevq, $prevmmcount, $prevmmseedcount);
36 while (<>) {
37 my $line=$_;
38 chomp;
39 next unless $_;
40 my ($qry, $strand, $chr, $cstart, $seq, $qv, $d, $mism)=split(/\t/);
41 #next if $chr eq $nochr;
42 my $len=length($seq);
43 next unless $len>8;
44 my @mm=map { [split(/\:/)] } (split(/\,/,$mism)) ;
45 #mismatches as a list of [mmcoord, nt_change]
46 if ($strand eq '-') { #for reverse complement, reverse and adjust
47 @mm=map { [$len-$$_[0]-1, $$_[1]] } reverse @mm;
48 }
49 my @mmseed=grep { $$_[0]<$seedlen } @mm;
50 next if @mmseed>$maxmmseed;
51 #my @mmbefore_end=grep { $$_[0]<$len-$maxmmend-1 } @mm;
52 #next if @mmbefore_end>0;
53 next if scalar(@mm)-scalar(@mmseed)>$maxmmend;
54 next if ($prevq eq $qry && @mm>$prevmmcount);
55 $prevq=$qry;
56 $prevmmcount=scalar(@mm);
57 $prevmmseedcount=scalar(@mmseed);
58 #TODO: trim 5' end if any mismatches are grouped there close to that end
59 my $tlen=$len;
60 #print STDERR ">> testing $line";
61 foreach my $md (reverse @mm) {
62 #print STDERR " .. testing $$md[0] vs $tlen-2\n";
63 if ($$md[0]<$tlen-2) { last; }
64 else { $tlen=$$md[0]; } #truncate at mismatch
65 }
66 if ($tlen<$len) {
67 #adjust mismatches and read/qual strings
68 my $dlen=$len-$tlen;
69 @mm=grep { $$_[0]<$tlen } @mm;
70 if ($strand eq '-') {
71 @mm = map { [$len-$$_[0]-1, $$_[1]] } reverse @mm;
72 $seq=substr($seq,$dlen);
73 $qv=substr($qv, $dlen);
74 $cstart+=$dlen;
75 #print STDERR "$qry\n" if $tlen>=$seedlen;
76 }
77 else { #truncate sequence
78 $seq=substr($seq,0,$tlen);
79 $qv=substr($qv,0,$tlen);
80 #print STDERR "$qry (+)\n" if @mm>0 && $tlen>=$seedlen;
81 }
82 next if $tlen<$seedlen;
83 my @mmstr = map { $$_[0].':'.$$_[1] } @mm;
84 $line=join("\t", $qry, $strand, $chr, $cstart, $seq, $qv, $d, join(',',@mmstr))."\n";
85 }
86 print $line unless $chr eq $nochr;
87 }
88
89 # --
90 if ($outfile) {
91 select(STDOUT);
92 close(OUTF);
93 }
94
95 #************ Subroutines **************

Properties

Name Value
svn:executable *