1 |
#!/usr/bin/perl |
2 |
use strict; |
3 |
use Getopt::Std; |
4 |
|
5 |
my $usage = q/Usage: |
6 |
gffstat [-a] [-t <ftype>] [-f <subfeature_name>] input.gffs.. |
7 |
|
8 |
Returns statistics about the number, length and composition of |
9 |
'mRNA' features and their 'CDS' segments. |
10 |
Options: |
11 |
-q omit header line |
12 |
-a treat all input files as a single input stream, |
13 |
so only the overall counts (totals) are shown |
14 |
-t only consider entries with <ftype> in the |
15 |
second column (e.g. "jigsaw") |
16 |
-f use <subfeature_name> instead of default 'CDS' |
17 |
(needed for exon statistics) |
18 |
/; |
19 |
getopts('qat:f:') || die($usage."\n"); |
20 |
|
21 |
my ($readall, $tflt)=($Getopt::Std::opt_a, $Getopt::Std::opt_t); |
22 |
my $subfname= $Getopt::Std::opt_f || 'CDS'; |
23 |
$subfname='CDS' unless $subfname; |
24 |
print join("\t",' ','G_count', 'G_cov', 'Ex_count', 'Ex_cov')."\n" |
25 |
unless ($Getopt::Std::opt_q); |
26 |
if ($readall || @ARGV==0) { |
27 |
processInput(); |
28 |
} |
29 |
else { |
30 |
foreach my $f (@ARGV) { |
31 |
processInput($f); |
32 |
} |
33 |
} |
34 |
|
35 |
sub processInput { |
36 |
my $inf=$_[0]; |
37 |
my ($gcount, $gextent, $exoncount, $exoncov); |
38 |
my $rlinefunc; |
39 |
if ($inf) { |
40 |
open(INFILE, $inf) |
41 |
|| die("Error opening file $inf!\n"); |
42 |
$rlinefunc = \&readfile; |
43 |
} else { |
44 |
$rlinefunc=\&readstdin; |
45 |
} |
46 |
|
47 |
while (&$rlinefunc()) { |
48 |
next if m/^\s*#/; |
49 |
my ($gobj, $ftype, $fname, $fstart, $fend, $fscore, $fstrand, $frame, $fdesrc)=split(/\t/); |
50 |
next if ($tflt && $tflt ne $ftype); |
51 |
($fstart, $fend)=($fend, $fstart) if $fstart>$fend; |
52 |
if ($fname eq 'mRNA') { |
53 |
$gcount++; |
54 |
$gextent+= ($fend-$fstart+1); |
55 |
next; |
56 |
} |
57 |
if ($fname eq $subfname) { |
58 |
$exoncount++; |
59 |
$exoncov+=($fend-$fstart+1); |
60 |
} |
61 |
}#while parsing |
62 |
my $label='Total'; |
63 |
if ($inf) { |
64 |
close(INFILE); |
65 |
($label)=($inf=~m/([^\/]+)$/); |
66 |
} |
67 |
print join("\t", $label.':', $gcount, $gextent, $exoncount, $exoncov)."\n"; |
68 |
} |
69 |
|
70 |
sub readfile { |
71 |
$_=<INFILE>; |
72 |
return $_; |
73 |
} |
74 |
|
75 |
sub readstdin { |
76 |
$_=<>; |
77 |
return $_; |
78 |
} |