1 |
#!/usr/bin/perl |
2 |
use strict; |
3 |
use Getopt::Std; |
4 |
use FindBin;use lib $FindBin::Bin; |
5 |
|
6 |
my $usage = q/Usage: |
7 |
gfflt [-c <mincov>] [-p <minpid>] [-P] [-l <fsize_file>] [-s<strand>][{-C|-E}] \ |
8 |
[-D|-T] [-f <ftstr>] [-t <track>] [-r <start>-<end>] [-n<ID_suffix>] \ |
9 |
[-i <maxintron>] [-u dupIDs.tab] [-w <map_out_file>] [-o <outfile>] |
10 |
|
11 |
Note: requires segment subfeatures (CDS\/exons) to be strictly |
12 |
grouped with (i.e. follow) their parent mRNA feature |
13 |
|
14 |
-l : file with extra mapping information, 3 tab-delimited columns |
15 |
are expected in each line: |
16 |
<qry_ID> <qry_len> <qry_description> |
17 |
-P : only for PMAP output files, take "Target" coordinates |
18 |
as nucleotide-based (divide by 3 to get the actual query protein |
19 |
coordinates) |
20 |
|
21 |
Filtering options: |
22 |
-r : only show gff records fully included in between the given coordinates |
23 |
-R : like -r but also show mappings that intersect the given range |
24 |
-s : only show gff records on the given strand |
25 |
-u : duplicate IDs should be discarded according to the given ID table |
26 |
-c : discard mappings with coverage lower than <mincov>; |
27 |
unless qreg or qlen attributes are found in the mRNA line, |
28 |
this option requires the -l option to provide query lengths |
29 |
-p : discard mappings with percent identity lower than <minpid>; |
30 |
requires the score column or the PID attribute for the 'mRNA' |
31 |
feature |
32 |
-x : discard exon mappings with percent identity lower than <xminpid>; |
33 |
(coverage and percent identity will be adjusted accordingly) |
34 |
-C|-E : read only the CDS (-C) or only the exon segments (-E) |
35 |
from the input file (default: read both); |
36 |
Output modifiers: |
37 |
|
38 |
-f : replace the output segment feature name ("CDS" or "exon") |
39 |
with this string (useful for converting "CDS" into "exon" or |
40 |
the other way around); requires -C or -E option |
41 |
-n : add the given alphanumeric suffix followed by a counter |
42 |
for each mapping of a query sequence (starting at 1) |
43 |
(only the first 4 letters of <ID_suffix> will be used) |
44 |
-N : for -n option, the whole ID is replaced with the given string |
45 |
-M : make the "Name" attribute to have the same value with "ID" attribute |
46 |
-t : change the track name (2nd GFF column) into given label |
47 |
-D : add mapping descriptions ("descr=" attribute) as found in the |
48 |
3rd column of the <fsize_file> (so it requires -l option) |
49 |
-V : add "cov" and "pid" attributes to the mRNA lines if possible |
50 |
and if not there already |
51 |
-S : strip the output of any other attributes besides ID, Parent and Name |
52 |
-T : write GTF format instead (this is incompatible with -D and -V) |
53 |
|
54 |
/; |
55 |
umask 0002; |
56 |
my @inputfiles; |
57 |
while ($ARGV[0] && $ARGV[0]!~m/^\-/) { |
58 |
push(@inputfiles, shift(@ARGV)); |
59 |
} |
60 |
getopts('i:t:r:R:l:s:c:x:p:w:f:u:n:o:PMSNCEDTV') || die($usage."\n"); |
61 |
unshift(@ARGV, @inputfiles) if @inputfiles>0; |
62 |
#-- options |
63 |
my $outfile=$Getopt::Std::opt_o; |
64 |
my $dupfile=$Getopt::Std::opt_u; |
65 |
my $maxintron=$Getopt::Std::opt_i || 450000; |
66 |
my $stripAttrs=$Getopt::Std::opt_S; |
67 |
my $fullRename=$Getopt::Std::opt_N; |
68 |
my $resetName=$Getopt::Std::opt_M; |
69 |
my $globalCounter=0; |
70 |
my $inFeat; |
71 |
$inFeat='exon' if $Getopt::Std::opt_E; |
72 |
$inFeat='CDS' if $Getopt::Std::opt_C; |
73 |
my $pmap=$Getopt::Std::opt_P; |
74 |
my %dups; #duplicate IDs are kept here -- $dups{$id} => $mainid |
75 |
my %uniq; #key is ctgid_qryid_startcoord_endcoord_covscore |
76 |
#my $startOnly=$Getopt::Std::opt_S; |
77 |
my ($minpid, $mincov, $addCounter, $addDescr, $outFeat, $GTF)= |
78 |
($Getopt::Std::opt_p, $Getopt::Std::opt_c, $Getopt::Std::opt_n, |
79 |
$Getopt::Std::opt_D, $Getopt::Std::opt_f, $Getopt::Std::opt_T); |
80 |
my $g_range = $Getopt::Std::opt_r || $Getopt::Std::opt_R; |
81 |
my $xrange = 1 if ($g_range && $Getopt::Std::opt_R); |
82 |
my ($mincoord, $maxcoord); |
83 |
if ($g_range) { |
84 |
($mincoord, $maxcoord)=($g_range=~m/^(\d*)[\.\-_]+(\d*)$/); |
85 |
$mincoord=1 unless $mincoord>0; |
86 |
$maxcoord=4000000000 unless $maxcoord>0; |
87 |
#print STDERR "range filter: $mincoord .. $maxcoord\n"; |
88 |
} |
89 |
my $fltstrand=$Getopt::Std::opt_s; |
90 |
my $xminpid = $Getopt::Std::opt_x; |
91 |
my $ntrack=$Getopt::Std::opt_t; |
92 |
$mincov=~tr/%//d; |
93 |
$mincov=1 unless $mincov; |
94 |
$minpid=20 unless $minpid; |
95 |
$minpid=20 unless $minpid; |
96 |
$xminpid=12 unless $xminpid; |
97 |
my $fmapout=$Getopt::Std::opt_w; |
98 |
if ($fmapout) { |
99 |
open(FMAP, '>'.$fmapout) || die("Error creating file $fmapout\n"); |
100 |
} |
101 |
my $fsizefile=$Getopt::Std::opt_l; |
102 |
my %f; |
103 |
my %n; |
104 |
if ($fsizefile) { |
105 |
open(FSIZE, $fsizefile) || die("Error opening query info file $fsizefile!\n"); |
106 |
while(<FSIZE>) { |
107 |
chomp; |
108 |
my @t=split(/\s+/,$_,3); |
109 |
next unless $t[1]; |
110 |
$f{$t[0]}=[$t[1], $t[2]]; # seqlen, description |
111 |
} |
112 |
close(FSIZE); |
113 |
} |
114 |
if ($dupfile) { |
115 |
open(DUPFILE, $dupfile) || die ("Error opening duplicate table file $dupfile!\n"); |
116 |
while (<DUPFILE>) { |
117 |
chomp; |
118 |
my @t=split(/\t/); |
119 |
foreach my $id (@t) { |
120 |
$dups{$id}=$t[0]; |
121 |
} |
122 |
} |
123 |
close(DUPFILE); |
124 |
} |
125 |
my $curqry; #current target (query) name |
126 |
my ($curqstart, $curqend, $curqlen, $curcov, $curpid, $use_tcoords); #length of the current query |
127 |
my $curdescr; #defline of the current query |
128 |
my $covlen; #coverage of current query |
129 |
my $eScore; #cummulated exon score |
130 |
my ($gffid, $gchr, $gtrack, $gname, $gstart, $gend, $gscore, $gstrand, $ginfo); |
131 |
# 0 1 2 3 4 5 6 |
132 |
my @exd; # list of [exon_start, exon_end, phase, score, qstart, qend, exon_other_attrs] |
133 |
my @cds; # list of [cds_start, cds_end, phase, score, qstart, qend, cds_other_attrs] |
134 |
if ($outfile) { |
135 |
open(OUTFILE, '>'.$outfile) || die ("Error creating file $outfile\n"); |
136 |
select(OUTFILE); |
137 |
} |
138 |
while (<>) { |
139 |
next if m/^\s*#/; |
140 |
chomp; |
141 |
my ($chr, $binfo, $ftype, $fstart, $fend, $fscore, |
142 |
$strand, $phase, $finfo)=split(/\t/); |
143 |
next if $ftype eq 'gene'; # just discard the superfluous 'gene' entries; |
144 |
if ($ftype eq 'mRNA') { #start of a new mapping |
145 |
&flush_gbuf(); |
146 |
($gchr, $gtrack, $gstart, $gend, $gscore, $gstrand, $ginfo)= |
147 |
($chr, $binfo, $fstart, $fend, $fscore, $strand, $finfo); |
148 |
($gffid)=($ginfo=~m/ID=([^;]+)/); |
149 |
die("Error finding the current ID (in attribute field: $finfo)\n") unless $gffid; |
150 |
$ginfo=~s/ID=[^;]+\s*;?//; |
151 |
$ginfo=~s/Parent=[^;]+\s*;?//; |
152 |
if ($ginfo=~s/Name=([^;]+)\s*;?//) { |
153 |
$gname=$1; |
154 |
} |
155 |
if ($ginfo=~m/qlen=(\d+)/i) { |
156 |
$curqlen=$1; |
157 |
$ginfo=~s/qlen=\d+\s*;?//i; |
158 |
} |
159 |
if ($ginfo=~m/qreg=(\d+)\-(\d+)/i) { |
160 |
($curqstart, $curqend)=($1,$2); |
161 |
$curqstart=1 if $curqstart==0; |
162 |
if ($ginfo=~m/qreg=\d+\-\d+\|(\d+)/i) { |
163 |
$curqlen=$1; |
164 |
$ginfo=~s/qreg=\d+\-\d+\|\d+\s*;?//i; |
165 |
} |
166 |
else { |
167 |
$ginfo=~s/qreg=\d+\-\d+\s*;?//i; |
168 |
} |
169 |
} |
170 |
if ($ginfo=~m/cov=([\d\.]+)/i) { |
171 |
$curcov=$1; |
172 |
$ginfo=~s/cov=[\d\.]+\s*;?//i; |
173 |
} |
174 |
if ($ginfo=~m/\bpid=([\d\.]+)/i) { |
175 |
$curpid=$1; |
176 |
$ginfo=~s/\bpid=[\d\.]+\s*;?//i; |
177 |
} |
178 |
if ($fscore>50 && $fscore<=100 && !$curpid) { |
179 |
$curpid=$fscore; |
180 |
} |
181 |
next; |
182 |
} # mRNA line |
183 |
# vvvvvvvvv exons or CDS segments vvvvvvvvvvv |
184 |
$ftype=~s/\w+\-exon$/exon/i; #jigsaw |
185 |
next unless $ftype eq 'exon' || $ftype eq 'CDS'; |
186 |
my $gtfgeneid; |
187 |
my $id; |
188 |
if ($finfo=~m/(?:Parent|transcript_id)[=\s]+"?([^;^\s^"]+)/) { |
189 |
$id=$1; |
190 |
unless ($finfo=~s/Parent=[^;]+\s*;?//) { |
191 |
#must be GTF format -- discard anything else but the gene_id, if found; |
192 |
($gtfgeneid)=($finfo=~m/gene_id[=\s]+"?([^;^\s^"]+)/); |
193 |
# we could parse all extra GTF attributes here and rebuild them in GFF3 format.. |
194 |
# .. nah |
195 |
$finfo=""; |
196 |
} |
197 |
} |
198 |
else { # jigsaw and other over-simplified GFF formats |
199 |
my ($geneid)=($finfo=~m/gene_id[=\s]+"?([^;^\s^"]+)/); |
200 |
if ($geneid) { |
201 |
$id=$geneid; |
202 |
$finfo=""; |
203 |
} |
204 |
else { |
205 |
($id)=($finfo=~m/^([^;^\s]+)/); |
206 |
if ($binfo=~m/^jigsaw/) { |
207 |
($finfo)=($finfo=~m/(gene_score=[\-\d\.]+)/); |
208 |
} |
209 |
else { $finfo=""; } |
210 |
} |
211 |
} |
212 |
die("Error getting the current ID from attribute field: $finfo\n") unless $id; |
213 |
$finfo=~s/ID=[^;]+\s*;?//; |
214 |
$finfo=~s/Name=([^;]+)\s*;?//; |
215 |
if (!$gffid || ($gffid ne $id) || ($gchr && $gchr ne $chr)) { |
216 |
# start of new mapping here (change in gff ID or base sequence) |
217 |
#print STDERR "gffid=$gffid, id=$id (gchr=$gchr)\n"; |
218 |
&flush_gbuf(); |
219 |
($gchr, $gtrack, $gstart, $gend, $gscore, $gstrand, $ginfo)= |
220 |
($chr, $binfo, $fstart, $fend, $fscore, $strand, $finfo); |
221 |
$ginfo=~s/Target=[^;]+\s*;?//i; |
222 |
$ginfo=~s/qreg=\d+\-\d+\s*;?//i; |
223 |
$gname=$gtfgeneid; |
224 |
} |
225 |
$finfo=~s/gene_score=[\-\d\.]+\s*\;?//i; |
226 |
$gffid=$id; |
227 |
my ($target, $t1, $t2, $tstrand); |
228 |
if ($finfo=~m/Target=([^;]+)/i) { |
229 |
my $alninfo=$1; |
230 |
($target, $t1, $t2, $tstrand)=($alninfo=~m/^(\S+)\s+(\d+)\s+(\d+)\s+([\-+])/); |
231 |
$finfo=~s/Target=[^;]+\s*;?//i; |
232 |
$use_tcoords=1; |
233 |
} |
234 |
else { |
235 |
if ($finfo=~m/qreg=(\d+)\-(\d+)/i) { |
236 |
($t1,$t2)=($1,$2); |
237 |
$t1=1 if $t1==0; |
238 |
$finfo=~s/qreg=\d+\-\d+\s*;?//i; |
239 |
} |
240 |
$target=$gffid; |
241 |
$target=~s/\.[a-z]{1,5}\d+$//; |
242 |
} |
243 |
if ($curqry) { |
244 |
die("Error: CDS/exon Target entry found out of place ($_)\n") |
245 |
if ($target ne $curqry); |
246 |
} |
247 |
else { |
248 |
$curqry=$target; #first time assignment of curqry |
249 |
if ($fsizefile) { |
250 |
# if ($mincov) { |
251 |
my $qd=$f{$curqry}; |
252 |
die("Error getting data for $curqry from $fsizefile!\n") unless $qd; |
253 |
($curqlen, $curdescr)=@$qd; |
254 |
#print STDERR "=====> descr for $curqry: $curdescr\n"; |
255 |
} |
256 |
$covlen=0; |
257 |
} # first time assignment of curqry |
258 |
$phase='.' if $phase<0; |
259 |
($fstart, $fend)=($fend, $fstart) if $fend<$fstart; |
260 |
($t1, $t2)=($t2, $t1) if ($t2<$t1); |
261 |
if ($t2 && $pmap) { |
262 |
($t1, $t2)=(int($t1/3), int($t2/3)); |
263 |
$t1=1 if $t1<1; |
264 |
}; |
265 |
if ($ftype eq 'exon') { |
266 |
# 0 1 2 3 4 5 6 |
267 |
push(@exd, [$fstart, $fend, $phase, $fscore, $t1, $t2, $finfo]); |
268 |
next; |
269 |
} |
270 |
else { # ftype must be CDS here |
271 |
# --exon entries should come first ? |
272 |
# die("Error: missing query heading for: $_\n") unless $curqry; |
273 |
#next if ($minpid && $fscore<$minpid); |
274 |
push(@cds, [$fstart, $fend, $phase, $fscore, $t1, $t2, $finfo]); |
275 |
} |
276 |
} # line parsing loop |
277 |
|
278 |
flush_gbuf() if $curqry; |
279 |
select(STDOUT); |
280 |
close(OUTFILE) if ($outfile); |
281 |
close(FMAP) if ($fmapout); |
282 |
# END here |
283 |
|
284 |
#================ SUBROUTINES ============ |
285 |
|
286 |
sub flush_gbuf { |
287 |
return unless $gffid; |
288 |
check_gff(); |
289 |
($gffid, $curqry, $curqlen, $covlen, $curqstart, $curqend, $curcov, $curpid, $use_tcoords)= |
290 |
(undef, undef, 0, 0, 0, 0, 0, 0, 0); |
291 |
($gchr, $gtrack, $gstart, $gend, $gscore, $gstrand, $gname, $ginfo)=(undef) x 8; |
292 |
@exd=(); |
293 |
@cds=(); |
294 |
$eScore=0; |
295 |
} |
296 |
|
297 |
sub check_gff { |
298 |
#die ("Error at calling flush_gbuf(): current length must NOT be zero!\n") unless $curqlen>0; |
299 |
#$covlen=$covlen/3 if $pmap; |
300 |
#my $cov=$curqlen ? ($covlen*100.00)/$curqlen : 0; |
301 |
#return if ($mincov && $cov<$mincov); #failed coverage condition |
302 |
|
303 |
my $hasCDSonly; # for pblat or some GTF file, we may have no exon segments |
304 |
if (@exd==0) { |
305 |
@exd=sort { $main::a->[0] <=> $main::b->[0] } @cds; |
306 |
$hasCDSonly=1; |
307 |
@cds=(); |
308 |
} |
309 |
else { |
310 |
@exd = sort { $main::a->[0] <=> $main::b->[0] } @exd; |
311 |
@cds = sort { $main::a->[0] <=> $main::b->[0] } @cds; |
312 |
} |
313 |
my $prev_end=0; |
314 |
#my $covscore=0; |
315 |
my $i=0; |
316 |
my $numexons=0; |
317 |
my @fexd; #filtered set of exons |
318 |
my $qcovlen=0; #total aligned length |
319 |
my $gcovlen=0; #covered length (in nucleotides) |
320 |
my $covscore=0; |
321 |
foreach my $x (@exd) { |
322 |
$i++; |
323 |
my $xpid = ($$x[3]>=10 && $$x[3]<=100.0)? $$x[3] : 0; |
324 |
next if ($xpid && $xpid<$xminpid && (($i>1 && $i<@exd) || @exd==1)); #skip internal exons with lower pid |
325 |
if ($prev_end>0) { |
326 |
my $intronlen=$$x[0]-$prev_end; |
327 |
return if $intronlen>$maxintron; |
328 |
} |
329 |
$prev_end=$$x[1]; |
330 |
$covscore+=($$x[1]-$$x[0]+1)*$xpid; |
331 |
$gcovlen+=($$x[1]-$$x[0]+1); |
332 |
$qcovlen+=($$x[5]-$$x[4]+1) if $$x[5]>0; |
333 |
push(@fexd, $x); |
334 |
$numexons++; |
335 |
} |
336 |
return if @fexd==0; |
337 |
if ($g_range) { |
338 |
if ($xrange) { |
339 |
#everything that intersects given range |
340 |
return if ($gstart>$maxcoord || $gend<$mincoord); |
341 |
} |
342 |
else { |
343 |
return unless ($gstart>=$mincoord && $gend<=$maxcoord); |
344 |
} |
345 |
} |
346 |
return if ($fltstrand && $gstrand ne $fltstrand); |
347 |
my $totpid=sprintf('%.2f',$covscore/$gcovlen); |
348 |
my $qlen=$curqlen; #query length, in nucleotides |
349 |
my $qcov = ($qcovlen>5 && $qlen>0) ? ($qcovlen*100.00)/$qlen : $curcov; |
350 |
#print STDERR "=====> gffID: $gffid qcov=$qcov\n"; |
351 |
return if ($numexons<1 || ($totpid>10 && $totpid<$minpid) || ($qcov && $qcov<$mincov)); |
352 |
#my ($parent)=($ginfo=~m/\bID\s*=\s*([^;]+)/); |
353 |
my $parent=$gffid; |
354 |
if ($curqry) { |
355 |
my $q=$dups{$curqry} || $curqry; |
356 |
my $uid="$gchr|$q|$gstart|$gend|$qcov"; |
357 |
return if exists($uniq{$uid}); |
358 |
$uniq{$uid}=1; |
359 |
} |
360 |
if ($addCounter) { |
361 |
if ($fullRename) { |
362 |
$globalCounter++; |
363 |
$parent=$addCounter.$globalCounter; |
364 |
} |
365 |
else { |
366 |
die("Error: no curqry available for gff ID: $gffid!\n") unless $curqry; |
367 |
my $mnum = ++$n{$curqry}; |
368 |
$parent=$curqry.'.'.$addCounter.$mnum; |
369 |
} |
370 |
|
371 |
} |
372 |
my $gffattrs="ID=$parent"; |
373 |
my $gffname=$gname || $parent; |
374 |
if ($stripAttrs || $resetName) { |
375 |
$gffname=$parent; |
376 |
} |
377 |
$gffattrs.=";Name=$gffname"; #just use a name there so argo can show it |
378 |
|
379 |
if ($use_tcoords) { |
380 |
my @td=sort { $main::a->[4] <=> $main::b->[4] } @fexd; |
381 |
$curqstart=$td[0]->[4]; |
382 |
$curqend=$td[-1]->[5]; |
383 |
} |
384 |
if ($qcov>1) { |
385 |
$qcov=100 if $qcov>100; |
386 |
$gffattrs.=';cov='.sprintf('%.1f',$qcov) unless $stripAttrs; |
387 |
} |
388 |
unless ($totpid>0) { |
389 |
$totpid=$gscore if $gscore>10 && $gscore<=100.0; |
390 |
} |
391 |
unless ($covscore) { |
392 |
my $rqlen=$qlen; |
393 |
$rqlen*=3 if $pmap; |
394 |
$covscore=(($rqlen*$qcov)/100.00)*$totpid; |
395 |
} |
396 |
unless ($stripAttrs) { |
397 |
$gffattrs.=';pid='.sprintf('%.2f',$totpid) if $totpid>2; # unless $ginfo=~m/Identity=/; |
398 |
$gffattrs.=';covscore='.sprintf('%d',$covscore) if $covscore>10; |
399 |
#$ginfo.=';score='.sprintf('%d',($eScore/100.00)) unless $ginfo=~m/score=/i; |
400 |
if ($curqstart && $curqend) { |
401 |
$gffattrs.=";qreg=$curqstart-$curqend"; |
402 |
$gffattrs.="|$curqlen" if $curqlen; |
403 |
} |
404 |
else { |
405 |
$gffattrs.=';qlen='.$curqlen if $curqlen; |
406 |
} |
407 |
} |
408 |
my ($gffdescr)=($ginfo=~m/\b(?:descr|info)=\"([^"]+)/); |
409 |
unless ($stripAttrs) { |
410 |
$ginfo=~s/;$//; |
411 |
$ginfo=~s/^;//; |
412 |
if ($curdescr) { |
413 |
$curdescr =~ tr/"\t;=/' .:/s; #" |
414 |
$curdescr=~s/[\.\;\, ]+$//; |
415 |
$ginfo=~s/\b(?:descr|info)=\"[^"]+\"\s*;?//i; |
416 |
if ($ginfo) { |
417 |
$ginfo.=';descr="'.$curdescr.'"'; |
418 |
} |
419 |
else { $ginfo=';descr="'.$curdescr.'"'; } |
420 |
} |
421 |
$gffattrs.=";$ginfo" if $ginfo; |
422 |
} |
423 |
$gffattrs=~tr/; /; /s; |
424 |
|
425 |
my $gfftrack= $ntrack || $gtrack; |
426 |
print join("\t", $gchr, $gfftrack, 'mRNA', $gstart, $gend, $gscore, $gstrand, '.', $gffattrs)."\n" unless $GTF; |
427 |
# exons: |
428 |
my $fonly=$outFeat; |
429 |
if ($outFeat) { |
430 |
$fonly=$outFeat; |
431 |
} |
432 |
else { |
433 |
$fonly= $hasCDSonly ? 'CDS':'exon'; |
434 |
} |
435 |
my $wroteExons=0; |
436 |
if (!$inFeat || ($inFeat eq 'exon') || $hasCDSonly) { |
437 |
$wroteExons=1; |
438 |
foreach my $x (@fexd) { |
439 |
my $xinfo="Parent=$parent"; |
440 |
unless ($stripAttrs) { |
441 |
$xinfo.=";qreg=$$x[4]-$$x[5]" if ($$x[4]>0); |
442 |
$$x[6]=~s/^;+//; |
443 |
$xinfo.=";$$x[6]" if $$x[6]; |
444 |
} |
445 |
print join("\t", $gchr, $gfftrack, $fonly, $$x[0], $$x[1], $$x[3], $gstrand, $$x[2],$xinfo)."\n"; |
446 |
} |
447 |
} |
448 |
if ($fmapout) { |
449 |
my $descr=$curdescr || $gffdescr; |
450 |
print FMAP join("\t", $gchr, $gstrand, $gstart, $gend, $curqry, sprintf('%d',$covscore), |
451 |
sprintf('%.1f',$totpid), sprintf('%.1f',$qcov), "$curqstart-$curqend|$curqlen", $descr)."\n"; |
452 |
} |
453 |
return if ($outFeat && $wroteExons); |
454 |
$fonly=$outFeat || 'CDS'; |
455 |
if (@cds>0 && (!$inFeat || ($inFeat eq 'CDS'))) { |
456 |
foreach my $x (@cds) { |
457 |
my $xinfo="Parent=$parent"; |
458 |
unless ($stripAttrs) { |
459 |
$xinfo.=";qreg=$$x[4]-$$x[5]" if ($$x[4]>0); |
460 |
$$x[6]=~s/^;+//; |
461 |
$xinfo.=";$$x[6]" if $$x[6]; |
462 |
} |
463 |
print join("\t", $gchr, $gfftrack, $fonly, $$x[0], $$x[1], $$x[3], $gstrand, $$x[2], $xinfo)."\n"; |
464 |
} |
465 |
} |
466 |
}#sub flush_gbuf() |