#!/usr/bin/perl
use strict;
use warnings;

my $usage=q/
gff_add_CDS.pl <gff_w_CDS.gff> <gff_input..>

Adds the CDS records from a GFF file to another GFF input
which presumably lacks it (or if the input already has CDS
records, they will be discarded).
/;

# Loads the CDS records of the GFF file at $path.
#
# Returns a hash ref: CDS Parent => [ "CDSseg1_data", "CDSseg2_data", .. ]
# where each segment is columns 3..9 of the CDS line joined by tabs, with the
# Parent=... attribute removed from the attribute column (it is re-added with
# the target mRNA ID when the segment is emitted).
#
# Dies if the file cannot be opened or if a CDS line has no Parent attribute.
sub load_cds_segments {
    my ($path) = @_;
    my %cds_for;
    open(my $fh, '<', $path)
        or die("Error opening CDS file $path: $!\n");
    while (my $line = <$fh>) {
        # strip the line terminator (LF or CRLF) before splitting, so the
        # last attribute never carries a stray "\r" or "\n"
        $line =~ s/\r?\n\z//;
        my @t = split(/\t/, $line);
        next unless defined $t[2] && $t[2] eq 'CDS';
        my ($parent) = defined $t[8] ? ($t[8] =~ m/\bParent=([^;]+)/) : ();
        # test definedness, not truth: "0" is a legal (if unusual) ID
        die("Error parsing Parent for CDS segment at $line\n")
            unless defined $parent;
        $t[8] =~ s/\bParent=[^;]+;?//;
        push(@{ $cds_for{$parent} }, join("\t", @t[2..8]));
    }
    close($fh);
    return \%cds_for;
}

# Builds the CDS output lines (without trailing newline) for one mRNA record.
#
#   $fields  - array ref with the tab-split columns of the (chomped) mRNA line
#   $cds_for - hash ref as returned by load_cds_segments()
#
# The CDS set is looked up by the mRNA "ID" attribute first and, failing that,
# by the special "acc" attribute. Returns an empty list when nothing is found.
sub cds_lines_for_mrna {
    my ($fields, $cds_for) = @_;
    my $attrs = defined $fields->[8] ? $fields->[8] : '';
    my ($id)  = ($attrs =~ m/\bID=([^;]+)/);
    my $segments;
    $segments = $cds_for->{$id} if defined $id;
    my $parent = $id;
    unless ($segments) {
        #search for the special "acc" attribute we may store there for Drupal
        my ($acc) = ($attrs =~ m/\bacc=([^;]+)/);
        $segments = $cds_for->{$acc} if defined $acc;
        # an mRNA without ID can only be referenced through its acc
        $parent = $acc unless defined $parent;
    }
    return unless $segments;

    my @lines;
    foreach my $segment (@$segments) {
        # work on a copy: the cached segment must stay Parent-less so it can
        # be emitted again for another mRNA without accumulating Parent= tags
        my $cdseg = $segment;
        $cdseg =~ s/\t([^\t]*)\z/\tParent=$parent;$1/;
        push(@lines, join("\t", @{$fields}[0..1], $cdseg));
    }
    return @lines;
}

# Script entry point: takes the CDS GFF file name from @ARGV, then copies the
# remaining input (files named in @ARGV, or STDIN) to the selected output
# handle, dropping any pre-existing CDS records and inserting the loaded CDS
# records right after each matching mRNA record.
sub run {
    my $cdsfile = shift(@ARGV);
    die($usage) unless defined $cdsfile;
    my $cds_for = load_cds_segments($cdsfile);

    while (my $line = <>) {
        my $record = $line;
        $record =~ s/\r?\n\z//;
        my @t = split(/\t/, $record);
        my $feature = defined $t[2] ? $t[2] : '';
        # CDS records already present in the input are discarded, as the
        # usage message states
        next if $feature eq 'CDS';
        print $line;
        next unless $feature eq 'mRNA';
        my @cds_lines = cds_lines_for_mrna(\@t, $cds_for);
        next unless @cds_lines;
        # a final line lacking its newline must not get CDS data glued to it
        print "\n" unless $line =~ m/\n\z/;
        print "$_\n" foreach @cds_lines;
    }
    return;
}

# run only when executed as a script, so the subs above can be loaded alone
run() unless caller;