ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/twinscan2gff.pl
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (13 years, 1 month ago) by gpertea
File size: 2232 byte(s)
Log Message:
Line User Rev File contents
1 gpertea 23 #!/usr/bin/perl
2     use strict;
3     use Getopt::Std;
4     use FindBin;use lib $FindBin::Bin;
5     umask 0002;
6     my $usage=q/
7     Usage:
8     twinscan2gff.pl [-p <transcript_suffix>] < twinscan_output.gtf > gff3
9     /;
10     getopts('p:') || die($usage."\n");
11     my $prefix=$Getopt::Std::opt_p || "winscan";
12     my $curgene; #current gene name
13     my $curtran; #current transcript
14     my ($curstrand, $curchr); #chromosome and strand for current model
15     my @exd; #exons for current model
16     my %stops; #transcripts with stop_codon info transcript_id=>stop_start
17    
18     while (<>) {
19     next if m/^\s*#/;
20     chomp;
21     next unless $_;
22     my ($chr, $prog, $ftype, $fstart, $fend, $fscore, $strand, $frame, $info)=
23     split(/\t/);
24     my ($gid)=($info=~m/gene_id\s+\"([\w\.\:\;\'\|\#\-]+)\"/);
25     my ($tid)=($info=~m/transcript_id\s+\"([\w\.\:\;\'\|\#\-]+)\"/);
26     die ("Error parsing gene/transcript id from $_\n") unless $gid && $tid;
27     if ($ftype eq 'stop_codon') {
28     $stops{$tid}=($strand eq '-')? $fend : $fstart;
29     next;
30     }
31     next unless $ftype eq 'CDS';
32    
33     if ($tid ne $curtran) {
34     &writeTranscript() if $curtran;
35     $curtran=$tid;
36     $curchr=$chr;
37     $curstrand=$strand;
38     @exd=();
39     }
40     push(@exd, [$fstart, $fend, $frame]);
41     }
42    
43     &writeTranscript() if $curtran;
44    
45    
46     sub writeTranscript {
47     my @ex= sort { $main::a->[0] <=> $main::b->[0] } @exd;
48     my ($mstart, $mend)=($ex[0]->[0], $ex[-1]->[1]);
49     if (my $stpos=$stops{$curtran}) {
50     if ($curstrand eq '-') {
51     die ("Invalid stop codon position for $curtran! ($stpos vs $mstart)\n")
52     unless $stpos==$mstart-1;
53     $mstart-=3; #include stop codon (twinscan doesn't..)
54     $ex[0]->[0]-=3;
55     }
56     else {
57     die ("Invalid stop codon position for $curtran! ($stpos vs $mend)\n")
58     unless $stpos==$mend+1;
59     $mend+=3; #include stop codon!
60     $ex[-1]->[1]+=3;
61     }
62     }
63     my ($tnum, $anum)=($curtran=~m/\.(\d+)\.(\d+)$/);
64     my $geneid='t'.$prefix.$tnum;
65     $curtran=$geneid."tws$anum";
66     print join("\t",$curchr, 'twinscan', 'mRNA', $mstart, $mend, '.',
67     $curstrand, '.', "ID=$curtran;Name=$geneid")."\n";
68     my $i=1;
69     foreach my $exon (@ex) {
70     my ($estart, $eend, $eframe)=@$exon;
71     print join("\t",$curchr, 'twinscan', 'CDS', $estart, $eend, '.',
72     $curstrand, $eframe,
73     "Parent=$curtran")."\n";
74     $i++;
75     }
76     }

Properties

Name Value
svn:executable *