ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/tri_jigsaw.csh
Revision: 23
Committed: Tue Jul 26 21:44:38 2011 UTC (13 years, 1 month ago) by gpertea
Original Path: ann_bin/tri_jigsaw.csh
File size: 3267 byte(s)
Log Message:
adding misc scripts

Line User Rev File contents
#!/bin/tcsh -f
#
# tri_jigsaw.csh <dirlist_file> <jigsaw_output_suffix>
#
# For every path listed in <dirlist_file>:
#   1. cd into the path's parent directory;
#   2. run recon_jigsaw.pl on the three jigsaw outputs found under
#      <name>/, <name>f/ and <name>r/ (all with the given suffix);
#   3. add the number of distinct first-attribute values in column 9 of
#      <name>/<name>.<suffix> to a running total;
#   4. if recon_jigsaw.pl produced a non-empty GFF3, build <name>.pmap.iit
#      and <name>.gmap.iit from the mapping evidence and run gffann.pl;
#      otherwise just copy the (empty/missing) GFF3 to the annotated name;
#   5. dump the mapping evidence to <name>.top5gmap.gff3 and
#      <name>.top5pmap.gff3.
#
# External programs expected on PATH: recon_jigsaw.pl, gtf2gff, gff2iit,
# gffann.pl, perl.
#
# NOTE(review): assumes every entry in <dirlist_file> has the form
# <parent>/<name>; for an entry without '/', the :h modifier is expected
# to return the entry unchanged, which would make the script cd into
# <name> itself -- confirm against the real dirlist files.

# Suffixes of the per-directory mapping files used as evidence.
#set gmap = 'refseq_exon.gff3'
set gmap = 'sim4.gff3'
#set pmap = 'unipr.blat.gff3'
set pmap = 'pexo_exon.gff3'

# Track names handed to gtf2gff / gff2iit through -t.
set mtrack = 'sim4cc'
set ptrack = 'p2g_exo'

# Quoted comparison: same "second argument missing or empty" test as
# before, but it no longer breaks when $2 contains spaces or operators.
if ("$2" == "") then
    echo "Usage: tri_jigsaw.csh <dirlist_file> <jigsaw_output_suffix>"
    echo " <dirlist_file> is the one corresponding to the 'both strands' run"
    echo " <jigsaw_output_suffix> is the one that was used when jigsaw was run"
    echo " (e.g. refseq.jgff) "
    echo ' WARNING: make sure variables $pmap and $gmap are correct!'
    echo " (currently they are: $pmap and $gmap)"
    exit 1
endif

set dirlist = "$1"
set jsuf = "$2"
echo "..running:\ntri_jigsaw.csh $*"

# Built from $HOME rather than a single-quoted '~/...': inside single
# quotes tcsh never expands the tilde, so gffann.pl used to receive a
# literal "~/ann/..." path.
set protdb = "$HOME/ann/protdb/unipr_mammals.fa.cidx"

set predcount = 0

# Remember where we started so each iteration can return here by absolute
# path; a plain "cd .." is only right when the parent part of an entry is
# a single-level relative directory.
set topdir = "$cwd"

foreach entry ( `cat "$dirlist"` )
    set bname = $entry:t
    set dbase = $entry:h
    #echo "dirbase=$dbase (bname=$bname)"
    set fname = ${bname}f
    set rname = ${bname}r
    cd $dbase

    set j_gff3 = ${bname}.recon_${jsuf}.gff3
    set j_anngff3 = ${bname}.ann.recon_${jsuf}.gff3
    echo "recon_jigsaw.pl -P $bname/$bname.fa -o $j_gff3 $bname/$bname.$jsuf $fname/$fname.$jsuf $rname/$rname.$jsuf"
    recon_jigsaw.pl -P $bname/$bname.fa -o $j_gff3 $bname/$bname.$jsuf $fname/$fname.$jsuf $rname/$rname.$jsuf
    #ls -al $bname/$bname.pmap.fltOK.gtf

    # Count distinct values of the first ';'-separated attribute in column 9
    # of the both-strands jigsaw output (comment lines excluded).
    set pcount = `grep -v '^#' $bname/$bname.$jsuf | cut -f9 | cut -f1 -d ';' | sort -u | wc -l`
    @ predcount = $predcount + $pcount

    if (-s $j_gff3) then
        #--validate the resulting gff
        # set badcds=$bname.recon_$jsuf.badCDS.gtf
        # gffilter -g $bname/$bname.fa -b $badcds $j_gff3
        # if (-s $badcds) then
        #   echo "**** ERROR found at CDS validation for $j_gff3! (see $badcds)"
        # else
        #   /bin/rm -f $badcds
        # endif

        # Stale index files are removed by explicit name. The former glob
        # ($d.[pg]map.i{it,fa}) raised tcsh's "No match" error whenever none
        # of the files existed yet, and was redundant with these two rm calls.
        /bin/rm -f $bname.pmap.iit $bname.pmap.ifa
        #gff2iit -o $bname.pmap -t $ptrack $bname/$bname.$pmap
        gtf2gff -t $ptrack $bname/$bname.$pmap > $bname.iit_pmap.gff3
        gtf2gff -t 'pmap' $bname/$bname.pmap_exon.gff3 >> $bname.iit_pmap.gff3
        gff2iit -o $bname.pmap $bname.iit_pmap.gff3

        /bin/rm -f $bname.gmap.iit $bname.gmap.ifa
        #gff2iit -o $bname.gmap -t $mtrack $bname/$bname.$gmap
        # The perl filter shortens ".mrna<N>" to ".m<N>" in the first match
        # on each line.
        gtf2gff -t $mtrack $bname/$bname.$gmap | perl -pe 's/\.mrna(\d+)/\.m$1/' > $bname.iit_gmap.gff3
        gtf2gff -t 'gmap' $bname/$bname.gmap_exon.gff3 >> $bname.iit_gmap.gff3
        gff2iit -o $bname.gmap -t $mtrack $bname.iit_gmap.gff3

        #echo "gffann.pl -o $j_anngff3 -t $bname -m $bname.gmap.iit -p $bname.pmap.iit -P $protdb $j_gff3"
        gffann.pl -o $j_anngff3 -t $bname -m $bname.gmap.iit -p $bname.pmap.iit -P $protdb $j_gff3

        # -- temporary: throw in all the mapping evidence
        # NOTE(review): unlike the no-prediction branch below, this dump does
        # NOT apply the .mrna -> .m rename; kept as in the original, but it
        # looks inconsistent -- confirm which form is intended.
        gtf2gff -t $mtrack $bname/$bname.$gmap > $bname.top5gmap.gff3
    else
        # no predictions.. just show whatever evidence is there
        cp $j_gff3 $j_anngff3
        gtf2gff -t $mtrack $bname/$bname.$gmap | perl -pe 's/\.mrna(\d+)/\.m$1/' > $bname.top5gmap.gff3
    endif

    # Evidence dump shared by both branches (it was duplicated verbatim).
    gtf2gff -t 'gmap' $bname/$bname.gmap_exon.gff3 >> $bname.top5gmap.gff3
    gtf2gff -t $ptrack $bname/$bname.$pmap > $bname.top5pmap.gff3
    gtf2gff -t 'pmap' $bname/$bname.pmap_exon.gff3 >> $bname.top5pmap.gff3
    gtf2gff -t 'blat' $bname/$bname.pblat.gff3 >> $bname.top5pmap.gff3

    cd "$topdir"
end

echo "Total: $predcount gene models predicted."

Properties

Name Value
svn:executable *