ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/run_jigsaw.pl
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (13 years, 1 month ago) by gpertea
File size: 4609 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 use strict;
3 use Getopt::Std;
4 use File::Basename;
5 use Cwd qw(abs_path cwd);
6 umask 0002;
7
# ---------------------------------------------------------------------------
# Main script body.
#
# Reads an evidence descriptor file (-e) and a list of data directories (-l),
# writes a per-directory evidence file via writeEvidenceFile(), builds one
# "jigsaw" command line per directory, and then either runs the commands one
# by one locally or writes them to a command file and submits that file with
# "gridx" (-g).
#
# Exit status: 0 when every launched command returned 0, 1 otherwise.
# ---------------------------------------------------------------------------

my $usage = q~
Wrapper script for running jigsaw (not the training part).

run_jigsaw.pl [options]

Options:
-e <evidence_file> - evidence descriptor file (required)
-l <dirlist_file> - file with full paths to the genomic sequence and data
directories, one per line (required)
-o <output_file> - name of output file for gene predictions (required)
-f <fasta_suffix> - the filename suffix of the genomic sequence file
(default 'fa')
-d <training_dir> - directory with training data (required, unless -L)
-n <intron_penalty> - specify maximum intron length and penalty for
exceeding the length
-g <grid_engine> - grid engine to use: 'smp', 'sge' or 'condor';
(default is: run locally, do not use the grid)
-c <numCPUs> - max grid nodes/CPUs to use (default 20; requires -g)
-m <e-mail> - e-mail to notify when all jobs are finished
-i <min_intron_len> - specify minimum intron length
-C - do not clobber existing output file (default is to
overwrite)
-L - run the linear combiner instead

Miscellaneous Options:
-h, -help - print this help message
-V - obtain program version
~;

# Keep a copy of the invocation before getopts() consumes @ARGV.
my $cmdline = "$0 " . join(' ', @ARGV);

# Options are collected in a hash rather than in package globals.
#  * 'i:' and 'h' are documented in the usage text but were missing from the
#    option spec, so using them aborted the script.
#  * 'p:' is accepted only for backward compatibility; its value is unused.
#  * -V stays unimplemented because this script defines no version string,
#    so it is still reported as an unknown option.
my %opt;
getopts('hLCm:c:g:f:e:d:l:p:n:o:i:', \%opt) || die($usage."\n");

if ($opt{h}) {
    print $usage, "\n";
    exit(0);
}

my $evidence_file = $opt{e};
my $output        = $opt{o};
my $treedir       = $opt{d};
my $idir_list     = $opt{l};
my $myfasta       = $opt{f} || 'fa';
my $useLinComb    = $opt{L};
my $penStr        = $opt{n};
my $inLen         = $opt{i};
my $noOverWrite   = $opt{C};
my $mailnotify    = $opt{m};
my $gridengine    = $opt{g};
my $maxCPUs       = $opt{c} || 20;

# Per-directory files are always named "<dir>/<basename(dir)>.<name>".
my $addPrefix = 1;

die("$usage\n Error: not all the required parameters were given!\n")
    unless $output
        && defined($idir_list)     && -f $idir_list
        && defined($evidence_file) && -f $evidence_file;

die("$usage\nError: training directory not given!\n")
    unless $useLinComb || (defined($treedir) && -d $treedir);

# The -i value is pasted into a shell command line, so only accept digits.
die("$usage\nError: invalid minimum intron length (-i): $inLen\n")
    if defined($inLen) && $inLen !~ /^\d+$/;

print STDERR "#Command line:\n$cmdline\n";

my $penArg = $penStr ? "-n \"$penStr\"" : '';
my $linArg = $useLinComb ? '-l' : '';
# NOTE(review): assumes jigsaw itself accepts "-i <min_intron_len>", as the
# usage text and the previously commented-out code imply -- confirm.
my $ilArg  = defined($inLen) ? "-i $inLen" : '';

# Collect the evidence descriptor lines as one ':'-separated string, which is
# the format writeEvidenceFile() expects.
my $evidence = '';
open(my $evidence_fh, '<', $evidence_file)
    || die("Cannot open $evidence_file: $!");
while (my $line = <$evidence_fh>) {
    chomp($line);
    ## ignore the curation line, not used when actually running combiner
    $evidence .= "$line:" unless ($line =~ /\scuration\s*/);
}
close($evidence_fh);

# One data directory per non-empty line.
my @testOn;
open(my $dirlist_fh, '<', $idir_list)
    || die("unable to open [$idir_list]: $!\n");
while (my $line = <$dirlist_fh>) {
    chomp($line);
    push(@testOn, $line) if $line;
}
close($dirlist_fh);

my $combid   = basename($evidence_file);
my $CMDQFile = '.runJIGSAW.'.$combid.'.'.$$.'.gridcmds';

my @cmdqueue;
my $treearg = $treedir ? '-d '.abs_path($treedir) : '';
foreach my $dir (@testOn) {
    my $dname  = basename($dir);
    my $prefix = $addPrefix ? "$dir/$dname." : "$dir/";
    my $combd  = "${prefix}${combid}.run";
    writeEvidenceFile($combd, $evidence, $prefix);
    my $combout = "${prefix}$output";

    # -C: leave an existing, non-empty prediction file alone.
    if ($noOverWrite && -s $combout) {
        print STDERR "#>skipping $dir: $combout already exists (-C)\n";
        next;
    }

    my $cmd = "jigsaw -f ${prefix}$myfasta $treearg $linArg -e $combd -m $combout $penArg";
    $cmd .= " $ilArg" if $ilArg;
    push(@cmdqueue, $cmd);
}

unless (@cmdqueue) {
    print STDERR "#No jigsaw commands to run.\n";
    exit(0);
}

# runCmd() hands back the system() status, which is 0 only on success.
my $failed = 0;
if ($gridengine) {
    open(my $runq_fh, '>', $CMDQFile)
        || die("Error creating $CMDQFile: $!\n");
    foreach my $queued (@cmdqueue) {
        print {$runq_fh} $queued."\n"
            or die("Error writing $CMDQFile: $!\n");
    }
    # Buffered write errors only surface at close time.
    close($runq_fh) || die("Error writing $CMDQFile: $!\n");

    my $mailArg = $mailnotify ? "-m '$mailnotify'" : '';
    $failed++
        if runCmd("gridx -g $gridengine -p $maxCPUs -S -f $CMDQFile $mailArg -O logs_jigs_run -q");
}
else { # run locally:
    foreach my $cmd (@cmdqueue) {
        # Keep going after a failure so the remaining directories still run.
        $failed++ if runCmd($cmd);
    }
}

exit($failed ? 1 : 0);
149
# Writes a per-directory evidence file.
#   $tfname - path of the file to create (overwritten if it exists)
#   $evlst  - evidence entries joined with ':' (trailing empty entries are
#             dropped by split)
#   $prefix - string prepended to every entry before it is written
# Each entry is written on its own line as "<prefix><entry>".
# Dies if the file cannot be created or written. Returns 1 on success.
sub writeEvidenceFile {
    my ($tfname, $evlst, $prefix) = @_;
    my @entries = split(/:/, defined($evlst) ? $evlst : '');

    # Three-arg open so characters in $tfname cannot be read as an open mode.
    open(my $evdata_fh, '>', $tfname)
        || die("Cannot create evidence file: $tfname ($!)\n");
    foreach my $entry (@entries) {
        print {$evdata_fh} "$prefix$entry\n"
            or die("Cannot write evidence file: $tfname ($!)\n");
    }
    # Buffered write errors only surface at close time.
    close($evdata_fh) || die("Cannot write evidence file: $tfname ($!)\n");
    return 1;
}
161
# Runs a command line through system(), echoing it to STDERR first.
#   $cmd - the command as a single string
# Returns the raw system() status: 0 on success, -1 if the command could not
# be started, otherwise the wait status (exit code in the high byte).
# Failures are reported on STDERR; the sub itself never dies.
sub runCmd {
    my ($cmd) = @_;
    print STDERR "#>running: $cmd\n";
    my $status = system($cmd);
    if ($status == -1) {
        print STDERR "#>Error: failed to execute: $cmd ($!)\n";
    }
    elsif ($status & 127) {
        printf STDERR "#>Error: command died with signal %d: %s\n",
            ($status & 127), $cmd;
    }
    elsif ($status != 0) {
        printf STDERR "#>Error: command exited with status %d: %s\n",
            ($status >> 8), $cmd;
    }
    return $status;
}

Properties

Name Value
svn:executable *