1 |
gpertea |
23 |
#!/usr/bin/perl
use strict;
use warnings;
use FindBin;

umask 0002;

# Put this script's own directory first in PATH; the original note says this
# is needed if pvmsx is used.
$ENV{'PATH'}=$FindBin::Bin.':'.$ENV{'PATH'};

# Per the original notes: the error condition is set only by the presence of
# $file, so for pvmsx to consider the task successful, $file must be deleted!
#==============
# Positional arguments (supplied by the parent controller):
#  1 is the name of the fasta sequence input file
#  2 is the # of sequences in ${1} (should = 1 for this script)
#  3 is the slice no. being processed by sx
#  4 is 0 if not the last file, 1 if the last file
#  5 is the # of sequences skipped initially
#  6 is the # of sequences to be processed (-1 = ALL)
#  7 user parameter: server:port of the running blat server
#  8 user parameter: path to the database sequence directory
my ($file, $numpass, $slice_num, $last, $skipped, $total, $serverport, $dbdir)=@ARGV;
#print STDERR "running: $0 ".join(' ',@ARGV)."\n";
my $usage=q{
Slice processing script for blat protein-to-dnax mode;
Never use without a parent controllers (e.g. gridx).
Must start the server prior to running this gridx command.
Usage example:
gridx -q -N -O logs -g condor -p 20 -n 2000 -i proteins.fa blat_client.psx server:port /fullpath/to/dbseqdir
};

# The input slice must exist and be non-empty; $total and $dbdir must be given.
die $usage."\n" unless defined($file) && -s $file && $total && $dbdir;
my $fout=$file.'.gff3';
my ($server, $port)=split(/:/, $serverport);
# Require a purely numeric port above 255; this also rejects a missing port
# or trailing garbage such as "8080x".
die $usage."\n" unless defined($port) && $port =~ /^\d+$/ && $port>255;
die $usage."\nInvalid data directory $dbdir" unless -d $dbdir;
my $log_file='log_std';
my $err_file='err_log';
# From here on all diagnostics are appended to the log files. STDOUT is
# redirected first so that a failure to open it is still reported on the
# original STDERR.
open(STDOUT, '>>', $log_file) or die "Cannot append to $log_file: $!\n";
open(STDERR, '>>', $err_file) or die "Cannot append to $err_file: $!\n";

#my $toskip=($file =~ m/_\@(\d+)_v\d+\.\d+/) ? $1 : $skipped+$numpass*($slice_num-1);
#my ($srvhost, $srvport)=split(/\:/,$srv);
#$srvport=8080 unless $srvport;
my $cmd="gfClient -minScore=50 -minIdentity=70 -t=dnax -q=prot -out=gff -nohead $server $port $dbdir $file $fout";

#my $cmd="gmap -D $dbdir -d $gmapdb -B 2 -f 1 $file > $gmap_res";
my $slno=sprintf("slice:%09d",$slice_num);
print STDERR ">>$slno: $cmd\n";
# runCmd logs any failure and removes the partial $fout. Its result is
# intentionally not acted upon: the slice file is always removed below and
# the script always exits 0, as in the original flow.
runCmd($cmd, $fout);

print STDERR "<<$slno: done.\n";
unlink($file);
exit 0;
53 |
|
|
|
54 |
|
|
# runCmd($docmd, @todel)
#
# Runs $docmd through the shell, capturing stdout and stderr together.
# The run counts as failed when the exit status is non-zero or when the
# captured output mentions Error, Segmentation, Failed or Invalid
# (case-insensitive).
#
# On failure: the command and its output are written to STDERR, the
# file-scoped input $file is copied to "err.at.$file", every path in
# @todel is removed, and 0 is returned.
# On success: returns 1.
sub runCmd {
    my ($docmd, @todel) = @_;
    my $errmsg = `($docmd) 2>&1`;
    # Backticks yield undef if the command could not be run at all.
    $errmsg = '' unless defined $errmsg;
    if ($? || ($errmsg =~ m/Error|Segmentation|Failed|Invalid/is)) {
        print STDERR "!Error at:\n$docmd\n";
        print STDERR "$errmsg\n";
        # Keep a copy of the offending input. File::Copy is used instead of
        # shelling out to cp so that the file name is never interpreted by
        # the shell, and so that a failed copy can be reported.
        require File::Copy;
        File::Copy::copy($file, "err.at.$file")
            or print STDERR "!Could not copy $file to err.at.$file: $!\n";
        # Remove partial outputs left behind by the failed command.
        unlink(@todel);
        # exit(1);
        return 0;
    }
    return 1;
}