1 |
#!/usr/bin/perl |
2 |
use strict; |
3 |
use FindBin; |
4 |
|
5 |
umask 0002; |
6 |
#the line below is needed if pvmsx is used |
7 |
# also, the error condition is set only by the presence of $file |
8 |
chdir($ENV{PVMSX_WD}) if ($ENV{PVMSX_WD}); |
9 |
|
10 |
|
11 |
$ENV{'PATH'}=$FindBin::Bin.':'.$ENV{'PATH'}; |
12 |
|
13 |
#so for pvmsx to consider the task was successful, $file must be deleted! |
14 |
#============== |
15 |
# 1 is the name of the fasta sequence input file |
16 |
# 2 is the # of sequences in ${1} should = 1 for this script |
17 |
# 3 is the slice no. being processed by sx |
18 |
# 4 is 0 if not the last file, 1 if the last file |
19 |
# 5 is the # of sequences skipped initially |
20 |
# 6 is the # of sequences to be processed (-1 = ALL) |
21 |
# 7 user parameter |
22 |
# 1 2 3 4 5 6 |
23 |
my ($file, $numpass, $slice_num, $last, $skipped, $total, $userparams)=@ARGV; |
24 |
#user parameters: |
25 |
# 1) database (full path) |
26 |
# 2) PID cutoff (using 94 if not specified) |
27 |
# 3) maximum overhang |
28 |
# 4) minimum overlap |
29 |
# 5) extra flags (optional) |
30 |
# flags: M = no masking, D= separate database (not all-vs-all) |
31 |
# G = show gaps |
32 |
my ($searchdb,$pid,$maxovh,$minovl, $xflags)=split(/:/,$userparams); |
33 |
my $blast_res=$file.'.tab'; |
34 |
my $nomasking = $xflags=~/M/; |
35 |
my $gapinfo = $xflags=~/G/; |
36 |
my $otherDb = $xflags=~/D/; |
37 |
|
38 |
$pid=25 unless $pid>20; #not used for this tblastx search |
39 |
$minovl=40 unless $minovl; #not used for this blastn |
40 |
my $log_file='log_std'; |
41 |
my $err_file='err_log'; |
42 |
open(STDERR, '>>'.$err_file); |
43 |
open(STDOUT, '>>'.$log_file); |
44 |
|
45 |
my $toskip = $skipped+$numpass*($slice_num-1); |
46 |
$ENV{BLASTMAT}='/fs/sz-user-supported/Linux-i686/packages/wu-blast/matrix'; |
47 |
my $cmd = "tblastx $searchdb $file E=1e-10 B=300000 V=300000 -noseqs -notes -warnings -novalidctxok -cpus=1 -topcomboN 1 "; |
48 |
$cmd.= " -dbrecmin $toskip " if $toskip>0 && $otherDb==0; |
49 |
# $cmd.= "| blastflt PID=25 | bzip2 -cf > $blast_res.bz2"; |
50 |
$cmd.= "| blast2btab PID=25 | bzip2 -f > $blast_res.bz2" ; |
51 |
# $cmd.=$gapinfo ? ' -D5 ':' -D4 '; |
52 |
# $cmd.= ($nomasking) ? ' -FF -UF ' : ' -UT -F "m D" '; |
53 |
# unless ($otherDb) { |
54 |
# $cmd.=' -KT '; |
55 |
# $cmd.= " -k $toskip " if $toskip>0; |
56 |
# } |
57 |
#$cmd.=" > $blast_res"; |
58 |
my $slno=sprintf("slice:%09d",$slice_num); |
59 |
print STDERR ">>$slno: $cmd\n"; |
60 |
|
61 |
my $errmsg = `($cmd) 2>&1`; |
62 |
my $r=$?; |
63 |
print STDERR "<<$slno: done.\n"; |
64 |
|
65 |
if ($r || ($errmsg=~/ERROR/is) || ($errmsg=~/Segmentation/is)) { |
66 |
print STDERR "!Error at:\n$cmd\n"; |
67 |
print STDERR "$errmsg\n"; |
68 |
exit(1); |
69 |
} |
70 |
#------- sorting of results not needed for blastn |
71 |
# $cmd="sort -k11,11g -k10,10nr -k9,9nr $blast_res | bzip2 -cf > $mgblast_res.bz2"; |
72 |
# $errmsg=`($cmd) 2>&1`; |
73 |
#$r=$?; |
74 |
#if (($r && ($r>>8 != 2)) || $errmsg=~m/memory/is) { |
75 |
# die "Error sorting/compressing the results file '$mgblast_res'\n$cmd\n"; |
76 |
# } |
77 |
# unlink($file, $mgblast_res); |
78 |
unlink($file); |
79 |
exit 0; |