ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/gicl_mgbl_short.psx
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (13 years, 2 months ago) by gpertea
File size: 3681 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 use strict;
3 use FindBin;
4
5 umask 0002;
6 #the line below is needed if pvmsx is used
7 # also, the error condition is set only by the presence of $file
8 chdir($ENV{PVMSX_WD}) if ($ENV{PVMSX_WD});
9
10
11 $ENV{'PATH'}=$FindBin::Bin.':'.$ENV{'PATH'};
12
13 #so for pvmsx to consider the task was successful, $file must be deleted!
14 #==============
15 # 1 is the name of the fasta sequence input file
16 # 2 is the # of sequences in ${1} should = 1 for this script
17 # 3 is the slice no. being processed by sx
18 # 4 is 0 if not the last file, 1 if the last file
19 # 5 is the # of sequences skipped initially
20 # 6 is the # of sequences to be processed (-1 = ALL)
21 # 7 user parameter
22 # 1 2 3 4 5 6
23 my ($file, $numpass, $slice_num, $last, $skipped, $total, $userparams)=@ARGV;
24 #user parameters:
25 # 1) database (full path)
26 # 2) PID cutoff (using 94 if not specified)
27 # 3) maximum overhang
28 # 4) minimum overlap
29 # 5) extra flags (optional)
30 # flags: M = no masking, D= separate database (not all-vs-all)
31 # G = show gaps
32 my ($searchdb,$pid,$maxovh,$minovl, $xflags)=split(/:/,$userparams);
33 my $mgblast_res=$file.'.tab';
34 my $nomasking = $xflags=~/M/;
35 my $gapinfo = $xflags=~/G/;
36 my $otherDb = $xflags=~/D/;
37
38 $pid=94 unless $pid>50;
39 $minovl=40 unless $minovl;
40 my $log_file='log_std';
41 my $err_file='err_log';
42 open(STDERR, '>>'.$err_file);
43 open(STDOUT, '>>'.$log_file);
44
45 my $toskip=($file =~ m/_\@(\d+)_v\d+\.\d+/) ? $1 : $skipped+$numpass*($slice_num-1);
46
47 #= mgblast weakness - when some query seqs are entirely masked
48 #= or what's left is just low complexity, it will abort the search
49 #= so let's filter the input in advance
50 unless ($nomasking) {
51 my $renfile=$file.'_cpy';
52 rename($file, $renfile) || die "Cannot rename '$file' to '$renfile'!\n";
53 local $/="\n>";
54 open (INFILE, "mdust -m L < $renfile |") || die "Cannot open renamed file '$renfile'\n";
55 open (OUTFILE, '>'.$file) || die "Cannot create flt file '$file'\n";
56 open (MASKED, '>>masked.lst') || die "Cannot open masked.lst for append\n";
57 open (TOOSHORT, '>>tooshort.lst') || die "Cannot open tooshort.lst for append\n";
58
59 my $counter;
60 while(<INFILE>) {
61 s/^>//;
62 chomp;
63 $counter++;
64 my ($defline, $seq)=(m/^(.+?)\n(.+)/s);
65 my $s=$seq;
66 $s =~ tr/\n \t//d;
67 my $seqlen=length($s);
68 if ($seqlen<$minovl) {
69 print TOOSHORT $defline."\n";
70 next;
71 }
72 my @uc=($s =~ m/([A-M,O-W,Y,Z]+)/g);
73 my $valid;
74 foreach my $l (@uc) {
75 if (length($l)>12) { # a valid stretch of at least 12nt would suffice
76 $valid=1;
77 last;
78 }
79 }
80 if ($valid) {
81 print OUTFILE '>'.$defline."\n".$seq."\n";
82 }
83 else {
84 print MASKED $defline."\n";
85 }
86 } #while
87 close(INFILE);close(OUTFILE);close(MASKED);close(TOOSHORT);
88 die "Error: no sequence in this slice!\n" unless $counter;
89 unlink($renfile);
90 }
91
92 my $cmd="mgblast -i $file -d $searchdb -p $pid -W12 -X12 ".
93 " -JF -v100 -b100 -C$minovl -H$maxovh ";
94 #$cmd.=$gapinfo ? ' -D5 ':' -D4 ';
95 $cmd.= ' -D4 -UF -F"m D" '; #use dust filter
96 #$cmd.= ($nomasking) ? ' -FF -UF ' : ' -UT -F "m D" ';
97 unless ($otherDb) {
98 $cmd.=' -KT ';
99 $cmd.= " -k $toskip " if $toskip>0;
100 }
101 $cmd.=" > $mgblast_res";
102 my $slno=sprintf("slice:%09d",$slice_num);
103 print STDERR ">>$slno: $cmd\n";
104
105 my $errmsg = `($cmd) 2>&1`;
106 print STDERR "<<$slno: done.\n";
107
108 if ($? || ($errmsg=~/ERROR/i) || ($errmsg=~/Segmentation/i)) {
109 print STDERR "!Error at:\n$cmd\n";
110 print STDERR "$errmsg\n";
111 exit(1);
112 }
113 $cmd="sort -k11,11g -k10,10nr -k9,9nr $mgblast_res | bzip2 -cf > $mgblast_res.bz2";
114 my $r=system($cmd);
115 if ($r && ($r>>8 != 2) ) {
116 die "Error sorting/compressing the results file '$mgblast_res'\n$cmd\n";
117 }
118
119 unlink($file, $mgblast_res);
120 exit 0;

Properties

Name Value
svn:executable *