ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/dust.pl
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (13 years, 1 month ago) by gpertea
File size: 811 byte(s)
Log Message:
Line File contents
#!/usr/bin/perl
# dust.pl - mask simple tandem repeats in FASTA sequences with 'N'.
#
# Reads FASTA records from the files named on the command line (or from
# STDIN), replaces low-complexity stretches with runs of 'N' of the same
# length, and writes the records back out wrapped at 70 columns.
#
# Two masking passes are applied to every sequence, in this order:
#   1. a unit of two or more word characters immediately followed by at
#      least <repeat#> further copies of itself (-c, default 4);
#   2. a single word character immediately followed by at least 5 further
#      copies of itself (this threshold is fixed and not affected by -c).
use strict;
use warnings;
use Getopt::Std;
use FindBin;use lib $FindBin::Bin;

use constant DEFAULT_REPEAT_COUNT => 4;     # used when -c is absent or 0
use constant LINE_WIDTH           => 70;    # output sequence line length

my $usage = q/Usage:
dust.pl [-c <repeat#>] [-o <out_file>] <fasta_file>

Masks all repeats of unit length 1 or greater that are repeated at
least 4 times.
Options:
-c sets the repeat count to <repeat#> (default 4)
-o writes the masked sequences to <out_file> instead of stdout
/;
umask 0002;

# Run as a program only when executed directly, so that the helper subs
# below can be loaded (e.g. with require) without consuming any input.
main() unless caller;

# main()
# Parses the command line, then streams every FASTA record through
# parse_fasta_record -> mask_repeats -> wrap_sequence.
# Dies with the usage text on a bad option or a non-numeric -c value, and
# with a system error message if the -o file cannot be created or closed.
sub main {
    my %opt;
    getopts('c:o:', \%opt) || die($usage."\n");

    # -c ends up inside a regex quantifier, so insist on a plain positive
    # integer instead of letting arbitrary text change the pattern's meaning.
    my $count = $opt{c} || DEFAULT_REPEAT_COUNT;
    die("Error: invalid repeat count '$count' (-c expects a positive integer)\n\n"
        . $usage . "\n")
        unless $count =~ /\A[1-9][0-9]*\z/;

    # Send the output to the -o file when one was requested, else to STDOUT.
    my $out = \*STDOUT;
    my $out_fh;
    my $outfile = $opt{o};
    if (defined $outfile && length $outfile) {
        open($out_fh, '>', $outfile)
            or die("Error: cannot create output file '$outfile': $!\n");
        $out = $out_fh;
    }

    {
        # One read returns one whole FASTA record: everything up to and
        # including the "\n>" that introduces the next record.
        local $/ = "\n>";
        while (my $record = <>) {
            my @fields = parse_fasta_record($record);
            next unless @fields;    # blank record, nothing to print
            my ($header, $seq) = @fields;
            print {$out} ">$header\n";
            print {$out} wrap_sequence(mask_repeats($seq, $count));
        }
    }

    if ($out_fh) {
        # Buffered write errors only surface when the handle is closed.
        close($out_fh)
            or die("Error: failed writing output file '$outfile': $!\n");
    }
    return;
}

# parse_fasta_record($record)
# Splits one raw record, as read with $/ set to "\n>", into its header line
# and its sequence with all tabs, spaces and line breaks removed.
# Returns ($header, $sequence); the sequence is '' for a header-only record.
# Returns the empty list when the record has no header text at all.
sub parse_fasta_record {
    my ($record) = @_;
    $record =~ s/^>//;      # leading '>' (only present on the first record)
    $record =~ s/\n>\z//;   # record separator left at the end by the read

    my ($header, $seq) = split /\n/, $record, 2;
    return unless defined $header;
    $header =~ s/\r\z//;    # CRLF input: do not leak '\r' into the header
    return unless length $header;

    $seq = '' unless defined $seq;
    $seq =~ tr/\t \n\r//d;
    return ($header, $seq);
}

# mask_repeats($seq, $count)
# Returns a copy of $seq in which repeats are overwritten with 'N's; the
# length of the sequence is preserved.
#   $count - number of additional copies of a 2+ character unit that must
#            follow the unit for the whole run to be masked; falls back to
#            DEFAULT_REPEAT_COUNT when undefined or 0.
sub mask_repeats {
    my ($seq, $count) = @_;
    $count ||= DEFAULT_REPEAT_COUNT;

    # Multi-character units; the lazy {2,}? tries the shortest unit first.
    $seq =~ s/((\w{2,}?)\2{$count,})/'N' x length $1/eg;
    # for poly-nucleotide: one character followed by 5 or more copies.
    $seq =~ s/((\w)\2{5,})/'N' x length $1/eg;
    return $seq;
}

# wrap_sequence($seq)
# Returns $seq broken into newline-terminated lines of at most LINE_WIDTH
# characters, or '' for an empty sequence.
sub wrap_sequence {
    my ($seq) = @_;
    return '' unless length $seq;
    return join("\n", unpack('(A' . LINE_WIDTH . ')*', $seq)) . "\n";
}

Properties

Name Value
svn:executable *