ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/dust.pl
Revision: 23
Committed: Tue Jul 26 21:44:38 2011 UTC (13 years ago) by gpertea
Original Path: ann_bin/dust.pl
File size: 811 byte(s)
Log Message:
adding misc scripts

Line User Rev File contents
#!/usr/bin/perl
# dust.pl - mask simple tandem repeats in FASTA sequences with 'N'.
#
# Reads FASTA records from the files named on the command line (or from
# stdin when none are given), replaces repeat runs with 'N' characters and
# writes the records back out with the sequence wrapped at 70 columns.
use strict;
use warnings;
use Getopt::Std;
use FindBin;use lib $FindBin::Bin;

my $usage = q/Usage:
 dust.pl [-c <repeat#>] [-o <out_file>] <fasta_file>

 Masks all repeats of unit length 1 or greater that are repeated at
 least 4 times.
 Options:
 -c sets the repeat count to <repeat#> (default 4)
 -o writes the masked sequences to <out_file> instead of stdout
/;

# mask_repeats($seq, $min_repeats)
#   $seq         - sequence string with whitespace already removed
#   $min_repeats - minimum number of extra copies that must follow a
#                  repeat unit of 2+ characters for the run to be masked
# Returns a copy of $seq in which every masked run is replaced by the same
# number of 'N' characters, so the sequence length never changes.
sub mask_repeats {
    my ($seq, $min_repeats) = @_;

    # Units of two or more characters (shortest unit tried first) followed
    # by at least $min_repeats further copies of the same unit.
    $seq =~ s/((\w{2,}?)\2{$min_repeats,})/'N' x length $1/eg;

    # Single-character runs: one character followed by five or more copies
    # of itself.  This threshold is fixed and is not affected by -c.
    $seq =~ s/((\w)\2{5,})/'N' x length $1/eg;

    return $seq;
}

# format_record($header, $seq)
# Returns the text of one FASTA record: ">$header" followed by the
# sequence in lines of at most 70 characters.  A record with an empty
# sequence yields the header line only.
sub format_record {
    my ($header, $seq) = @_;
    return join("\n", ">$header", unpack('(A70)*', $seq)) . "\n";
}

# main()
# Parses the command line, then streams every FASTA record from the input
# through mask_repeats() to stdout or to the -o file.
# Dies with the usage text on invalid options, and with an error message
# if the output file cannot be created or closed.
sub main {
    umask 0002;

    my %opt;
    getopts('c:o:', \%opt) or die($usage . "\n");

    my $outfile = $opt{o};
    my $c       = $opt{c} || 4;
    # $c is interpolated into a regex quantifier, so it must be a plain
    # positive integer; anything else would silently change the pattern.
    die($usage . "\nError: -c requires a positive integer\n")
        unless $c =~ /^[1-9]\d*$/;

    my $out_fh = \*STDOUT;
    if (defined $outfile && length $outfile) {
        open(my $fh, '>', $outfile)
            or die("Error creating output file $outfile: $!\n");
        $out_fh = $fh;
    }

    # Read one whole FASTA record at a time: records are separated by a
    # newline immediately followed by the '>' of the next header.
    local $/ = "\n>";
    while (my $record = <>) {
        $record =~ s/^>//;    # only the first record of a file keeps its '>'
        chomp $record;        # drop the trailing "\n>" separator, if any
        next unless $record =~ /\S/;    # e.g. blank lines before the first '>'

        # First line is the header; everything after it is sequence data.
        # A header without any sequence leaves $seq undefined.
        my ($header, $seq) = split /\n/, $record, 2;
        $seq = '' unless defined $seq;
        $header =~ s/\r\z//;            # CRLF input: keep '\r' out of the header
        $seq =~ tr/\t \n\r//d;

        print {$out_fh} format_record($header, mask_repeats($seq, $c));
    }

    if ($out_fh != \*STDOUT) {
        close($out_fh) or die("Error closing output file $outfile: $!\n");
    }
    return;
}

# Run only when executed as a script, so the file can also be loaded with
# require (e.g. from a test) without consuming input.
main() unless caller;
1;

Properties

Name Value
svn:executable *