ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/dbxref2bcp.pl
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (13 years, 1 month ago) by gpertea
File size: 2511 byte(s)
Log Message:
Line User Rev File contents
1 gpertea 23 #!/usr/bin/perl
2     use strict;
3     use Getopt::Std;
4     use LWP::Simple;
5     use FindBin;use lib $FindBin::Bin;
6     #use dbSession;
# Default location of UniProt's dbxref.txt; fetched only when no -f file is given.
my $url='http://www.expasy.org/cgi-bin/lists?dbxref.txt';
my $usage = qq/Usage:
 dbxref2bcp.pl [-f <input_dbxref.txt>]

 Parses the UniProt's dbxref.txt info preparing it for loading into geanno db,
 table xrefdbs. The required data file is either given explicitely or
 downloaded from the embedded url:
 $url
/;

umask 0002;

# Use the documented hash form of getopts() instead of reading the option
# back out of Getopt::Std's own package variables.
my %opts;
getopts('f:', \%opts) || die($usage."\n");

# $file is the input consumed by the parsing code further down.
my $file=$opts{f};
unless ($file) {
    # No input file given: download a fresh copy into the current directory.
    print STDERR "downloading file..\n";
    $file='dbxref_uniprot.txt';

    # Remove any stale copy first so a failed download cannot be mistaken
    # for a successful one.
    unlink($file);
    die("Error: file $file already exists (couldn't remove!)\n")
        if -f $file;

    # getstore() returns the HTTP status code; report it on failure so the
    # cause (404, 500, ...) is visible to the user.
    my $status=getstore($url, $file);
    die "Error downloading with getstore() (HTTP status $status)!\n"
        if is_error($status);
    die "Error: file $file is non-existent or zero size after retrieval!\n"
        unless -s $file;
    print STDERR "Download OK.\n";
}
32    
# Tags recognized inside a record, listed in the column order that
# putValues() writes to the output file.
my @knowntags=('Abbrev', 'Name', 'Cat', 'LinkTp', 'Ref', 'Server', 'Db_URL', 'Note');
# xrefdb name cat linktp linktp_info server db_url note
# NOTE(review): a LinkTp value of the form "type; info" is split below and the
# info part is stored under the key 'linfo', but 'linfo' is not in @knowntags,
# so it is never written out, while 'Ref' occupies the column the line above
# labels linktp_info. Confirm the intended column layout.

# %known doubles as (a) the set of accepted tags and (b) storage for the
# longest value seen per tag (updated by storeCurVal()).
my %known;
@known{@knowntags}=(1) x scalar(@knowntags);

# 3-arg open: a user-supplied -f name is always treated as a plain file to
# read, never as a mode or pipe specification.
open(my $inf, '<', $file) || die("Error opening file $file!\n");

# Tag currently being accumulated and its (possibly multi-line) value.
my ($curtag, $curval);
# OUTF stays a bareword handle because putValues() prints to it by name.
open(OUTF, '>', 'xrefdbs.bcp') || die("Error creating file xrefdbs.bcp!\n");

# Values collected for the record being parsed, keyed by tag.
my %val;
while (<$inf>) {
    if (m/^\r?$/) {
        # Blank line (LF or CRLF terminated): end of record.
        if ($curtag) {
            storeCurVal();
            putValues();
            ($curtag,$curval)=(undef,undef);
        }
        next;
    }
    if (m/^Abbrev:\s*(.+)/) {
        # Start of a new record. Save the capture before calling helpers
        # that run their own regexes.
        my $abbrev=$1;
        # If the previous record was not closed by a blank line, flush its
        # pending tag value so it is not lost, then emit that record.
        storeCurVal() if $curtag;
        putValues();
        ($curtag, $curval)=('Abbrev',$abbrev);
        next;
    }
    # Ignore everything (file header, AC lines, footer) outside a record.
    next unless $curtag;
    s/\r?\n$//; # strip the line terminator, tolerating CRLF input
    if (m/^(\S+)\s*:(.+)/) { #new tag
        my ($t, $v)=($1,$2);
        die("Error: unrecognized tag $t at '$_'!\n") unless exists $known{$t} || $t eq 'AC';
        # The pending value is committed before switching tags, except when
        # the new tag is AC (behavior kept from the original code).
        storeCurVal() unless $t eq 'AC';
        $curtag=$t;
        $curval=$v;
        if ($curtag eq 'LinkTp') {
            # "type; extra info": keep the type as the LinkTp value and
            # continue accumulating the remainder under 'linfo'.
            if ($v=~m/^\s*(\w[\w ]+)\;\s+(\S.+)/) {
                $curval=$1;
                $val{$curtag}=$curval;
                $curtag='linfo';
                $curval=$2;
            }
        }
    }
    elsif (m/^\s+(.+)/) {
        # Indented continuation line: append to the current value,
        # separated by a single space.
        my $vx=$1;
        $vx=~s/^\s+//;$vx=~s/\s+$//;
        $curval.=' '.$vx;
    }
}

# Flush the last record if the file did not end with a blank line.
storeCurVal() if ($curtag);
putValues();

close($inf);
# Buffered write errors (e.g. disk full) only surface at close time.
close(OUTF) || die("Error closing file xrefdbs.bcp: $!\n");
89    
90     # map { print STDERR $_."\t".$known{$_}."\n" } @knowntags;
91    
# Writes the record collected in %val as one tab-separated line to OUTF,
# with columns in @knowntags order, then empties %val for the next record.
# Does nothing when no values have been collected.
sub putValues {
    return if !%val;
    # Hash slice yields the values in column order; tags that were not
    # seen in this record come out as empty fields.
    print OUTF join("\t", @val{@knowntags}), "\n";
    %val = ();
    return;
}
98    
# Commits the value accumulated in $curval under the tag $curtag:
# trims surrounding whitespace, stores it in %val, and raises the
# per-tag maximum length kept in %known when this value is longer.
sub storeCurVal {
    # Trim leading and trailing whitespace in place.
    for ($curval) {
        s/^\s+//;
        s/\s+$//;
    }
    $val{$curtag} = $curval;
    my $len = length($curval);
    if ($known{$curtag} < $len) {
        $known{$curtag} = $len;
    }
}

Properties

Name Value
svn:executable *