ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/dbxref2bcp.pl
Revision: 23
Committed: Tue Jul 26 21:44:38 2011 UTC (13 years, 1 month ago) by gpertea
Original Path: ann_bin/dbxref2bcp.pl
File size: 2511 byte(s)
Log Message:
adding misc scripts

Line File contents
1 #!/usr/bin/perl
2 use strict;
3 use Getopt::Std;
4 use LWP::Simple;
5 use FindBin;use lib $FindBin::Bin;
6 #use dbSession;
# Location of UniProt's dbxref.txt; fetched only when no -f file is given.
my $url='http://www.expasy.org/cgi-bin/lists?dbxref.txt';
my $usage = qq{Usage:
dbxref2bcp.pl [-f <input_dbxref.txt>]

Parses the UniProt's dbxref.txt info preparing it for loading into geanno db,
table xrefdbs. The required data file is either given explicitely or
downloaded from the embedded url:
$url
};

# Files created below stay group-writable.
umask 0002;
getopts('f:') or die($usage."\n");

# Input file: the -f argument if present, otherwise a fresh download.
my $file=$Getopt::Std::opt_f;
if (!$file) {
    print STDERR "downloading file..\n";
    $file='dbxref_uniprot.txt';

    # Remove any stale copy first; give up if it cannot be removed.
    unlink($file);
    -f $file
        and die("Error: file $file already exists (couldn't remove!)\n");

    my $status=getstore($url, $file);
    is_error($status)
        and die "Error downloading with getstore()!\n";

    # A successful status with an empty or missing file is still a failure.
    -s $file
        or die "Error: file $file is non-existent or zero size after retrieval!\n";
    print STDERR "Download OK.\n";
}
32
# Tags recognized inside a record, in the column order written to the
# output file by putValues().
my @knowntags=('Abbrev', 'Name', 'Cat', 'LinkTp', 'Ref', 'Server', 'Db_URL', 'Note');
# xrefdb name cat linktp linktp_info server db_url note
# %known starts as tag => 1 for every known tag; storeCurVal() later raises
# an entry to the longest value length seen for that tag.
my %known;
@known{@knowntags}=(1) x scalar(@knowntags);

# Three-argument open: the name in $file is always treated as a plain path,
# never as a mode/pipe specification.
open(INF, '<', $file) || die("Error opening file $file! ($!)\n");

# Tag currently being accumulated and its (possibly multi-line) value.
my ($curtag, $curval);
open(OUTF, '>', 'xrefdbs.bcp') || die("Error creating file xrefdbs.bcp! ($!)\n");

# Values collected for the record in progress, keyed by tag.
my %val;
while (<INF>) {
    # Strip the line terminator up front (LF or CRLF) so that blank-line
    # detection also works for files with DOS line endings.
    s/[\r\n]+\z//;

    if ($_ eq '') {
        # A blank line ends the current record.
        if ($curtag) {
            storeCurVal();
            putValues();
            ($curtag,$curval)=(undef,undef);
        }
        next;
    }

    if (m/^Abbrev:\s*(.+)/) {
        # An Abbrev line starts a new record. Save the capture first,
        # because storeCurVal() runs substitutions of its own.
        my $abbrev=$1;
        # If the previous record was not closed by a blank line, its last
        # pending value must be stored before that record is flushed.
        storeCurVal() if $curtag;
        putValues();
        ($curtag, $curval)=('Abbrev',$abbrev);
        next;
    }

    # Anything outside a record (before the first Abbrev line, or after a
    # blank line) is skipped.
    next unless $curtag;

    if (m/^(\S+)\s*:(.+)/) { #new tag
        my ($t, $v)=($1,$2);
        die("Error: unrecognized tag $t at '$_'!\n") unless exists $known{$t} || $t eq 'AC';
        # NOTE(review): for an 'AC' line the pending value of the previous
        # tag is not stored (and is therefore dropped) -- confirm intended.
        storeCurVal() unless $t eq 'AC';
        $curtag=$t;
        $curval=$v;
        if ($curtag eq 'LinkTp') {
            # "LinkTp: <type>; <extra info>" is split in two: the type is
            # stored right away, the remainder goes on under tag 'linfo'.
            # NOTE(review): 'linfo' is not in @knowntags, so putValues()
            # never writes it although the column comment above lists
            # linktp_info -- confirm which tag should fill that column.
            if ($v=~m/^\s*(\w[\w ]+)\;\s+(\S.+)/) {
                $curval=$1;
                $val{$curtag}=$curval;
                $curtag='linfo';
                $curval=$2;
            }
        }
    }
    elsif (m/^\s+(.+)/) {
        # Indented line: continuation of the current value, joined with a
        # single space.
        my $vx=$1;
        $vx=~s/^\s+//;$vx=~s/\s+$//;
        $curval.=' '.$vx;
    }
}

# Flush a final record that was not followed by a blank line.
storeCurVal() if ($curtag);
putValues();

close(INF);
# Buffered write errors only surface at close, so check it.
close(OUTF) || die("Error closing file xrefdbs.bcp! ($!)\n");
90 # map { print STDERR $_."\t".$known{$_}."\n" } @knowntags;
91
# Writes the record collected in %val to OUTF as one tab-separated line,
# with columns in @knowntags order, then empties %val for the next record.
# Does nothing when %val holds no values.
sub putValues {
    if (keys(%val)) {
        # Hash slice: one column per known tag (missing tags yield undef).
        my $row = join("\t", @val{@knowntags});
        print OUTF $row."\n";
        undef(%val);
    }
}
98
# Trims $curval, files it in %val under $curtag, and keeps $known{$curtag}
# at the greatest value length seen so far for that tag.
sub storeCurVal {
    # Drop leading and trailing whitespace in one pass.
    $curval =~ s/^\s+|\s+$//g;
    $val{$curtag} = $curval;

    my $len = length($curval);
    if ($known{$curtag} < $len) {
        $known{$curtag} = $len;
    }
}

Properties

Name Value
svn:executable *