import gnu.getopt.Getopt;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import proteinstructure.Pdb;
import proteinstructure.PdbCodeNotFoundError;
import proteinstructure.PdbLoadError;
import proteinstructure.PdbasePdb;
import tools.MySQLConnection;


public class dumpseq { |
19 |
/*------------------------------ constants ------------------------------*/ |
20 |
|
21 |
public static final String PDB_DB = "pdbase"; |
22 |
public static final String DB_HOST = "white"; |
23 |
public static final String DB_USER = MySQLConnection.getUserName(); |
24 |
public static final String DB_PWD = "nieve"; |
25 |
|
26 |
public static final String GAP_CHARACTER = "-"; |
27 |
|
28 |
public static void main(String[] args) throws IOException { |
29 |
|
30 |
|
31 |
String help = "Usage, 3 options:\n" + |
32 |
"1) genGraph -i <listfile> [-o <output_dir> | -f <one_output_file> | -s] [-D <pdbase_db>] \n" + |
33 |
"2) genGraph -p <pdb_code> -c <chain_pdb_code> [-o <output_dir> | -f <one_output_file> | -s] [-D <pdbase_db>] \n" + |
34 |
"Output options: -o one file per sequence, -f one file for all sequences, -s standard output"+ |
35 |
"In case 2) also a list of comma separated pdb codes and chain codes can be specified, e.g. -p 1bxy,1jos -c A,A\n" + |
36 |
"If pdbase_db not specified, the default pdbase will be used\n"; |
37 |
|
38 |
String listfile = ""; |
39 |
String[] pdbCodes = null; |
40 |
String[] pdbChainCodes = null; |
41 |
String pdbaseDb = PDB_DB; |
42 |
String outputDir = ""; |
43 |
File oneOutputFile = null; |
44 |
boolean stdout = false; |
45 |
|
46 |
Getopt g = new Getopt("genGraph", args, "i:p:c:o:f:D:sh?"); |
47 |
int c; |
48 |
while ((c = g.getopt()) != -1) { |
49 |
switch(c){ |
50 |
case 'i': |
51 |
listfile = g.getOptarg(); |
52 |
break; |
53 |
case 'p': |
54 |
pdbCodes = g.getOptarg().split(","); |
55 |
break; |
56 |
case 'c': |
57 |
pdbChainCodes = g.getOptarg().split(","); |
58 |
break; |
59 |
case 'o': |
60 |
outputDir = g.getOptarg(); |
61 |
break; |
62 |
case 'f': |
63 |
oneOutputFile = new File(g.getOptarg()); |
64 |
break; |
65 |
case 'D': |
66 |
pdbaseDb = g.getOptarg(); |
67 |
break; |
68 |
case 's': |
69 |
stdout = true; |
70 |
break; |
71 |
case 'h': |
72 |
case '?': |
73 |
System.out.println(help); |
74 |
System.exit(0); |
75 |
break; // getopt() already printed an error |
76 |
} |
77 |
} |
78 |
|
79 |
if (listfile.equals("") && pdbCodes==null){ |
80 |
System.err.println("Either a listfile or some pdb codes/chain codes must be given"); |
81 |
System.err.println(help); |
82 |
System.exit(1); |
83 |
} |
84 |
if (!listfile.equals("") && pdbCodes!=null) { |
85 |
System.err.println("Options -p/-c and -i are exclusive. Use only one of them"); |
86 |
System.err.println(help); |
87 |
System.exit(1); |
88 |
} |
89 |
|
90 |
|
91 |
MySQLConnection conn = null; |
92 |
|
93 |
try{ |
94 |
conn = new MySQLConnection(DB_HOST, DB_USER, DB_PWD); |
95 |
} catch (Exception e) { |
96 |
System.err.println("Error opening database connection. Exiting"); |
97 |
System.exit(1); |
98 |
} |
99 |
|
100 |
|
101 |
|
102 |
|
103 |
if (!listfile.equals("")) { |
104 |
BufferedReader fpdb = new BufferedReader(new FileReader(listfile)); |
105 |
String line = ""; |
106 |
int numLines = 0; |
107 |
fpdb.mark(100000); |
108 |
while ((line = fpdb.readLine() ) != null ) { |
109 |
if (line.length()>0) numLines++; |
110 |
} |
111 |
fpdb.reset(); |
112 |
pdbCodes = new String[numLines]; |
113 |
pdbChainCodes = new String[numLines]; |
114 |
numLines = 0; |
115 |
while ((line = fpdb.readLine() ) != null ) { |
116 |
pdbCodes[numLines] = line.split("\\s+")[0].toLowerCase(); |
117 |
pdbChainCodes[numLines] = line.split("\\s+")[1]; |
118 |
numLines++; |
119 |
} |
120 |
} |
121 |
|
122 |
int numPdbs = 0; |
123 |
|
124 |
PrintStream Out = null; |
125 |
if (stdout) { |
126 |
Out = System.out; |
127 |
} else if (oneOutputFile!=null) { |
128 |
Out = new PrintStream(new FileOutputStream(oneOutputFile)); |
129 |
} |
130 |
|
131 |
for (int i=0;i<pdbCodes.length;i++) { |
132 |
String pdbCode = pdbCodes[i]; |
133 |
String pdbChainCode = pdbChainCodes[i]; |
134 |
|
135 |
try { |
136 |
|
137 |
Pdb pdb = new PdbasePdb(pdbCode, pdbaseDb, conn); |
138 |
pdb.load(pdbChainCode); |
139 |
|
140 |
String sequence = pdb.getSequence(); |
141 |
|
142 |
File outputFile = new File(outputDir,pdbCode+"_"+pdbChainCode+".fasta"); |
143 |
|
144 |
if (!stdout && oneOutputFile==null) { |
145 |
Out = new PrintStream(new FileOutputStream(outputFile.getAbsolutePath())); |
146 |
} |
147 |
|
148 |
if (!stdout) { // if output of sequence is stdout, then we don't want to print just the sequence without FASTA headers |
149 |
Out.println(">"+pdbCode+"_"+pdbChainCode); |
150 |
} |
151 |
|
152 |
Out.println(sequence); |
153 |
|
154 |
if (!stdout && oneOutputFile==null) { |
155 |
Out.close(); |
156 |
} |
157 |
|
158 |
if (!stdout) { // if output of sequence is stdout, then we don't want to print anything else to stdout |
159 |
System.out.println("Wrote "+pdbCode+"_"+pdbChainCode+".fasta"); |
160 |
} |
161 |
|
162 |
numPdbs++; |
163 |
|
164 |
} catch (PdbLoadError e) { |
165 |
System.err.println("Error loading pdb data for " + pdbCode + pdbChainCode+", specific error: "+e.getMessage()); |
166 |
} catch (PdbCodeNotFoundError e) { |
167 |
System.err.println("Couldn't find pdb code "+pdbCode); |
168 |
} catch (SQLException e) { |
169 |
System.err.println("SQL error for structure "+pdbCode+"_"+pdbChainCode+", error: "+e.getMessage()); |
170 |
} |
171 |
|
172 |
} |
173 |
|
174 |
if (!stdout && oneOutputFile!=null) { |
175 |
Out.close(); |
176 |
} |
177 |
|
178 |
// output results |
179 |
if (!stdout) { // if output of sequence is stdout, then we don't want to print anything else to stdout |
180 |
System.out.println("Number of dumped sequences: " + numPdbs); |
181 |
} |
182 |
|
183 |
|
184 |
} |
185 |
|
186 |
|
187 |
} |