1 |
duarte |
264 |
import gnu.getopt.Getopt; |
2 |
|
|
|
3 |
|
|
import java.io.BufferedReader; |
4 |
|
|
import java.io.FileReader; |
5 |
|
|
import java.io.IOException; |
6 |
|
|
import java.sql.SQLException; |
7 |
|
|
|
8 |
|
|
|
9 |
filippis |
398 |
//import proteinstructure.CiffilePdb; |
10 |
|
|
//import proteinstructure.CiffileFormatError; |
11 |
duarte |
264 |
import proteinstructure.Graph; |
12 |
|
|
import proteinstructure.Pdb; |
13 |
|
|
import proteinstructure.PdbChainCodeNotFoundError; |
14 |
|
|
import proteinstructure.PdbCodeNotFoundError; |
15 |
|
|
import proteinstructure.PdbaseInconsistencyError; |
16 |
|
|
import proteinstructure.PdbasePdb; |
17 |
|
|
import proteinstructure.PdbfileFormatError; |
18 |
|
|
import proteinstructure.PdbfilePdb; |
19 |
|
|
import tools.MySQLConnection; |
20 |
|
|
|
21 |
|
|
|
22 |
|
|
public class genDbGraph { |
23 |
|
|
/*------------------------------ constants ------------------------------*/ |
24 |
|
|
|
25 |
|
|
public static final String PDB_DB = "pdbase"; |
26 |
|
|
public static final String DB_HOST = "white"; |
27 |
|
|
public static final String DB_USER = getUserName(); |
28 |
|
|
public static final String DB_PWD = "nieve"; |
29 |
|
|
public static final String DSSP_EXE = "/project/StruPPi/bin/dssp"; |
30 |
|
|
public static final String DSSP_PARAMS = "--"; |
31 |
filippis |
396 |
public static final String NACCESS_EXE = "/project/StruPPi/bin/naccess"; |
32 |
|
|
public static final String NACCESS_PARAMS = ""; |
33 |
duarte |
264 |
|
34 |
|
|
//public static double cutoff = 4.2; |
35 |
|
|
//public static String edgeType = "ALL"; |
36 |
|
|
|
37 |
|
|
/*---------------------------- private methods --------------------------*/ |
38 |
|
|
/** |
39 |
|
|
* Get user name from operating system (for use as database username). |
40 |
|
|
* */ |
41 |
|
|
private static String getUserName() { |
42 |
|
|
String user = null; |
43 |
|
|
user = System.getProperty("user.name"); |
44 |
|
|
if(user == null) { |
45 |
|
|
System.err.println("Could not get user name from operating system."); |
46 |
|
|
} |
47 |
|
|
return user; |
48 |
|
|
} |
49 |
|
|
|
50 |
|
|
public static void main(String[] args) throws IOException { |
51 |
|
|
|
52 |
|
|
|
53 |
|
|
String help = "Usage, 3 options:\n" + |
54 |
filippis |
407 |
"1) genDbGraph -i <listfile> -d <distance_cutoff> -t <contact_type> -s <seq_sep> -o <output_db> [-D <pdbase_db>] [-m <mode>] \n" + |
55 |
|
|
"2) genDbGraph -p <pdb_code> -c <chain_pdb_code> -d <distance_cutoff> -t <contact_type> -s <seq_sep> -o <output_db> [-D <pdbase_db>] [-m <mode>] \n" + |
56 |
|
|
"3) genDbGraph -f <pdbfile> -c <chain_pdb_code> -d <distance_cutoff> -t <contact_type> -s <seq_sep> -o <output_db> [-m <mode>] \n" + |
57 |
duarte |
359 |
"\nA comma separated list of contact types and distance cutoffs can be given instead of just 1, e.g. -d 8.0,8.5 -t Ca,Cb will generate the graphs for Ca at 8.0 and for Cb at 8.5\n" + |
58 |
|
|
"If only 1 contact type given and multiple cutoffs, graphs will be generated at all the cutoffs for the one contact type\n"+ |
59 |
|
|
"\nIn case 2) also a list of comma separated pdb codes and chain codes can be specified, e.g. -p 1bxy,1jos -c A,A\n" + |
60 |
duarte |
358 |
"\nIf pdbase_db not specified, the default pdbase will be used\n" + |
61 |
|
|
"\nSecondary structure will be taken from pdbase database. If reading from pdb file and the pdb file is missing the secondary structure, then it will be assigned using dssp\n"; |
62 |
duarte |
264 |
|
63 |
|
|
String listfile = ""; |
64 |
|
|
String[] pdbCodes = null; |
65 |
|
|
String[] pdbChainCodes = null; |
66 |
|
|
String pdbfile = ""; |
67 |
|
|
String pdbaseDb = PDB_DB; |
68 |
duarte |
359 |
String[] edgeTypes = null; |
69 |
|
|
double[] cutoffs = null; |
70 |
filippis |
407 |
int[] seqseps = null; |
71 |
duarte |
264 |
String outputDb = ""; |
72 |
filippis |
396 |
String mode = "GRAPH"; |
73 |
duarte |
264 |
|
74 |
filippis |
407 |
Getopt g = new Getopt("genDbGraph", args, "i:p:c:f:d:t:s:o:D:m:h?"); |
75 |
duarte |
264 |
int c; |
76 |
|
|
while ((c = g.getopt()) != -1) { |
77 |
|
|
switch(c){ |
78 |
|
|
case 'i': |
79 |
|
|
listfile = g.getOptarg(); |
80 |
|
|
break; |
81 |
|
|
case 'p': |
82 |
|
|
pdbCodes = g.getOptarg().split(","); |
83 |
|
|
break; |
84 |
|
|
case 'c': |
85 |
|
|
pdbChainCodes = g.getOptarg().split(","); |
86 |
|
|
break; |
87 |
|
|
case 'f': |
88 |
|
|
pdbfile = g.getOptarg(); |
89 |
|
|
break; |
90 |
|
|
case 'd': |
91 |
duarte |
359 |
String[] cutoffsStr = g.getOptarg().split(","); |
92 |
|
|
cutoffs = new double[cutoffsStr.length]; |
93 |
|
|
for (int i =0;i<cutoffs.length;i++) { |
94 |
|
|
cutoffs[i] = Double.valueOf(cutoffsStr[i]); |
95 |
|
|
} |
96 |
duarte |
264 |
break; |
97 |
|
|
case 't': |
98 |
duarte |
359 |
edgeTypes = g.getOptarg().split(","); |
99 |
duarte |
264 |
break; |
100 |
filippis |
407 |
case 's': |
101 |
|
|
String[] seqsepsStr = g.getOptarg().split(","); |
102 |
|
|
seqseps = new int[seqsepsStr.length]; |
103 |
|
|
for (int i =0;i<seqseps.length;i++) { |
104 |
|
|
seqseps[i] = Integer.valueOf(seqsepsStr[i]); |
105 |
|
|
} |
106 |
|
|
break; |
107 |
duarte |
264 |
case 'o': |
108 |
|
|
outputDb = g.getOptarg(); |
109 |
|
|
break; |
110 |
|
|
case 'D': |
111 |
|
|
pdbaseDb = g.getOptarg(); |
112 |
|
|
break; |
113 |
filippis |
396 |
case 'm': |
114 |
|
|
mode = g.getOptarg(); |
115 |
filippis |
398 |
break; |
116 |
duarte |
264 |
case 'h': |
117 |
|
|
case '?': |
118 |
|
|
System.out.println(help); |
119 |
|
|
System.exit(0); |
120 |
|
|
break; // getopt() already printed an error |
121 |
|
|
} |
122 |
|
|
} |
123 |
|
|
|
124 |
duarte |
359 |
if (outputDb.equals("") || edgeTypes==null || cutoffs==null) { |
125 |
|
|
System.err.println("Some missing option\n"); |
126 |
duarte |
264 |
System.err.println(help); |
127 |
|
|
System.exit(1); |
128 |
|
|
} |
129 |
duarte |
359 |
if (edgeTypes.length!=cutoffs.length && edgeTypes.length!=1) { |
130 |
|
|
System.err.println("Not same number of contact types as cutoffs given\n"); |
131 |
|
|
System.err.println(help); |
132 |
|
|
System.exit(1); |
133 |
|
|
} |
134 |
filippis |
407 |
if (seqseps != null && edgeTypes.length!=seqseps.length) { |
135 |
|
|
System.err.println("Not same number of contact types as sequence separations given\n"); |
136 |
|
|
System.err.println(help); |
137 |
|
|
System.exit(1); |
138 |
|
|
} |
139 |
duarte |
264 |
if (listfile.equals("") && pdbCodes==null && pdbfile.equals("")){ |
140 |
duarte |
359 |
System.err.println("Either a listfile, some pdb codes/chain codes or a pdbfile must be given\n"); |
141 |
duarte |
264 |
System.err.println(help); |
142 |
|
|
System.exit(1); |
143 |
|
|
} |
144 |
|
|
if ((!listfile.equals("") && pdbCodes!=null) || (!listfile.equals("") && !pdbfile.equals("")) || (pdbCodes!=null && !pdbfile.equals(""))) { |
145 |
duarte |
359 |
System.err.println("Options -p/-c, -i and -f/-c are exclusive. Use only one of them\n"); |
146 |
duarte |
264 |
System.err.println(help); |
147 |
|
|
System.exit(1); |
148 |
|
|
} |
149 |
filippis |
396 |
if (!(mode.equals("GRAPH") || mode.equals("PDB") || mode.equals("BOTH"))) { |
150 |
|
|
System.err.println("Allowed values for mode:GRAPH,PDB,BOTH."); |
151 |
|
|
System.err.println(help); |
152 |
|
|
System.exit(1); |
153 |
|
|
} |
154 |
duarte |
359 |
|
155 |
|
|
// setting edgeTypes in case only 1 was given with multiple cutoffs |
156 |
|
|
if (edgeTypes.length==1 && cutoffs.length>1) { |
157 |
|
|
String edgeType = edgeTypes[0]; |
158 |
|
|
edgeTypes = new String[cutoffs.length]; |
159 |
|
|
for (int i=0;i<cutoffs.length;i++){ |
160 |
|
|
edgeTypes[i] = edgeType; |
161 |
|
|
} |
162 |
|
|
} |
163 |
duarte |
264 |
|
164 |
|
|
|
165 |
|
|
MySQLConnection conn = null; |
166 |
|
|
|
167 |
|
|
try{ |
168 |
|
|
conn = new MySQLConnection(DB_HOST, DB_USER, DB_PWD); |
169 |
filippis |
407 |
conn.setSqlMode("NO_UNSIGNED_SUBTRACTION,TRADITIONAL"); |
170 |
duarte |
264 |
} catch (Exception e) { |
171 |
|
|
System.err.println("Error opening database connection. Exiting"); |
172 |
|
|
System.exit(1); |
173 |
|
|
} |
174 |
|
|
|
175 |
|
|
|
176 |
|
|
if (pdbfile.equals("")){ |
177 |
|
|
|
178 |
|
|
if (!listfile.equals("")) { |
179 |
|
|
BufferedReader fpdb = new BufferedReader(new FileReader(listfile)); |
180 |
|
|
String line = ""; |
181 |
|
|
int numLines = 0; |
182 |
|
|
fpdb.mark(100000); |
183 |
|
|
while ((line = fpdb.readLine() ) != null ) { |
184 |
duarte |
285 |
if (line.length()>0) numLines++; |
185 |
duarte |
264 |
} |
186 |
|
|
fpdb.reset(); |
187 |
|
|
pdbCodes = new String[numLines]; |
188 |
|
|
pdbChainCodes = new String[numLines]; |
189 |
|
|
numLines = 0; |
190 |
|
|
while ((line = fpdb.readLine() ) != null ) { |
191 |
duarte |
285 |
pdbCodes[numLines] = line.split("\\s+")[0].toLowerCase(); |
192 |
|
|
pdbChainCodes[numLines] = line.split("\\s+")[1]; |
193 |
duarte |
264 |
numLines++; |
194 |
|
|
} |
195 |
duarte |
358 |
fpdb.close(); |
196 |
duarte |
264 |
} |
197 |
|
|
|
198 |
|
|
int numPdbs = 0; |
199 |
|
|
|
200 |
|
|
for (int i=0;i<pdbCodes.length;i++) { |
201 |
|
|
String pdbCode = pdbCodes[i]; |
202 |
|
|
String pdbChainCode = pdbChainCodes[i]; |
203 |
|
|
|
204 |
|
|
try { |
205 |
duarte |
359 |
|
206 |
|
|
System.out.println("Getting pdb data for "+pdbCode+"_"+pdbChainCode); |
207 |
duarte |
285 |
|
208 |
duarte |
277 |
Pdb pdb = new PdbasePdb(pdbCode, pdbChainCode, pdbaseDb, conn); |
209 |
filippis |
398 |
//Pdb pdb = new CiffilePdb(new File("/project/StruPPi/BiO/DBd/PDB-REMEDIATED/data/structures/unzipped/all/mmCIF/"+pdbCode+".cif"), pdbChainCode); |
210 |
filippis |
396 |
if (!mode.equals("GRAPH")) { |
211 |
filippis |
398 |
try { |
212 |
|
|
pdb.runDssp(DSSP_EXE, DSSP_PARAMS); |
213 |
|
|
} catch (Exception e) { |
214 |
|
|
System.err.println(e.getMessage()); |
215 |
|
|
} |
216 |
|
|
try { |
217 |
|
|
pdb.checkScop("1.71", false); |
218 |
|
|
} catch (Exception e) { |
219 |
|
|
System.err.println(e.getMessage()); |
220 |
|
|
} |
221 |
|
|
try { |
222 |
|
|
pdb.runNaccess(NACCESS_EXE, NACCESS_PARAMS); |
223 |
|
|
} catch (Exception e) { |
224 |
|
|
System.err.println(e.getMessage()); |
225 |
|
|
} |
226 |
|
|
try { |
227 |
|
|
int mistakes = pdb.checkConsurfHssp(false); |
228 |
|
|
System.out.println("ConsurfHssp Mistakes:"+mistakes); |
229 |
|
|
} catch (Exception e) { |
230 |
|
|
System.err.println(e.getMessage()); |
231 |
|
|
} |
232 |
|
|
try { |
233 |
|
|
pdb.checkEC(false); |
234 |
|
|
} catch (Exception e) { |
235 |
|
|
System.err.println(e.getMessage()); |
236 |
|
|
} |
237 |
|
|
try { |
238 |
|
|
int mistakes = pdb.checkCSA("2.2.5", false); |
239 |
|
|
System.out.println("CSA Mistakes:"+mistakes); |
240 |
|
|
} catch (Exception e) { |
241 |
|
|
System.err.println(e.getMessage()); |
242 |
|
|
} |
243 |
filippis |
407 |
//pdb.writeToDb(conn,outputDb); |
244 |
|
|
pdb.writeToDbFast(conn, outputDb); |
245 |
filippis |
396 |
} |
246 |
duarte |
359 |
// get graphs |
247 |
filippis |
396 |
if (!mode.equals("PDB")) { |
248 |
|
|
for (int j = 0; j<edgeTypes.length; j++) { |
249 |
filippis |
411 |
System.out.print("--> graph "+edgeTypes[j]+" for cutoff "+cutoffs[j]); |
250 |
filippis |
396 |
|
251 |
|
|
Graph graph = pdb.get_graph(edgeTypes[j], cutoffs[j]); |
252 |
filippis |
407 |
if (seqseps != null) { |
253 |
|
|
if (seqseps[j] > 1) { |
254 |
filippis |
411 |
System.out.print(" and sequence separation >= "+seqseps[j]); |
255 |
filippis |
407 |
graph.restrictContactsToMinRange(seqseps[j]); |
256 |
|
|
} |
257 |
|
|
} |
258 |
|
|
//graph.write_graph_to_db(conn,outputDb); |
259 |
|
|
graph.write_graph_to_db_fast(conn,outputDb); |
260 |
filippis |
396 |
|
261 |
filippis |
411 |
System.out.println(); |
262 |
filippis |
396 |
numPdbs++; |
263 |
|
|
} |
264 |
duarte |
359 |
} |
265 |
duarte |
285 |
|
266 |
duarte |
264 |
} catch (PdbaseInconsistencyError e) { |
267 |
duarte |
277 |
System.err.println("Inconsistency in " + pdbCode + pdbChainCode); |
268 |
duarte |
264 |
} catch (PdbCodeNotFoundError e) { |
269 |
duarte |
277 |
System.err.println("Couldn't find pdb code "+pdbCode); |
270 |
duarte |
264 |
} catch (SQLException e) { |
271 |
duarte |
285 |
System.err.println("SQL error for structure "+pdbCode+"_"+pdbChainCode+", error: "+e.getMessage()); |
272 |
duarte |
264 |
} catch (PdbChainCodeNotFoundError e) { |
273 |
duarte |
277 |
System.err.println("Couldn't find pdb chain code "+pdbChainCode+" for pdb code "+pdbCode); |
274 |
filippis |
398 |
}/* catch (CiffileFormatError e) { |
275 |
|
|
System.err.println(e.getMessage()); |
276 |
|
|
}*/ |
277 |
duarte |
264 |
|
278 |
|
|
} |
279 |
|
|
|
280 |
|
|
// output results |
281 |
duarte |
359 |
System.out.println("Number of graphs loaded successfully: " + numPdbs); |
282 |
duarte |
264 |
|
283 |
|
|
|
284 |
|
|
} else { |
285 |
|
|
String pdbChainCode = pdbChainCodes[0]; |
286 |
|
|
try { |
287 |
duarte |
359 |
|
288 |
|
|
System.out.println("Getting chain "+pdbChainCode+" from pdb file "+pdbfile); |
289 |
|
|
|
290 |
duarte |
264 |
Pdb pdb = new PdbfilePdb(pdbfile,pdbChainCode); |
291 |
|
|
if (!pdb.hasSecondaryStructure()) { |
292 |
|
|
pdb.runDssp(DSSP_EXE, DSSP_PARAMS); |
293 |
|
|
} |
294 |
filippis |
396 |
if (!mode.equals("GRAPH")) { |
295 |
filippis |
398 |
try { |
296 |
|
|
pdb.runDssp(DSSP_EXE, DSSP_PARAMS); |
297 |
|
|
} catch (Exception e) { |
298 |
|
|
System.err.println(e.getMessage()); |
299 |
|
|
} |
300 |
|
|
try { |
301 |
|
|
pdb.checkScop("1.71", false); |
302 |
|
|
} catch (Exception e) { |
303 |
|
|
System.err.println(e.getMessage()); |
304 |
|
|
} |
305 |
|
|
try { |
306 |
|
|
pdb.runNaccess(NACCESS_EXE, NACCESS_PARAMS); |
307 |
|
|
} catch (Exception e) { |
308 |
|
|
System.err.println(e.getMessage()); |
309 |
|
|
} |
310 |
|
|
try { |
311 |
|
|
int mistakes = pdb.checkConsurfHssp(false); |
312 |
|
|
System.out.println("ConsurfHssp Mistakes:"+mistakes); |
313 |
|
|
} catch (Exception e) { |
314 |
|
|
System.err.println(e.getMessage()); |
315 |
|
|
} |
316 |
|
|
try { |
317 |
|
|
pdb.checkEC(false); |
318 |
|
|
} catch (Exception e) { |
319 |
|
|
System.err.println(e.getMessage()); |
320 |
|
|
} |
321 |
|
|
try { |
322 |
|
|
int mistakes = pdb.checkCSA("2.2.5", false); |
323 |
|
|
System.out.println("CSA Mistakes:"+mistakes); |
324 |
|
|
} catch (Exception e) { |
325 |
|
|
System.err.println(e.getMessage()); |
326 |
|
|
} |
327 |
filippis |
396 |
pdb.writeToDb(conn,outputDb); |
328 |
filippis |
411 |
pdb.writeToDbFast(conn, outputDb); |
329 |
filippis |
396 |
} |
330 |
duarte |
359 |
|
331 |
|
|
// get graphs |
332 |
filippis |
396 |
if (!mode.equals("PDB")) { |
333 |
|
|
for (int j = 0; j<edgeTypes.length; j++) { |
334 |
filippis |
411 |
System.out.print("--> graph "+edgeTypes[j]+" for cutoff "+cutoffs[j]); |
335 |
filippis |
396 |
|
336 |
|
|
Graph graph = pdb.get_graph(edgeTypes[j], cutoffs[j]); |
337 |
filippis |
407 |
if (seqseps != null) { |
338 |
|
|
if (seqseps[j] > 1) { |
339 |
filippis |
411 |
System.out.print(" and sequence separation >= "+seqseps[j]); |
340 |
filippis |
407 |
graph.restrictContactsToMinRange(seqseps[j]); |
341 |
|
|
} |
342 |
|
|
} |
343 |
|
|
//graph.write_graph_to_db(conn,outputDb); |
344 |
|
|
graph.write_graph_to_db_fast(conn,outputDb); |
345 |
filippis |
411 |
|
346 |
|
|
System.out.println(); |
347 |
filippis |
396 |
} |
348 |
duarte |
264 |
} |
349 |
|
|
|
350 |
duarte |
359 |
} catch (SQLException e) { |
351 |
|
|
System.err.println("Couldn't write graph to db, error: "+e.getMessage()); |
352 |
duarte |
264 |
} catch (PdbfileFormatError e) { |
353 |
|
|
System.err.println("pdb file "+pdbfile+" doesn't have right format"); |
354 |
|
|
} catch (PdbChainCodeNotFoundError e) { |
355 |
|
|
System.err.println("chain code "+pdbChainCode+" wasn't found in file "+pdbfile); |
356 |
|
|
} |
357 |
|
|
} |
358 |
duarte |
358 |
|
359 |
|
|
// closing db connection |
360 |
|
|
conn.close(); |
361 |
duarte |
264 |
} |
362 |
|
|
|
363 |
|
|
} |