ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/genDbGraph.java
Revision: 503
Committed: Tue Jan 8 19:51:39 2008 UTC (16 years, 9 months ago) by filippis
File size: 14412 byte(s)
Log Message:
createGraphDb:
-scop_graph table added
-!!!!residue serial fields (num, i_num, j_num) are changed to unsigned

!!!!ProtStructGraph:
-interSSE variable added
-getResidueSerial abstract method added
-restrictContactsBetweenSs method added

!!!!AIGraph:
-getResidueSerial method added

RIGraph:
-!!!!getResidueSerial method added
-scop graphs are written to db correctly
-interSSE variable taken into account for CR db field

DbRIgraph:
-changes made to allow reading from the db using a scop id
-interSSE and minSeqSep are set now in get_db_graph_info()
-!!!!FIXED BUG:fullLength in read_graph_from_db is now set to the size of the serials2nodes TreeMap instead of the maximum serial in serials2nodes. It was giving wrong results for scop graphs.

testDbRIGraph added:
It reads one graph from a source db based either 
-on pdbcode, chaincode and graph properties or 
-on scop id and graph properties or 
-on a graph id 
and it writes the graph to a destination db.

Pdb:
-unused scop regions are now removed in restrictToScopDomain
-unused residues are also removed from resser2pdbresser, pdbresser2resser, resser2allrsa, resser2scrsa, resser2consurfhsspscore, resser2consurfhsspcolor, catalSiteSet
-!!!!sequence is reset to scop sequence and fullLength to the length of the scop sequence

CatalSiteSet:
- removeCatalSiteRes(int resser) method added

CatalyticSite:
-remRes(int resser) method added

Scop:
-remove(ScopRegion e) method added

genDbGraph:Comments only added to demonstrate new functionality.
-comment added to show how to use restrictContactsBetweenSs
-comment added to show how to use restrictToScopDomain
-runDssp is now always run, not only when mode != "GRAPH". This was changed because the contact range may now depend on the ss assignment (restrictContactsBetweenSs) and we want to ensure consistent results
-runNaccess moved to the bottom so it is always run last. In this way if restrictToScopDomain is used, we don't have to run naccess twice.


Line File contents
import gnu.getopt.Getopt;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

//import proteinstructure.CiffilePdb;
import proteinstructure.Pdb;
import proteinstructure.PdbCodeNotFoundError;
import proteinstructure.PdbLoadError;
import proteinstructure.PdbasePdb;
import proteinstructure.PdbfilePdb;
import proteinstructure.RIGraph;
import proteinstructure.SecStrucElement;
import tools.MySQLConnection;
18
19
20 public class genDbGraph {
21 /*------------------------------ constants ------------------------------*/
22
23 public static final String PDB_DB = "pdbase";
24 public static final String DB_HOST = "white";
25 public static final String DB_USER = getUserName();
26 public static final String DB_PWD = "nieve";
27 public static final String DSSP_EXE = "/project/StruPPi/bin/dssp";
28 public static final String DSSP_PARAMS = "--";
29 public static final String NACCESS_EXE = "/project/StruPPi/bin/naccess";
30 public static final String NACCESS_PARAMS = "";
31
32 //public static double cutoff = 4.2;
33 //public static String edgeType = "ALL";
34
35 /*---------------------------- private methods --------------------------*/
36 /**
37 * Get user name from operating system (for use as database username).
38 * */
39 private static String getUserName() {
40 String user = null;
41 user = System.getProperty("user.name");
42 if(user == null) {
43 System.err.println("Could not get user name from operating system.");
44 }
45 return user;
46 }
47
48 public static void main(String[] args) throws IOException {
49
50
51 String help = "Usage, 3 options:\n" +
52 "1) genDbGraph -i <listfile> -d <distance_cutoff> -t <contact_type> [-r directed] -s <seq_sep> -o <output_db> [-D <pdbase_db>] [-m <mode>] \n" +
53 "2) genDbGraph -p <pdb_code> -c <chain_pdb_code> -d <distance_cutoff> -t <contact_type> [-r directed] -s <seq_sep> -o <output_db> [-D <pdbase_db>] [-m <mode>] \n" +
54 "3) genDbGraph -f <pdbfile> -c <chain_pdb_code> -d <distance_cutoff> -t <contact_type> [-r directed] -s <seq_sep> -o <output_db> [-m <mode>] \n" +
55 "\nA comma separated list of contact types and distance cutoffs can be given instead of just 1, e.g. -d 8.0,8.5 -t Ca,Cb will generate the graphs for Ca at 8.0 and for Cb at 8.5\n" +
56 "If only 1 contact type given and multiple cutoffs, graphs will be generated at all the cutoffs for the one contact type\n"+
57 "\nIn case 2) also a list of comma separated pdb codes and chain codes can be specified, e.g. -p 1bxy,1jos -c A,A\n" +
58 "\nIf pdbase_db not specified, the default pdbase will be used\n" +
59 "\nSecondary structure will be taken from pdbase database. If reading from pdb file and the pdb file is missing the secondary structure, then it will be assigned using dssp\n";
60
61 String listfile = "";
62 String[] pdbCodes = null;
63 String[] pdbChainCodes = null;
64 String pdbfile = "";
65 String pdbaseDb = PDB_DB;
66 String[] edgeTypes = null;
67 double[] cutoffs = null;
68 int[] seqseps = null;
69 boolean[] directed = null;
70 String outputDb = "";
71 String mode = "GRAPH";
72
73 Getopt g = new Getopt("genDbGraph", args, "i:p:c:f:d:t:r:s:o:D:m:h?");
74 int c;
75 while ((c = g.getopt()) != -1) {
76 switch(c){
77 case 'i':
78 listfile = g.getOptarg();
79 break;
80 case 'p':
81 pdbCodes = g.getOptarg().split(",");
82 break;
83 case 'c':
84 pdbChainCodes = g.getOptarg().split(",");
85 break;
86 case 'f':
87 pdbfile = g.getOptarg();
88 break;
89 case 'd':
90 String[] cutoffsStr = g.getOptarg().split(",");
91 cutoffs = new double[cutoffsStr.length];
92 for (int i =0;i<cutoffs.length;i++) {
93 cutoffs[i] = Double.valueOf(cutoffsStr[i]);
94 }
95 break;
96 case 't':
97 edgeTypes = g.getOptarg().split(",");
98 break;
99 case 'r':
100 String[] directedStr = g.getOptarg().split(",");
101 directed = new boolean[directedStr.length];
102 for (int i =0;i<directed.length;i++) {
103 directed[i] = Boolean.valueOf(directedStr[i]);
104 }
105 break;
106 case 's':
107 String[] seqsepsStr = g.getOptarg().split(",");
108 seqseps = new int[seqsepsStr.length];
109 for (int i=0;i<seqseps.length;i++) {
110 seqseps[i] = Integer.valueOf(seqsepsStr[i]);
111 }
112 break;
113 case 'o':
114 outputDb = g.getOptarg();
115 break;
116 case 'D':
117 pdbaseDb = g.getOptarg();
118 break;
119 case 'm':
120 mode = g.getOptarg();
121 break;
122 case 'h':
123 case '?':
124 System.out.println(help);
125 System.exit(0);
126 break; // getopt() already printed an error
127 }
128 }
129
130 if (directed==null) {
131 // we set by default all directed to false
132 directed = new boolean[edgeTypes.length];
133 }
134
135 if (outputDb.equals("") || edgeTypes==null || cutoffs==null) {
136 System.err.println("Some missing option\n");
137 System.err.println(help);
138 System.exit(1);
139 }
140 if (edgeTypes.length!=cutoffs.length && edgeTypes.length!=1) {
141 System.err.println("Not same number of contact types as cutoffs given\n");
142 System.err.println(help);
143 System.exit(1);
144 }
145 if (seqseps != null && edgeTypes.length!=seqseps.length) {
146 System.err.println("Not same number of contact types as sequence separations given\n");
147 System.err.println(help);
148 System.exit(1);
149 }
150 if (directed != null && edgeTypes.length!=directed.length) {
151 System.err.println("Not same number of contact types as directionalities given\n");
152 System.err.println(help);
153 System.exit(1);
154 }
155 if (listfile.equals("") && pdbCodes==null && pdbfile.equals("")){
156 System.err.println("Either a listfile, some pdb codes/chain codes or a pdbfile must be given\n");
157 System.err.println(help);
158 System.exit(1);
159 }
160 if ((!listfile.equals("") && pdbCodes!=null) || (!listfile.equals("") && !pdbfile.equals("")) || (pdbCodes!=null && !pdbfile.equals(""))) {
161 System.err.println("Options -p/-c, -i and -f/-c are exclusive. Use only one of them\n");
162 System.err.println(help);
163 System.exit(1);
164 }
165 if (!(mode.equals("GRAPH") || mode.equals("PDB") || mode.equals("BOTH"))) {
166 System.err.println("Allowed values for mode:GRAPH,PDB,BOTH.");
167 System.err.println(help);
168 System.exit(1);
169 }
170
171 // setting edgeTypes in case only 1 was given with multiple cutoffs
172 if (edgeTypes.length==1 && cutoffs.length>1) {
173 String edgeType = edgeTypes[0];
174 edgeTypes = new String[cutoffs.length];
175 for (int i=0;i<cutoffs.length;i++){
176 edgeTypes[i] = edgeType;
177 }
178 }
179
180
181 MySQLConnection conn = null;
182
183 try{
184 conn = new MySQLConnection(DB_HOST, DB_USER, DB_PWD);
185 conn.setSqlMode("NO_UNSIGNED_SUBTRACTION,TRADITIONAL");
186 } catch (Exception e) {
187 System.err.println("Error opening database connection. Exiting");
188 System.exit(1);
189 }
190
191
192 if (pdbfile.equals("")){
193
194 if (!listfile.equals("")) {
195 BufferedReader fpdb = new BufferedReader(new FileReader(listfile));
196 String line = "";
197 int numLines = 0;
198 fpdb.mark(100000);
199 while ((line = fpdb.readLine() ) != null ) {
200 if (line.length()>0) numLines++;
201 }
202 fpdb.reset();
203 pdbCodes = new String[numLines];
204 pdbChainCodes = new String[numLines];
205 numLines = 0;
206 while ((line = fpdb.readLine() ) != null ) {
207 pdbCodes[numLines] = line.split("\\s+")[0].toLowerCase();
208 pdbChainCodes[numLines] = line.split("\\s+")[1];
209 numLines++;
210 }
211 fpdb.close();
212 }
213
214 int numPdbs = 0;
215
216 for (int i=0;i<pdbCodes.length;i++) {
217 String pdbCode = pdbCodes[i];
218 String pdbChainCode = pdbChainCodes[i];
219
220 boolean dssp = false, scop = false, naccess = false, consurf = false, ec = false, csa = false;
221 int numGraphs = 0;
222
223 try {
224
225 System.out.println("Getting pdb data for "+pdbCode+"_"+pdbChainCode);
226
227 Pdb pdb = new PdbasePdb(pdbCode, pdbaseDb, conn);
228 //Pdb pdb = new CiffilePdb(new File("/project/StruPPi/BiO/DBd/PDB-REMEDIATED/data/structures/unzipped/all/mmCIF/"+pdbCode+".cif"), pdbChainCode);
229 pdb.load(pdbChainCode);
230 try {
231 pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE);
232 //pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
233 dssp = true;
234 } catch (Exception e) {
235 System.err.println(e.getMessage());
236 }
237 if (!mode.equals("GRAPH")) {
238 try {
239 pdb.checkScop("1.71", false);
240 scop = true;
241 } catch (Exception e) {
242 System.err.println(e.getMessage());
243 }
244 try {
245 int mistakes = pdb.checkConsurfHssp(false);
246 System.out.println("ConsurfHssp Mistakes:"+mistakes);
247 if (mistakes == 0) consurf = true;
248 } catch (Exception e) {
249 System.err.println(e.getMessage());
250 }
251 try {
252 pdb.checkEC(false);
253 ec = true;
254 } catch (Exception e) {
255 System.err.println(e.getMessage());
256 }
257 try {
258 int mistakes = pdb.checkCSA("2.2.5", false);
259 System.out.println("CSA Mistakes:"+mistakes);
260 if (mistakes == 0) csa = true;
261 } catch (Exception e) {
262 System.err.println(e.getMessage());
263 }/*
264 pdb.restrictToScopDomain("d1pjua2");
265 try {
266 pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE);
267 //pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
268 dssp = true;
269 } catch (Exception e) {
270 System.err.println(e.getMessage());
271 }*/
272 try {
273 pdb.runNaccess(NACCESS_EXE, NACCESS_PARAMS);
274 naccess = true;
275 } catch (Exception e) {
276 System.err.println(e.getMessage());
277 }
278
279 //pdb.writeToDb(conn,outputDb);
280 pdb.writeToDbFast(conn, outputDb);
281 }
282 // get graphs
283 if (!mode.equals("PDB")) {
284 for (int j = 0; j<edgeTypes.length; j++) {
285 System.out.print("--> "+(directed[j]?"directed":"")+" graph "+edgeTypes[j]+" for cutoff "+cutoffs[j]);
286
287 RIGraph graph = pdb.get_graph(edgeTypes[j], cutoffs[j], directed[j]);
288 //graph.restrictContactsBetweenSs();
289 if (seqseps != null) {
290 if (seqseps[j] > 1) {
291 System.out.print(" and sequence separation >= "+seqseps[j]);
292 graph.restrictContactsToMinRange(seqseps[j]);
293 }
294 }
295 //graph.writeToDb(conn,outputDb);
296 graph.write_graph_to_db_fast(conn,outputDb);
297
298 System.out.println();
299 numPdbs++;
300 numGraphs++;
301 }
302 }
303
304 } catch (PdbLoadError e) {
305 System.err.println("Error loading pdb data for " + pdbCode + pdbChainCode+", specific error: "+e.getMessage());
306 } catch (PdbCodeNotFoundError e) {
307 System.err.println("Couldn't find pdb code "+pdbCode);
308 } catch (SQLException e) {
309 System.err.println("SQL error for structure "+pdbCode+"_"+pdbChainCode+", error: "+e.getMessage());
310 }
311 /* catch (CiffileFormatError e) {
312 System.err.println(e.getMessage());
313 }*/
314
315 System.out.println("SUMMARY:"+pdbCode+"_"+pdbChainCode+" dssp:"+dssp+" scop:"+scop+" naccess:"+naccess+" consurf:"+consurf+" ec:"+ec+" csa:"+csa+ " graphs:"+numGraphs);
316
317 }
318
319 // output results
320 System.out.println("Number of graphs loaded successfully: " + numPdbs);
321
322
323 } else {
324 String pdbChainCode = pdbChainCodes[0];
325 boolean dssp = false, scop = false, naccess = false, consurf = false, ec = false, csa = false;
326 int numGraphs = 0;
327
328 try {
329
330 System.out.println("Getting chain "+pdbChainCode+" from pdb file "+pdbfile);
331
332 Pdb pdb = new PdbfilePdb(pdbfile);
333 pdb.load(pdbChainCode);
334 if (!pdb.hasSecondaryStructure()) {
335 pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE);
336 //pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
337 }
338 if (!mode.equals("GRAPH")) {
339 try {
340 pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE);
341 //pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
342 dssp = true;
343 } catch (Exception e) {
344 System.err.println(e.getMessage());
345 }
346 try {
347 pdb.checkScop("1.71", false);
348 scop = true;
349 } catch (Exception e) {
350 System.err.println(e.getMessage());
351 }
352 try {
353 int mistakes = pdb.checkConsurfHssp(false);
354 System.out.println("ConsurfHssp Mistakes:"+mistakes);
355 if (mistakes == 0) consurf = true;
356 } catch (Exception e) {
357 System.err.println(e.getMessage());
358 }
359 try {
360 pdb.checkEC(false);
361 ec = true;
362 } catch (Exception e) {
363 System.err.println(e.getMessage());
364 }
365 try {
366 int mistakes = pdb.checkCSA("2.2.5", false);
367 System.out.println("CSA Mistakes:"+mistakes);
368 if (mistakes == 0) csa = true;
369 } catch (Exception e) {
370 System.err.println(e.getMessage());
371 }
372 /*pdb.restrictToScopDomain("d1eaka3");
373 try {
374 pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE);
375 //pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
376 dssp = true;
377 } catch (Exception e) {
378 System.err.println(e.getMessage());
379 }*/
380 try {
381 pdb.runNaccess(NACCESS_EXE, NACCESS_PARAMS);
382 naccess = true;
383 } catch (Exception e) {
384 System.err.println(e.getMessage());
385 }
386
387 //pdb.writeToDb(conn,outputDb);
388 pdb.writeToDbFast(conn, outputDb);
389 }
390
391 // get graphs
392 if (!mode.equals("PDB")) {
393 for (int j = 0; j<edgeTypes.length; j++) {
394 System.out.print("--> "+(directed[j]?"directed":"")+" graph "+edgeTypes[j]+" for cutoff "+cutoffs[j]);
395
396 RIGraph graph = pdb.get_graph(edgeTypes[j], cutoffs[j], directed[j]);
397 //graph.restrictContactsBetweenSs();
398 if (seqseps != null) {
399 if (seqseps[j] > 1) {
400 System.out.print(" and sequence separation >= "+seqseps[j]);
401 graph.restrictContactsToMinRange(seqseps[j]);
402 }
403 }
404 //graph.writeToDb(conn,outputDb);
405 graph.write_graph_to_db_fast(conn,outputDb);
406
407 System.out.println();
408 numGraphs++;
409 }
410 }
411
412 } catch (SQLException e) {
413 System.err.println("Couldn't write graph to db, error: "+e.getMessage());
414 } catch (PdbLoadError e) {
415 System.err.println("Error loading from pdb file "+pdbfile+", specific error: "+e.getMessage());
416 }
417
418 System.out.println("SUMMARY:"+pdbfile+"_"+pdbChainCode+" dssp:"+dssp+" scop:"+scop+" naccess:"+naccess+" consurf:"+consurf+" ec:"+ec+" csa:"+csa+ " graphs:"+numGraphs);
419
420 }
421
422 // closing db connection
423 conn.close();
424 }
425
426 }