ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/makeNbhTbl.java
Revision: 24
Committed: Thu Mar 9 10:08:44 2006 UTC (18 years, 6 months ago) by stehr
File size: 16108 byte(s)
Log Message:
moved executables (classes with main function) to root directory
Line File contents
1 /****************************************************************************
2 * Make neighbourhood table
3 * Author: Henning Stehr
4 * Date: 21/12/2005
5 * Usage: makenbh [start_index] [end_index]
6 * Description: Simple command line program to generate the 'neighbourhoods'
7 * table in the 'newmsdgraph' database. The program will
8 * generate rows in the neighbourhood table for each node in
9 * each graph with start_index <= graph_id <= end_index.
10 * Contact: stehr@molgen.mpg.de
11 *****************************************************************************/
12
13 import java.sql.*;
14
15 public class makeNbhTbl {
16
17 /* -------------------------------- constants ------------------------------------------*/
18
19 // constants for database connection
20 static final String dbDriver = "org.gjt.mm.mysql.Driver";
21 static final String dbServer = "jdbc:mysql://fliederlila.molgen.mpg.de:3036/newmsdgraph";
22 static final String dbUserName = "henning";
23 static final String dbPassword = "das4ke";
24
25 // constants for graph database
26 //static final String listTable = "henning.myfamily_list";
27 //static final String nodeTable = "henning.myfamily_nodes";
28 //static final String edgeTable = "henning.myfamily_edges";
29 static final String listTable = "list";
30 static final String nodeTable = "nodes";
31 static final String edgeTable = "edges";
32
33
34 // constants for neighbourhood table
35 //static final String nbhTable = "henning.myfamily_nbhs";
36 static final String nbhTable = "neighbourhoods_sc_sc";
37 static final String edgeType = "SC_SC";
38 static final String tempNodes = "temp_nodes";
39 static final String tempEdges = "temp_edges";
40 static final String nbhTableDefinition = "(`graph_id` int(10) unsigned default NULL," +
41 " `cid` varchar(6) default NULL," +
42 " `num` int(5) unsigned default NULL," +
43 " `res` char(3) default NULL," +
44 " `entry_id` int(5) unsigned default NULL," +
45 " `size` int(3) unsigned default NULL, " +
46 " `hash` char(20) default NULL, " +
47 " `ALA` int(3) unsigned default NULL," +
48 " `ARG` int(3) unsigned default NULL," +
49 " `ASN` int(3) unsigned default NULL," +
50 " `ASP` int(3) unsigned default NULL," +
51 " `CYS` int(3) unsigned default NULL," +
52 " `GLN` int(3) unsigned default NULL," +
53 " `GLU` int(3) unsigned default NULL," +
54 " `GLY` int(3) unsigned default NULL," +
55 " `HIS` int(3) unsigned default NULL," +
56 " `ILE` int(3) unsigned default NULL," +
57 " `LEU` int(3) unsigned default NULL," +
58 " `LYS` int(3) unsigned default NULL," +
59 " `MET` int(3) unsigned default NULL," +
60 " `PHE` int(3) unsigned default NULL," +
61 " `PRO` int(3) unsigned default NULL," +
62 " `SER` int(3) unsigned default NULL," +
63 " `THR` int(3) unsigned default NULL," +
64 " `TRP` int(3) unsigned default NULL," +
65 " `TYR` int(3) unsigned default NULL," +
66 " `VAL` int(3) unsigned default NULL," +
67 " CONSTRAINT PRIMARY KEY (graph_id, cid, num));";
68
69 // This array is used to convert an amino acid number to its three letter code.
70 // The numbering is from 1 to 20 to ensure compatibility with function aa_three2num().
71 static final String[] aa_num2three = {"???", "ALA", "ARG", "ASN", "ASP", "CYS",
72 "GLN", "GLU", "GLY", "HIS", "ILE",
73 "LEU", "LYS", "MET", "PHE", "PRO",
74 "SER", "THR", "TRP", "TYR", "VAL"};
75
76 /* ------------------------------ class variables -------------------------------------*/
77
78 static Connection conn; // global database connection
79
80 /* -------------------------------- functions -----------------------------------------*/
81
82 // return the number of an amino acid by its three letter code
83 public static int aa_three2num(String threeLetterCode) {
84 if(threeLetterCode.equalsIgnoreCase("ALA")) return 1;
85 if(threeLetterCode.equalsIgnoreCase("ARG")) return 2;
86 if(threeLetterCode.equalsIgnoreCase("ASN")) return 3;
87 if(threeLetterCode.equalsIgnoreCase("ASP")) return 4;
88 if(threeLetterCode.equalsIgnoreCase("CYS")) return 5;
89 if(threeLetterCode.equalsIgnoreCase("GLN")) return 6;
90 if(threeLetterCode.equalsIgnoreCase("GLU")) return 7;
91 if(threeLetterCode.equalsIgnoreCase("GLY")) return 8;
92 if(threeLetterCode.equalsIgnoreCase("HIS")) return 9;
93 if(threeLetterCode.equalsIgnoreCase("ILE")) return 10;
94 if(threeLetterCode.equalsIgnoreCase("LEU")) return 11;
95 if(threeLetterCode.equalsIgnoreCase("LYS")) return 12;
96 if(threeLetterCode.equalsIgnoreCase("MET")) return 13;
97 if(threeLetterCode.equalsIgnoreCase("PHE")) return 14;
98 if(threeLetterCode.equalsIgnoreCase("PRO")) return 15;
99 if(threeLetterCode.equalsIgnoreCase("SER")) return 16;
100 if(threeLetterCode.equalsIgnoreCase("THR")) return 17;
101 if(threeLetterCode.equalsIgnoreCase("TRP")) return 18;
102 if(threeLetterCode.equalsIgnoreCase("TYR")) return 19;
103 if(threeLetterCode.equalsIgnoreCase("VAL")) return 20;
104 return -1;
105 }
106
107 public static String nbhGetHashValue(int[] nbh) {
108
109 char currentChar;
110 StringBuffer resultBuffer = new StringBuffer("????????????????????");
111
112 // nbh[i] is defined for i=1..20
113 for(int i = 1; i <= 20; i++) {
114 currentChar = '?';
115 if(nbh[i] < 0)
116 System.err.println("Error in function nbhGetHashValue: " +
117 "array entry nbh[" + i + "] = " + nbh[i] +" is negative.");
118 //else if(nbh[i] <= 9) currentChar = '0' + nbh[i];
119 //else if(nbh[i] <= 9 + 26) currentChar = 'A' + nbh[i];
120 else if(nbh[i] > 26 + 9)
121 System.err.println("Error in function nbhGetHashValue: " +
122 "array entry nbh[" + i + "] = " + nbh[i] + " is too big.");
123 else currentChar = Character.forDigit(nbh[i], 26 + 10);
124 resultBuffer.setCharAt(i-1, currentChar);
125 }
126 return resultBuffer.toString();
127 }
128
129
130 // open connection to database
131 public static void openDBConnection() {
132
133 try { // try to load driver
134 Class.forName(dbDriver).newInstance();
135 try {
136 conn = DriverManager.getConnection(dbServer, dbUserName, dbPassword);
137 } catch (SQLException E) {
138 System.err.println("Error: Unable to open database connection.");
139 System.err.println("SQLException: " + E.getMessage());
140 System.err.println("SQLState: " + E.getSQLState());
141 System.err.println("VendorError: " + E.getErrorCode());
142 } // end try/catch connection
143 } // end try load Driver
144 catch (Exception E) {
145 System.err.println("Error: Unable to load database driver.");
146 E.printStackTrace();
147 } // end catch
148
149 return;
150 }
151
152 // close database connection
153 public static void closeDBConnection() {
154
155 try {
156 conn.close();
157 } catch (SQLException E) {
158 System.out.println("Error: Unable to close connection.");
159 System.out.println("SQLException: " + E.getMessage());
160 System.out.println("SQLState: " + E.getSQLState());
161 System.out.println("VendorError: " + E.getErrorCode());
162 System.exit(1);
163 } // end try/catch connection
164
165 return;
166 }
167
168 // create new neighbourhood table if not exists
169 public static void createNeighbourhoodTableIfNotExists() {
170 Statement stmt;
171 String query;
172 try {
173 query = "CREATE TABLE IF NOT EXISTS " + nbhTable + " " + nbhTableDefinition + ";";
174 stmt = conn.createStatement();
175 stmt.execute(query);
176 } catch (SQLException E) {
177 System.err.println("Error: Unable to create neighbourhood table.");
178 System.err.println("SQLException: " + E.getMessage());
179 System.err.println("SQLState: " + E.getSQLState());
180 System.err.println("VendorError: " + E.getErrorCode());
181 } // end catch
182
183 return;
184 }
185
186 // insert neighbourhood entries for each graph with from <= graph_id <= to
187 public static void insertNeighbourhoods(int fromIdx, int toIdx) {
188 Statement graphsStmt,
189 nodesStmt,
190 edgesStmt;
191 ResultSet graphsRs,
192 nodesRs,
193 edgesRs;
194 String query,
195 chainId = "?",
196 resType,
197 nbhResType,
198 hash;
199 int graphId = 0,
200 entryId,
201 resNum = 0,
202 numGraphs = 0,
203 numNodes = 0,
204 // existingNodes,
205 resTypeNum,
206 nbhResCount,
207 nbhResTypeNum,
208 nbhSize;
209 int[] nbh;
210
211 try {
212 // get number of graphs
213 graphsStmt = conn.createStatement();
214 query = "SELECT COUNT(*) FROM " + listTable +
215 " WHERE graph_id >= " + fromIdx + " AND graph_id <= " + toIdx + ";";
216 graphsRs = graphsStmt.executeQuery(query);
217 if(graphsRs.next()) {
218 numGraphs = graphsRs.getInt(1);
219 }
220 graphsRs.close();
221 graphsStmt.close();
222 System.out.println("Processing " + numGraphs + (numGraphs==1?" graph...":" graphs..."));
223
224 // iterate over all graphs
225 graphsStmt = conn.createStatement();
226 query = "SELECT graph_id, entry_id FROM " + listTable +
227 " WHERE graph_id >= " + fromIdx + " AND graph_id <= " + toIdx + ";";
228 graphsRs = graphsStmt.executeQuery(query);
229 numGraphs = 0;
230 while(graphsRs.next()) {
231 graphId = graphsRs.getInt(1);
232 entryId = graphsRs.getInt(2);
233
234 // check whether neighbourhoods for this graph already exist
235 // nodesStmt = conn.createStatement();
236 // query = "SELECT COUNT(*) FROM " + nbhTable + " WHERE graph_id = " + graphId + ";";
237 // nodesRs = nodesStmt.executeQuery(query);
238 // if(nodesRs.next()) {
239 // existingNodes = nodesRs.getInt(1);
240 // if(existingNodes > 0) {
241 // System.out.println("Warning: Neighbourhoods for the graph with graph_id = "
242 // + graphId + " already exist.");
243 // }
244 // }
245 // nodesRs.close();
246 // nodesStmt.close();
247
248 // extract all nodes and edges for this graph into temporary tables
249 nodesStmt = conn.createStatement();
250 nodesStmt.addBatch("DROP TEMPORARY TABLE IF EXISTS " + tempNodes + ";");
251 nodesStmt.addBatch("DROP TEMPORARY TABLE IF EXISTS " + tempEdges + ";");
252 nodesStmt.addBatch("CREATE TEMPORARY TABLE " + tempNodes +
253 " SELECT * FROM " + nodeTable + " WHERE graph_id = " + graphId + ";");
254 nodesStmt.addBatch("CREATE TEMPORARY TABLE " + tempEdges +
255 " SELECT * FROM " + edgeTable + " WHERE graph_id = " + graphId +
256 " AND " + edgeType + " > 0;"); // take only relevant edges
257 // if necessary create indices to speed up queries
258 nodesStmt.executeBatch();
259 nodesStmt.close();
260
261 // iterate over all nodes for this graph
262 nodesStmt = conn.createStatement();
263 query = "SELECT cid, num, res FROM "+ tempNodes + ";"; // graph_id matches automatically
264 nodesRs = nodesStmt.executeQuery(query);
265 while(nodesRs.next()) {
266 chainId = nodesRs.getString(1);
267 resNum = nodesRs.getInt(2);
268 resType = nodesRs.getString(3);
269 resTypeNum = aa_three2num(resType);
270 if(resTypeNum < 1 || resTypeNum > 20) {
271 System.err.println("Warning: Non standard amino acid " + resType +
272 " at position " + resNum + " in chain " + chainId +
273 " of graph " + graphId + ".");
274 }
275
276 // get residue counts from edges table
277 edgesStmt = conn.createStatement();
278 query = "SELECT j_res AS res, COUNT(*) AS count FROM "+ tempEdges +
279 " WHERE i_cid = '" + chainId + "' AND i_num = " + resNum +
280 " GROUP BY j_res;";
281 //System.out.println(query);
282 edgesRs = edgesStmt.executeQuery(query);
283
284 nbh = new int[21]; // set all residue counts to zero
285 nbhSize = 0;
286 while(edgesRs.next()) {
287 nbhResType = edgesRs.getString(1);
288 nbhResCount = edgesRs.getInt(2);
289 nbhResTypeNum = aa_three2num(nbhResType);
290 if(nbhResTypeNum >= 1 && nbhResTypeNum <= 20) {
291 nbh[nbhResTypeNum] = nbhResCount;
292 nbhSize += nbhResCount;
293 }
294 }
295 edgesRs.close();
296 edgesStmt.close();
297
298 // update nbh table
299 hash = nbhGetHashValue(nbh);
300 query = "INSERT INTO " + nbhTable + " (graph_id, cid, num, res, entry_id, size, hash, "
301 + "ALA, ARG, ASN, ASP, CYS, GLN, GLU, GLY, HIS, ILE, "
302 + "LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL) " +
303 "VALUES (" + graphId + ",'" + chainId + "'," + resNum + ",'"
304 + resType + "'," + entryId + "," + nbhSize + ",'" + hash + "',"
305 + nbh[1] + "," + nbh[2] + "," + nbh[3] + "," + nbh[4] + "," + nbh[5] + ","
306 + nbh[6] + "," + nbh[7] + "," + nbh[8] + "," + nbh[9] + "," + nbh[10] + ","
307 + nbh[11] + "," + nbh[12] + "," + nbh[13] + "," + nbh[14] + "," + nbh[15] + ","
308 + nbh[16] + "," + nbh[17] + "," + nbh[18] + "," + nbh[19] + "," + nbh[20] + ");";
309 edgesStmt = conn.createStatement();
310 if(edgesStmt.executeUpdate(query) != 1) {
311 System.err.println("Error: Insert into table " + nbhTable + " failed.");
312 System.exit(1);
313 }
314 edgesStmt.close();
315 numNodes++;
316 } // end of while over nodes
317 nodesRs.close();
318 nodesStmt.close();
319 numGraphs++;
320 if(numGraphs % 1000 == 0) {
321 System.out.print(numGraphs + " ");
322 System.out.flush();
323 }
324 System.gc(); // clean up
325 } // end while over all graphs
326 graphsRs.close();
327 graphsStmt.close();
328 } catch (SQLException E) {
329 System.err.println("Error in function insertNeighbourhoods:");
330 System.err.println("graphId = "+ graphId +", chainId = "+ chainId +", resNum = "+ resNum);
331 System.err.println("SQLException: " + E.getMessage());
332 System.err.println("SQLState: " + E.getSQLState());
333 System.err.println("VendorError: " + E.getErrorCode());
334 } // end catch
335
336 System.out.println();
337 System.out.println("Extracted "+ numNodes + " neighbourhood" + (numNodes==1?"":"s")
338 + " from " + numGraphs + " graph" + (numGraphs==1?"":"s") + ".");
339 return;
340 }
341
342 public static void createIndicesOnNeighbourhoodTable() {
343 // index on graph_id
344 // index on graph_id, cid, num
345 // index on res
346 // index on size
347 // index on hash
348 }
349
350 /* ----------------------------------- main -------------------------------------------*/
351
352 public static void main(String[] args) {
353
354 int from = 0,
355 to = 0;
356
357 // read command line parameters
358 if ( args.length != 2) {
359 System.err.println("Usage: makeNbhTbl [start_index] [end_index]");
360 System.exit(1);
361 } else {
362
363 // read parameter 1
364
365 try {
366 from = Integer.parseInt(args[0]);
367 if(from <= 0) {
368 System.err.println("Parameter 1 has to be positive");
369 System.exit(1);
370 }
371 }
372 catch(NumberFormatException e) {
373 System.err.println("Parameter 1 has to be an integer");
374 System.exit(1);
375 }
376
377 // read parameter 2
378
379 try {
380 to = Integer.parseInt(args[1]);
381 if(to <= 0) {
382 System.err.println("Parameter 2 has to be positive");
383 System.exit(1);
384 } else
385 if(from > to) {
386 System.err.println("Start index has to be smaller or equal than end index");
387 System.exit(1);
388 }
389 }
390 catch(NumberFormatException e) {
391 System.err.println("Parameter 2 has to be an integer");
392 System.exit(1);
393 }
394
395 }
396
397 // execute
398 openDBConnection();
399 createNeighbourhoodTableIfNotExists();
400 insertNeighbourhoods(from, to);
401 createIndicesOnNeighbourhoodTable();
402 closeDBConnection();
403
404 } // end of main
405
406 } // end of class makeNbhTbl