1 |
/**************************************************************************** |
2 |
* Make neighbourhood table |
3 |
* Author: Henning Stehr |
4 |
* Date: 21/12/2005 |
5 |
* Usage: makenbh [start_index] [end_index] |
6 |
* Description: Simple command line program to generate the 'neighbourhoods' |
7 |
* table in the 'newmsdgraph' database. The program will |
8 |
* generate rows in the neighbourhood table for each node in |
9 |
* each graph with start_index <= graph_id <= end_index. |
10 |
* Contact: stehr@molgen.mpg.de |
11 |
*****************************************************************************/ |
12 |
|
13 |
import java.sql.*; |
14 |
|
15 |
public class makeNbhTbl { |
16 |
|
17 |
/* -------------------------------- constants ------------------------------------------*/ |
18 |
|
19 |
// constants for database connection |
20 |
static final String dbDriver = "org.gjt.mm.mysql.Driver"; |
21 |
static final String dbServer = "jdbc:mysql://fliederlila.molgen.mpg.de:3036/newmsdgraph"; |
22 |
static final String dbUserName = "henning"; |
23 |
static final String dbPassword = "das4ke"; |
24 |
|
25 |
// constants for graph database |
26 |
//static final String listTable = "henning.myfamily_list"; |
27 |
//static final String nodeTable = "henning.myfamily_nodes"; |
28 |
//static final String edgeTable = "henning.myfamily_edges"; |
29 |
static final String listTable = "list"; |
30 |
static final String nodeTable = "nodes"; |
31 |
static final String edgeTable = "edges"; |
32 |
|
33 |
|
34 |
// constants for neighbourhood table |
35 |
//static final String nbhTable = "henning.myfamily_nbhs"; |
36 |
static final String nbhTable = "neighbourhoods_sc_sc"; |
37 |
static final String edgeType = "SC_SC"; |
38 |
static final String tempNodes = "temp_nodes"; |
39 |
static final String tempEdges = "temp_edges"; |
40 |
static final String nbhTableDefinition = "(`graph_id` int(10) unsigned default NULL," + |
41 |
" `cid` varchar(6) default NULL," + |
42 |
" `num` int(5) unsigned default NULL," + |
43 |
" `res` char(3) default NULL," + |
44 |
" `entry_id` int(5) unsigned default NULL," + |
45 |
" `size` int(3) unsigned default NULL, " + |
46 |
" `hash` char(20) default NULL, " + |
47 |
" `ALA` int(3) unsigned default NULL," + |
48 |
" `ARG` int(3) unsigned default NULL," + |
49 |
" `ASN` int(3) unsigned default NULL," + |
50 |
" `ASP` int(3) unsigned default NULL," + |
51 |
" `CYS` int(3) unsigned default NULL," + |
52 |
" `GLN` int(3) unsigned default NULL," + |
53 |
" `GLU` int(3) unsigned default NULL," + |
54 |
" `GLY` int(3) unsigned default NULL," + |
55 |
" `HIS` int(3) unsigned default NULL," + |
56 |
" `ILE` int(3) unsigned default NULL," + |
57 |
" `LEU` int(3) unsigned default NULL," + |
58 |
" `LYS` int(3) unsigned default NULL," + |
59 |
" `MET` int(3) unsigned default NULL," + |
60 |
" `PHE` int(3) unsigned default NULL," + |
61 |
" `PRO` int(3) unsigned default NULL," + |
62 |
" `SER` int(3) unsigned default NULL," + |
63 |
" `THR` int(3) unsigned default NULL," + |
64 |
" `TRP` int(3) unsigned default NULL," + |
65 |
" `TYR` int(3) unsigned default NULL," + |
66 |
" `VAL` int(3) unsigned default NULL," + |
67 |
" CONSTRAINT PRIMARY KEY (graph_id, cid, num));"; |
68 |
|
69 |
// This array is used to convert an amino acid number to its three letter code. |
70 |
// The numbering is from 1 to 20 to ensure compatibility with function aa_three2num(). |
71 |
static final String[] aa_num2three = {"???", "ALA", "ARG", "ASN", "ASP", "CYS", |
72 |
"GLN", "GLU", "GLY", "HIS", "ILE", |
73 |
"LEU", "LYS", "MET", "PHE", "PRO", |
74 |
"SER", "THR", "TRP", "TYR", "VAL"}; |
75 |
|
76 |
/* ------------------------------ class variables -------------------------------------*/ |
77 |
|
78 |
static Connection conn; // global database connection |
79 |
|
80 |
/* -------------------------------- functions -----------------------------------------*/ |
81 |
|
82 |
// return the number of an amino acid by its three letter code |
83 |
public static int aa_three2num(String threeLetterCode) { |
84 |
if(threeLetterCode.equalsIgnoreCase("ALA")) return 1; |
85 |
if(threeLetterCode.equalsIgnoreCase("ARG")) return 2; |
86 |
if(threeLetterCode.equalsIgnoreCase("ASN")) return 3; |
87 |
if(threeLetterCode.equalsIgnoreCase("ASP")) return 4; |
88 |
if(threeLetterCode.equalsIgnoreCase("CYS")) return 5; |
89 |
if(threeLetterCode.equalsIgnoreCase("GLN")) return 6; |
90 |
if(threeLetterCode.equalsIgnoreCase("GLU")) return 7; |
91 |
if(threeLetterCode.equalsIgnoreCase("GLY")) return 8; |
92 |
if(threeLetterCode.equalsIgnoreCase("HIS")) return 9; |
93 |
if(threeLetterCode.equalsIgnoreCase("ILE")) return 10; |
94 |
if(threeLetterCode.equalsIgnoreCase("LEU")) return 11; |
95 |
if(threeLetterCode.equalsIgnoreCase("LYS")) return 12; |
96 |
if(threeLetterCode.equalsIgnoreCase("MET")) return 13; |
97 |
if(threeLetterCode.equalsIgnoreCase("PHE")) return 14; |
98 |
if(threeLetterCode.equalsIgnoreCase("PRO")) return 15; |
99 |
if(threeLetterCode.equalsIgnoreCase("SER")) return 16; |
100 |
if(threeLetterCode.equalsIgnoreCase("THR")) return 17; |
101 |
if(threeLetterCode.equalsIgnoreCase("TRP")) return 18; |
102 |
if(threeLetterCode.equalsIgnoreCase("TYR")) return 19; |
103 |
if(threeLetterCode.equalsIgnoreCase("VAL")) return 20; |
104 |
return -1; |
105 |
} |
106 |
|
107 |
public static String nbhGetHashValue(int[] nbh) { |
108 |
|
109 |
char currentChar; |
110 |
StringBuffer resultBuffer = new StringBuffer("????????????????????"); |
111 |
|
112 |
// nbh[i] is defined for i=1..20 |
113 |
for(int i = 1; i <= 20; i++) { |
114 |
currentChar = '?'; |
115 |
if(nbh[i] < 0) |
116 |
System.err.println("Error in function nbhGetHashValue: " + |
117 |
"array entry nbh[" + i + "] = " + nbh[i] +" is negative."); |
118 |
//else if(nbh[i] <= 9) currentChar = '0' + nbh[i]; |
119 |
//else if(nbh[i] <= 9 + 26) currentChar = 'A' + nbh[i]; |
120 |
else if(nbh[i] > 26 + 9) |
121 |
System.err.println("Error in function nbhGetHashValue: " + |
122 |
"array entry nbh[" + i + "] = " + nbh[i] + " is too big."); |
123 |
else currentChar = Character.forDigit(nbh[i], 26 + 10); |
124 |
resultBuffer.setCharAt(i-1, currentChar); |
125 |
} |
126 |
return resultBuffer.toString(); |
127 |
} |
128 |
|
129 |
|
130 |
// open connection to database |
131 |
public static void openDBConnection() { |
132 |
|
133 |
try { // try to load driver |
134 |
Class.forName(dbDriver).newInstance(); |
135 |
try { |
136 |
conn = DriverManager.getConnection(dbServer, dbUserName, dbPassword); |
137 |
} catch (SQLException E) { |
138 |
System.err.println("Error: Unable to open database connection."); |
139 |
System.err.println("SQLException: " + E.getMessage()); |
140 |
System.err.println("SQLState: " + E.getSQLState()); |
141 |
System.err.println("VendorError: " + E.getErrorCode()); |
142 |
} // end try/catch connection |
143 |
} // end try load Driver |
144 |
catch (Exception E) { |
145 |
System.err.println("Error: Unable to load database driver."); |
146 |
E.printStackTrace(); |
147 |
} // end catch |
148 |
|
149 |
return; |
150 |
} |
151 |
|
152 |
// close database connection |
153 |
public static void closeDBConnection() { |
154 |
|
155 |
try { |
156 |
conn.close(); |
157 |
} catch (SQLException E) { |
158 |
System.out.println("Error: Unable to close connection."); |
159 |
System.out.println("SQLException: " + E.getMessage()); |
160 |
System.out.println("SQLState: " + E.getSQLState()); |
161 |
System.out.println("VendorError: " + E.getErrorCode()); |
162 |
System.exit(1); |
163 |
} // end try/catch connection |
164 |
|
165 |
return; |
166 |
} |
167 |
|
168 |
// create new neighbourhood table if not exists |
169 |
public static void createNeighbourhoodTableIfNotExists() { |
170 |
Statement stmt; |
171 |
String query; |
172 |
try { |
173 |
query = "CREATE TABLE IF NOT EXISTS " + nbhTable + " " + nbhTableDefinition + ";"; |
174 |
stmt = conn.createStatement(); |
175 |
stmt.execute(query); |
176 |
} catch (SQLException E) { |
177 |
System.err.println("Error: Unable to create neighbourhood table."); |
178 |
System.err.println("SQLException: " + E.getMessage()); |
179 |
System.err.println("SQLState: " + E.getSQLState()); |
180 |
System.err.println("VendorError: " + E.getErrorCode()); |
181 |
} // end catch |
182 |
|
183 |
return; |
184 |
} |
185 |
|
186 |
// insert neighbourhood entries for each graph with from <= graph_id <= to |
187 |
public static void insertNeighbourhoods(int fromIdx, int toIdx) { |
188 |
Statement graphsStmt, |
189 |
nodesStmt, |
190 |
edgesStmt; |
191 |
ResultSet graphsRs, |
192 |
nodesRs, |
193 |
edgesRs; |
194 |
String query, |
195 |
chainId = "?", |
196 |
resType, |
197 |
nbhResType, |
198 |
hash; |
199 |
int graphId = 0, |
200 |
entryId, |
201 |
resNum = 0, |
202 |
numGraphs = 0, |
203 |
numNodes = 0, |
204 |
// existingNodes, |
205 |
resTypeNum, |
206 |
nbhResCount, |
207 |
nbhResTypeNum, |
208 |
nbhSize; |
209 |
int[] nbh; |
210 |
|
211 |
try { |
212 |
// get number of graphs |
213 |
graphsStmt = conn.createStatement(); |
214 |
query = "SELECT COUNT(*) FROM " + listTable + |
215 |
" WHERE graph_id >= " + fromIdx + " AND graph_id <= " + toIdx + ";"; |
216 |
graphsRs = graphsStmt.executeQuery(query); |
217 |
if(graphsRs.next()) { |
218 |
numGraphs = graphsRs.getInt(1); |
219 |
} |
220 |
graphsRs.close(); |
221 |
graphsStmt.close(); |
222 |
System.out.println("Processing " + numGraphs + (numGraphs==1?" graph...":" graphs...")); |
223 |
|
224 |
// iterate over all graphs |
225 |
graphsStmt = conn.createStatement(); |
226 |
query = "SELECT graph_id, entry_id FROM " + listTable + |
227 |
" WHERE graph_id >= " + fromIdx + " AND graph_id <= " + toIdx + ";"; |
228 |
graphsRs = graphsStmt.executeQuery(query); |
229 |
numGraphs = 0; |
230 |
while(graphsRs.next()) { |
231 |
graphId = graphsRs.getInt(1); |
232 |
entryId = graphsRs.getInt(2); |
233 |
|
234 |
// check whether neighbourhoods for this graph already exist |
235 |
// nodesStmt = conn.createStatement(); |
236 |
// query = "SELECT COUNT(*) FROM " + nbhTable + " WHERE graph_id = " + graphId + ";"; |
237 |
// nodesRs = nodesStmt.executeQuery(query); |
238 |
// if(nodesRs.next()) { |
239 |
// existingNodes = nodesRs.getInt(1); |
240 |
// if(existingNodes > 0) { |
241 |
// System.out.println("Warning: Neighbourhoods for the graph with graph_id = " |
242 |
// + graphId + " already exist."); |
243 |
// } |
244 |
// } |
245 |
// nodesRs.close(); |
246 |
// nodesStmt.close(); |
247 |
|
248 |
// extract all nodes and edges for this graph into temporary tables |
249 |
nodesStmt = conn.createStatement(); |
250 |
nodesStmt.addBatch("DROP TEMPORARY TABLE IF EXISTS " + tempNodes + ";"); |
251 |
nodesStmt.addBatch("DROP TEMPORARY TABLE IF EXISTS " + tempEdges + ";"); |
252 |
nodesStmt.addBatch("CREATE TEMPORARY TABLE " + tempNodes + |
253 |
" SELECT * FROM " + nodeTable + " WHERE graph_id = " + graphId + ";"); |
254 |
nodesStmt.addBatch("CREATE TEMPORARY TABLE " + tempEdges + |
255 |
" SELECT * FROM " + edgeTable + " WHERE graph_id = " + graphId + |
256 |
" AND " + edgeType + " > 0;"); // take only relevant edges |
257 |
// if necessary create indices to speed up queries |
258 |
nodesStmt.executeBatch(); |
259 |
nodesStmt.close(); |
260 |
|
261 |
// iterate over all nodes for this graph |
262 |
nodesStmt = conn.createStatement(); |
263 |
query = "SELECT cid, num, res FROM "+ tempNodes + ";"; // graph_id matches automatically |
264 |
nodesRs = nodesStmt.executeQuery(query); |
265 |
while(nodesRs.next()) { |
266 |
chainId = nodesRs.getString(1); |
267 |
resNum = nodesRs.getInt(2); |
268 |
resType = nodesRs.getString(3); |
269 |
resTypeNum = aa_three2num(resType); |
270 |
if(resTypeNum < 1 || resTypeNum > 20) { |
271 |
System.err.println("Warning: Non standard amino acid " + resType + |
272 |
" at position " + resNum + " in chain " + chainId + |
273 |
" of graph " + graphId + "."); |
274 |
} |
275 |
|
276 |
// get residue counts from edges table |
277 |
edgesStmt = conn.createStatement(); |
278 |
query = "SELECT j_res AS res, COUNT(*) AS count FROM "+ tempEdges + |
279 |
" WHERE i_cid = '" + chainId + "' AND i_num = " + resNum + |
280 |
" GROUP BY j_res;"; |
281 |
//System.out.println(query); |
282 |
edgesRs = edgesStmt.executeQuery(query); |
283 |
|
284 |
nbh = new int[21]; // set all residue counts to zero |
285 |
nbhSize = 0; |
286 |
while(edgesRs.next()) { |
287 |
nbhResType = edgesRs.getString(1); |
288 |
nbhResCount = edgesRs.getInt(2); |
289 |
nbhResTypeNum = aa_three2num(nbhResType); |
290 |
if(nbhResTypeNum >= 1 && nbhResTypeNum <= 20) { |
291 |
nbh[nbhResTypeNum] = nbhResCount; |
292 |
nbhSize += nbhResCount; |
293 |
} |
294 |
} |
295 |
edgesRs.close(); |
296 |
edgesStmt.close(); |
297 |
|
298 |
// update nbh table |
299 |
hash = nbhGetHashValue(nbh); |
300 |
query = "INSERT INTO " + nbhTable + " (graph_id, cid, num, res, entry_id, size, hash, " |
301 |
+ "ALA, ARG, ASN, ASP, CYS, GLN, GLU, GLY, HIS, ILE, " |
302 |
+ "LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL) " + |
303 |
"VALUES (" + graphId + ",'" + chainId + "'," + resNum + ",'" |
304 |
+ resType + "'," + entryId + "," + nbhSize + ",'" + hash + "'," |
305 |
+ nbh[1] + "," + nbh[2] + "," + nbh[3] + "," + nbh[4] + "," + nbh[5] + "," |
306 |
+ nbh[6] + "," + nbh[7] + "," + nbh[8] + "," + nbh[9] + "," + nbh[10] + "," |
307 |
+ nbh[11] + "," + nbh[12] + "," + nbh[13] + "," + nbh[14] + "," + nbh[15] + "," |
308 |
+ nbh[16] + "," + nbh[17] + "," + nbh[18] + "," + nbh[19] + "," + nbh[20] + ");"; |
309 |
edgesStmt = conn.createStatement(); |
310 |
if(edgesStmt.executeUpdate(query) != 1) { |
311 |
System.err.println("Error: Insert into table " + nbhTable + " failed."); |
312 |
System.exit(1); |
313 |
} |
314 |
edgesStmt.close(); |
315 |
numNodes++; |
316 |
} // end of while over nodes |
317 |
nodesRs.close(); |
318 |
nodesStmt.close(); |
319 |
numGraphs++; |
320 |
if(numGraphs % 1000 == 0) { |
321 |
System.out.print(numGraphs + " "); |
322 |
System.out.flush(); |
323 |
} |
324 |
System.gc(); // clean up |
325 |
} // end while over all graphs |
326 |
graphsRs.close(); |
327 |
graphsStmt.close(); |
328 |
} catch (SQLException E) { |
329 |
System.err.println("Error in function insertNeighbourhoods:"); |
330 |
System.err.println("graphId = "+ graphId +", chainId = "+ chainId +", resNum = "+ resNum); |
331 |
System.err.println("SQLException: " + E.getMessage()); |
332 |
System.err.println("SQLState: " + E.getSQLState()); |
333 |
System.err.println("VendorError: " + E.getErrorCode()); |
334 |
} // end catch |
335 |
|
336 |
System.out.println(); |
337 |
System.out.println("Extracted "+ numNodes + " neighbourhood" + (numNodes==1?"":"s") |
338 |
+ " from " + numGraphs + " graph" + (numGraphs==1?"":"s") + "."); |
339 |
return; |
340 |
} |
341 |
|
342 |
public static void createIndicesOnNeighbourhoodTable() { |
343 |
// index on graph_id |
344 |
// index on graph_id, cid, num |
345 |
// index on res |
346 |
// index on size |
347 |
// index on hash |
348 |
} |
349 |
|
350 |
/* ----------------------------------- main -------------------------------------------*/ |
351 |
|
352 |
public static void main(String[] args) { |
353 |
|
354 |
int from = 0, |
355 |
to = 0; |
356 |
|
357 |
// read command line parameters |
358 |
if ( args.length != 2) { |
359 |
System.err.println("Usage: makeNbhTbl [start_index] [end_index]"); |
360 |
System.exit(1); |
361 |
} else { |
362 |
|
363 |
// read parameter 1 |
364 |
|
365 |
try { |
366 |
from = Integer.parseInt(args[0]); |
367 |
if(from <= 0) { |
368 |
System.err.println("Parameter 1 has to be positive"); |
369 |
System.exit(1); |
370 |
} |
371 |
} |
372 |
catch(NumberFormatException e) { |
373 |
System.err.println("Parameter 1 has to be an integer"); |
374 |
System.exit(1); |
375 |
} |
376 |
|
377 |
// read parameter 2 |
378 |
|
379 |
try { |
380 |
to = Integer.parseInt(args[1]); |
381 |
if(to <= 0) { |
382 |
System.err.println("Parameter 2 has to be positive"); |
383 |
System.exit(1); |
384 |
} else |
385 |
if(from > to) { |
386 |
System.err.println("Start index has to be smaller or equal than end index"); |
387 |
System.exit(1); |
388 |
} |
389 |
} |
390 |
catch(NumberFormatException e) { |
391 |
System.err.println("Parameter 2 has to be an integer"); |
392 |
System.exit(1); |
393 |
} |
394 |
|
395 |
} |
396 |
|
397 |
// execute |
398 |
openDBConnection(); |
399 |
createNeighbourhoodTableIfNotExists(); |
400 |
insertNeighbourhoods(from, to); |
401 |
createIndicesOnNeighbourhoodTable(); |
402 |
closeDBConnection(); |
403 |
|
404 |
} // end of main |
405 |
|
406 |
} // end of class makeNbhTbl |