1 |
stehr |
24 |
/**************************************************************************** |
2 |
|
|
* Make neighbourhood table |
3 |
|
|
* Author: Henning Stehr |
4 |
|
|
* Date: 21/12/2005 |
5 |
|
|
* Usage: makenbh [start_index] [end_index] |
6 |
|
|
* Description: Simple command line program to generate the 'neighbourhoods' |
7 |
|
|
* table in the 'newmsdgraph' database. The program will |
8 |
|
|
* generate rows in the neighbourhood table for each node in |
9 |
|
|
* each graph with start_index <= graph_id <= end_index. |
10 |
|
|
* Contact: stehr@molgen.mpg.de |
11 |
|
|
*****************************************************************************/ |
12 |
|
|
|
13 |
|
|
import java.sql.*; |
14 |
|
|
|
15 |
|
|
public class makeNbhTbl { |
16 |
|
|
|
17 |
|
|
/* -------------------------------- constants ------------------------------------------*/ |
18 |
|
|
|
19 |
|
|
// constants for database connection |
20 |
|
|
static final String dbDriver = "org.gjt.mm.mysql.Driver"; |
21 |
|
|
static final String dbServer = "jdbc:mysql://fliederlila.molgen.mpg.de:3036/newmsdgraph"; |
22 |
|
|
static final String dbUserName = "henning"; |
23 |
|
|
static final String dbPassword = "das4ke"; |
24 |
|
|
|
25 |
|
|
// constants for graph database |
26 |
|
|
//static final String listTable = "henning.myfamily_list"; |
27 |
|
|
//static final String nodeTable = "henning.myfamily_nodes"; |
28 |
|
|
//static final String edgeTable = "henning.myfamily_edges"; |
29 |
|
|
static final String listTable = "list"; |
30 |
|
|
static final String nodeTable = "nodes"; |
31 |
|
|
static final String edgeTable = "edges"; |
32 |
|
|
|
33 |
|
|
|
34 |
|
|
// constants for neighbourhood table |
35 |
|
|
//static final String nbhTable = "henning.myfamily_nbhs"; |
36 |
|
|
static final String nbhTable = "neighbourhoods_sc_sc"; |
37 |
|
|
static final String edgeType = "SC_SC"; |
38 |
|
|
static final String tempNodes = "temp_nodes"; |
39 |
|
|
static final String tempEdges = "temp_edges"; |
40 |
|
|
static final String nbhTableDefinition = "(`graph_id` int(10) unsigned default NULL," + |
41 |
|
|
" `cid` varchar(6) default NULL," + |
42 |
|
|
" `num` int(5) unsigned default NULL," + |
43 |
|
|
" `res` char(3) default NULL," + |
44 |
|
|
" `entry_id` int(5) unsigned default NULL," + |
45 |
|
|
" `size` int(3) unsigned default NULL, " + |
46 |
|
|
" `hash` char(20) default NULL, " + |
47 |
|
|
" `ALA` int(3) unsigned default NULL," + |
48 |
|
|
" `ARG` int(3) unsigned default NULL," + |
49 |
|
|
" `ASN` int(3) unsigned default NULL," + |
50 |
|
|
" `ASP` int(3) unsigned default NULL," + |
51 |
|
|
" `CYS` int(3) unsigned default NULL," + |
52 |
|
|
" `GLN` int(3) unsigned default NULL," + |
53 |
|
|
" `GLU` int(3) unsigned default NULL," + |
54 |
|
|
" `GLY` int(3) unsigned default NULL," + |
55 |
|
|
" `HIS` int(3) unsigned default NULL," + |
56 |
|
|
" `ILE` int(3) unsigned default NULL," + |
57 |
|
|
" `LEU` int(3) unsigned default NULL," + |
58 |
|
|
" `LYS` int(3) unsigned default NULL," + |
59 |
|
|
" `MET` int(3) unsigned default NULL," + |
60 |
|
|
" `PHE` int(3) unsigned default NULL," + |
61 |
|
|
" `PRO` int(3) unsigned default NULL," + |
62 |
|
|
" `SER` int(3) unsigned default NULL," + |
63 |
|
|
" `THR` int(3) unsigned default NULL," + |
64 |
|
|
" `TRP` int(3) unsigned default NULL," + |
65 |
|
|
" `TYR` int(3) unsigned default NULL," + |
66 |
|
|
" `VAL` int(3) unsigned default NULL," + |
67 |
|
|
" CONSTRAINT PRIMARY KEY (graph_id, cid, num));"; |
68 |
|
|
|
69 |
|
|
// This array is used to convert an amino acid number to its three letter code. |
70 |
|
|
// The numbering is from 1 to 20 to ensure compatibility with function aa_three2num(). |
71 |
|
|
static final String[] aa_num2three = {"???", "ALA", "ARG", "ASN", "ASP", "CYS", |
72 |
|
|
"GLN", "GLU", "GLY", "HIS", "ILE", |
73 |
|
|
"LEU", "LYS", "MET", "PHE", "PRO", |
74 |
|
|
"SER", "THR", "TRP", "TYR", "VAL"}; |
75 |
|
|
|
76 |
|
|
/* ------------------------------ class variables -------------------------------------*/ |
77 |
|
|
|
78 |
|
|
static Connection conn; // global database connection |
79 |
|
|
|
80 |
|
|
/* -------------------------------- functions -----------------------------------------*/ |
81 |
|
|
|
82 |
|
|
// return the number of an amino acid by its three letter code |
83 |
|
|
public static int aa_three2num(String threeLetterCode) { |
84 |
|
|
if(threeLetterCode.equalsIgnoreCase("ALA")) return 1; |
85 |
|
|
if(threeLetterCode.equalsIgnoreCase("ARG")) return 2; |
86 |
|
|
if(threeLetterCode.equalsIgnoreCase("ASN")) return 3; |
87 |
|
|
if(threeLetterCode.equalsIgnoreCase("ASP")) return 4; |
88 |
|
|
if(threeLetterCode.equalsIgnoreCase("CYS")) return 5; |
89 |
|
|
if(threeLetterCode.equalsIgnoreCase("GLN")) return 6; |
90 |
|
|
if(threeLetterCode.equalsIgnoreCase("GLU")) return 7; |
91 |
|
|
if(threeLetterCode.equalsIgnoreCase("GLY")) return 8; |
92 |
|
|
if(threeLetterCode.equalsIgnoreCase("HIS")) return 9; |
93 |
|
|
if(threeLetterCode.equalsIgnoreCase("ILE")) return 10; |
94 |
|
|
if(threeLetterCode.equalsIgnoreCase("LEU")) return 11; |
95 |
|
|
if(threeLetterCode.equalsIgnoreCase("LYS")) return 12; |
96 |
|
|
if(threeLetterCode.equalsIgnoreCase("MET")) return 13; |
97 |
|
|
if(threeLetterCode.equalsIgnoreCase("PHE")) return 14; |
98 |
|
|
if(threeLetterCode.equalsIgnoreCase("PRO")) return 15; |
99 |
|
|
if(threeLetterCode.equalsIgnoreCase("SER")) return 16; |
100 |
|
|
if(threeLetterCode.equalsIgnoreCase("THR")) return 17; |
101 |
|
|
if(threeLetterCode.equalsIgnoreCase("TRP")) return 18; |
102 |
|
|
if(threeLetterCode.equalsIgnoreCase("TYR")) return 19; |
103 |
|
|
if(threeLetterCode.equalsIgnoreCase("VAL")) return 20; |
104 |
|
|
return -1; |
105 |
|
|
} |
106 |
|
|
|
107 |
|
|
public static String nbhGetHashValue(int[] nbh) { |
108 |
|
|
|
109 |
|
|
char currentChar; |
110 |
|
|
StringBuffer resultBuffer = new StringBuffer("????????????????????"); |
111 |
|
|
|
112 |
|
|
// nbh[i] is defined for i=1..20 |
113 |
|
|
for(int i = 1; i <= 20; i++) { |
114 |
|
|
currentChar = '?'; |
115 |
|
|
if(nbh[i] < 0) |
116 |
|
|
System.err.println("Error in function nbhGetHashValue: " + |
117 |
|
|
"array entry nbh[" + i + "] = " + nbh[i] +" is negative."); |
118 |
|
|
//else if(nbh[i] <= 9) currentChar = '0' + nbh[i]; |
119 |
|
|
//else if(nbh[i] <= 9 + 26) currentChar = 'A' + nbh[i]; |
120 |
|
|
else if(nbh[i] > 26 + 9) |
121 |
|
|
System.err.println("Error in function nbhGetHashValue: " + |
122 |
|
|
"array entry nbh[" + i + "] = " + nbh[i] + " is too big."); |
123 |
|
|
else currentChar = Character.forDigit(nbh[i], 26 + 10); |
124 |
|
|
resultBuffer.setCharAt(i-1, currentChar); |
125 |
|
|
} |
126 |
|
|
return resultBuffer.toString(); |
127 |
|
|
} |
128 |
|
|
|
129 |
|
|
|
130 |
|
|
// open connection to database |
131 |
|
|
public static void openDBConnection() { |
132 |
|
|
|
133 |
|
|
try { // try to load driver |
134 |
|
|
Class.forName(dbDriver).newInstance(); |
135 |
|
|
try { |
136 |
|
|
conn = DriverManager.getConnection(dbServer, dbUserName, dbPassword); |
137 |
|
|
} catch (SQLException E) { |
138 |
|
|
System.err.println("Error: Unable to open database connection."); |
139 |
|
|
System.err.println("SQLException: " + E.getMessage()); |
140 |
|
|
System.err.println("SQLState: " + E.getSQLState()); |
141 |
|
|
System.err.println("VendorError: " + E.getErrorCode()); |
142 |
|
|
} // end try/catch connection |
143 |
|
|
} // end try load Driver |
144 |
|
|
catch (Exception E) { |
145 |
|
|
System.err.println("Error: Unable to load database driver."); |
146 |
|
|
E.printStackTrace(); |
147 |
|
|
} // end catch |
148 |
|
|
|
149 |
|
|
return; |
150 |
|
|
} |
151 |
|
|
|
152 |
|
|
// close database connection |
153 |
|
|
public static void closeDBConnection() { |
154 |
|
|
|
155 |
|
|
try { |
156 |
|
|
conn.close(); |
157 |
|
|
} catch (SQLException E) { |
158 |
|
|
System.out.println("Error: Unable to close connection."); |
159 |
|
|
System.out.println("SQLException: " + E.getMessage()); |
160 |
|
|
System.out.println("SQLState: " + E.getSQLState()); |
161 |
|
|
System.out.println("VendorError: " + E.getErrorCode()); |
162 |
|
|
System.exit(1); |
163 |
|
|
} // end try/catch connection |
164 |
|
|
|
165 |
|
|
return; |
166 |
|
|
} |
167 |
|
|
|
168 |
|
|
// create new neighbourhood table if not exists |
169 |
|
|
public static void createNeighbourhoodTableIfNotExists() { |
170 |
|
|
Statement stmt; |
171 |
|
|
String query; |
172 |
|
|
try { |
173 |
|
|
query = "CREATE TABLE IF NOT EXISTS " + nbhTable + " " + nbhTableDefinition + ";"; |
174 |
|
|
stmt = conn.createStatement(); |
175 |
|
|
stmt.execute(query); |
176 |
|
|
} catch (SQLException E) { |
177 |
|
|
System.err.println("Error: Unable to create neighbourhood table."); |
178 |
|
|
System.err.println("SQLException: " + E.getMessage()); |
179 |
|
|
System.err.println("SQLState: " + E.getSQLState()); |
180 |
|
|
System.err.println("VendorError: " + E.getErrorCode()); |
181 |
|
|
} // end catch |
182 |
|
|
|
183 |
|
|
return; |
184 |
|
|
} |
185 |
|
|
|
186 |
|
|
// insert neighbourhood entries for each graph with from <= graph_id <= to |
187 |
|
|
public static void insertNeighbourhoods(int fromIdx, int toIdx) { |
188 |
|
|
Statement graphsStmt, |
189 |
|
|
nodesStmt, |
190 |
|
|
edgesStmt; |
191 |
|
|
ResultSet graphsRs, |
192 |
|
|
nodesRs, |
193 |
|
|
edgesRs; |
194 |
|
|
String query, |
195 |
|
|
chainId = "?", |
196 |
|
|
resType, |
197 |
|
|
nbhResType, |
198 |
|
|
hash; |
199 |
|
|
int graphId = 0, |
200 |
|
|
entryId, |
201 |
|
|
resNum = 0, |
202 |
|
|
numGraphs = 0, |
203 |
|
|
numNodes = 0, |
204 |
|
|
// existingNodes, |
205 |
|
|
resTypeNum, |
206 |
|
|
nbhResCount, |
207 |
|
|
nbhResTypeNum, |
208 |
|
|
nbhSize; |
209 |
|
|
int[] nbh; |
210 |
|
|
|
211 |
|
|
try { |
212 |
|
|
// get number of graphs |
213 |
|
|
graphsStmt = conn.createStatement(); |
214 |
|
|
query = "SELECT COUNT(*) FROM " + listTable + |
215 |
|
|
" WHERE graph_id >= " + fromIdx + " AND graph_id <= " + toIdx + ";"; |
216 |
|
|
graphsRs = graphsStmt.executeQuery(query); |
217 |
|
|
if(graphsRs.next()) { |
218 |
|
|
numGraphs = graphsRs.getInt(1); |
219 |
|
|
} |
220 |
|
|
graphsRs.close(); |
221 |
|
|
graphsStmt.close(); |
222 |
|
|
System.out.println("Processing " + numGraphs + (numGraphs==1?" graph...":" graphs...")); |
223 |
|
|
|
224 |
|
|
// iterate over all graphs |
225 |
|
|
graphsStmt = conn.createStatement(); |
226 |
|
|
query = "SELECT graph_id, entry_id FROM " + listTable + |
227 |
|
|
" WHERE graph_id >= " + fromIdx + " AND graph_id <= " + toIdx + ";"; |
228 |
|
|
graphsRs = graphsStmt.executeQuery(query); |
229 |
|
|
numGraphs = 0; |
230 |
|
|
while(graphsRs.next()) { |
231 |
|
|
graphId = graphsRs.getInt(1); |
232 |
|
|
entryId = graphsRs.getInt(2); |
233 |
|
|
|
234 |
|
|
// check whether neighbourhoods for this graph already exist |
235 |
|
|
// nodesStmt = conn.createStatement(); |
236 |
|
|
// query = "SELECT COUNT(*) FROM " + nbhTable + " WHERE graph_id = " + graphId + ";"; |
237 |
|
|
// nodesRs = nodesStmt.executeQuery(query); |
238 |
|
|
// if(nodesRs.next()) { |
239 |
|
|
// existingNodes = nodesRs.getInt(1); |
240 |
|
|
// if(existingNodes > 0) { |
241 |
|
|
// System.out.println("Warning: Neighbourhoods for the graph with graph_id = " |
242 |
|
|
// + graphId + " already exist."); |
243 |
|
|
// } |
244 |
|
|
// } |
245 |
|
|
// nodesRs.close(); |
246 |
|
|
// nodesStmt.close(); |
247 |
|
|
|
248 |
|
|
// extract all nodes and edges for this graph into temporary tables |
249 |
|
|
nodesStmt = conn.createStatement(); |
250 |
|
|
nodesStmt.addBatch("DROP TEMPORARY TABLE IF EXISTS " + tempNodes + ";"); |
251 |
|
|
nodesStmt.addBatch("DROP TEMPORARY TABLE IF EXISTS " + tempEdges + ";"); |
252 |
|
|
nodesStmt.addBatch("CREATE TEMPORARY TABLE " + tempNodes + |
253 |
|
|
" SELECT * FROM " + nodeTable + " WHERE graph_id = " + graphId + ";"); |
254 |
|
|
nodesStmt.addBatch("CREATE TEMPORARY TABLE " + tempEdges + |
255 |
|
|
" SELECT * FROM " + edgeTable + " WHERE graph_id = " + graphId + |
256 |
|
|
" AND " + edgeType + " > 0;"); // take only relevant edges |
257 |
|
|
// if necessary create indices to speed up queries |
258 |
|
|
nodesStmt.executeBatch(); |
259 |
|
|
nodesStmt.close(); |
260 |
|
|
|
261 |
|
|
// iterate over all nodes for this graph |
262 |
|
|
nodesStmt = conn.createStatement(); |
263 |
|
|
query = "SELECT cid, num, res FROM "+ tempNodes + ";"; // graph_id matches automatically |
264 |
|
|
nodesRs = nodesStmt.executeQuery(query); |
265 |
|
|
while(nodesRs.next()) { |
266 |
|
|
chainId = nodesRs.getString(1); |
267 |
|
|
resNum = nodesRs.getInt(2); |
268 |
|
|
resType = nodesRs.getString(3); |
269 |
|
|
resTypeNum = aa_three2num(resType); |
270 |
|
|
if(resTypeNum < 1 || resTypeNum > 20) { |
271 |
|
|
System.err.println("Warning: Non standard amino acid " + resType + |
272 |
|
|
" at position " + resNum + " in chain " + chainId + |
273 |
|
|
" of graph " + graphId + "."); |
274 |
|
|
} |
275 |
|
|
|
276 |
|
|
// get residue counts from edges table |
277 |
|
|
edgesStmt = conn.createStatement(); |
278 |
|
|
query = "SELECT j_res AS res, COUNT(*) AS count FROM "+ tempEdges + |
279 |
|
|
" WHERE i_cid = '" + chainId + "' AND i_num = " + resNum + |
280 |
|
|
" GROUP BY j_res;"; |
281 |
|
|
//System.out.println(query); |
282 |
|
|
edgesRs = edgesStmt.executeQuery(query); |
283 |
|
|
|
284 |
|
|
nbh = new int[21]; // set all residue counts to zero |
285 |
|
|
nbhSize = 0; |
286 |
|
|
while(edgesRs.next()) { |
287 |
|
|
nbhResType = edgesRs.getString(1); |
288 |
|
|
nbhResCount = edgesRs.getInt(2); |
289 |
|
|
nbhResTypeNum = aa_three2num(nbhResType); |
290 |
|
|
if(nbhResTypeNum >= 1 && nbhResTypeNum <= 20) { |
291 |
|
|
nbh[nbhResTypeNum] = nbhResCount; |
292 |
|
|
nbhSize += nbhResCount; |
293 |
|
|
} |
294 |
|
|
} |
295 |
|
|
edgesRs.close(); |
296 |
|
|
edgesStmt.close(); |
297 |
|
|
|
298 |
|
|
// update nbh table |
299 |
|
|
hash = nbhGetHashValue(nbh); |
300 |
|
|
query = "INSERT INTO " + nbhTable + " (graph_id, cid, num, res, entry_id, size, hash, " |
301 |
|
|
+ "ALA, ARG, ASN, ASP, CYS, GLN, GLU, GLY, HIS, ILE, " |
302 |
|
|
+ "LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL) " + |
303 |
|
|
"VALUES (" + graphId + ",'" + chainId + "'," + resNum + ",'" |
304 |
|
|
+ resType + "'," + entryId + "," + nbhSize + ",'" + hash + "'," |
305 |
|
|
+ nbh[1] + "," + nbh[2] + "," + nbh[3] + "," + nbh[4] + "," + nbh[5] + "," |
306 |
|
|
+ nbh[6] + "," + nbh[7] + "," + nbh[8] + "," + nbh[9] + "," + nbh[10] + "," |
307 |
|
|
+ nbh[11] + "," + nbh[12] + "," + nbh[13] + "," + nbh[14] + "," + nbh[15] + "," |
308 |
|
|
+ nbh[16] + "," + nbh[17] + "," + nbh[18] + "," + nbh[19] + "," + nbh[20] + ");"; |
309 |
|
|
edgesStmt = conn.createStatement(); |
310 |
|
|
if(edgesStmt.executeUpdate(query) != 1) { |
311 |
|
|
System.err.println("Error: Insert into table " + nbhTable + " failed."); |
312 |
|
|
System.exit(1); |
313 |
|
|
} |
314 |
|
|
edgesStmt.close(); |
315 |
|
|
numNodes++; |
316 |
|
|
} // end of while over nodes |
317 |
|
|
nodesRs.close(); |
318 |
|
|
nodesStmt.close(); |
319 |
|
|
numGraphs++; |
320 |
|
|
if(numGraphs % 1000 == 0) { |
321 |
|
|
System.out.print(numGraphs + " "); |
322 |
|
|
System.out.flush(); |
323 |
|
|
} |
324 |
|
|
System.gc(); // clean up |
325 |
|
|
} // end while over all graphs |
326 |
|
|
graphsRs.close(); |
327 |
|
|
graphsStmt.close(); |
328 |
|
|
} catch (SQLException E) { |
329 |
|
|
System.err.println("Error in function insertNeighbourhoods:"); |
330 |
|
|
System.err.println("graphId = "+ graphId +", chainId = "+ chainId +", resNum = "+ resNum); |
331 |
|
|
System.err.println("SQLException: " + E.getMessage()); |
332 |
|
|
System.err.println("SQLState: " + E.getSQLState()); |
333 |
|
|
System.err.println("VendorError: " + E.getErrorCode()); |
334 |
|
|
} // end catch |
335 |
|
|
|
336 |
|
|
System.out.println(); |
337 |
|
|
System.out.println("Extracted "+ numNodes + " neighbourhood" + (numNodes==1?"":"s") |
338 |
|
|
+ " from " + numGraphs + " graph" + (numGraphs==1?"":"s") + "."); |
339 |
|
|
return; |
340 |
|
|
} |
341 |
|
|
|
342 |
|
|
public static void createIndicesOnNeighbourhoodTable() { |
343 |
|
|
// index on graph_id |
344 |
|
|
// index on graph_id, cid, num |
345 |
|
|
// index on res |
346 |
|
|
// index on size |
347 |
|
|
// index on hash |
348 |
|
|
} |
349 |
|
|
|
350 |
|
|
/* ----------------------------------- main -------------------------------------------*/ |
351 |
|
|
|
352 |
|
|
public static void main(String[] args) { |
353 |
|
|
|
354 |
|
|
int from = 0, |
355 |
|
|
to = 0; |
356 |
|
|
|
357 |
|
|
// read command line parameters |
358 |
|
|
if ( args.length != 2) { |
359 |
|
|
System.err.println("Usage: makeNbhTbl [start_index] [end_index]"); |
360 |
|
|
System.exit(1); |
361 |
|
|
} else { |
362 |
|
|
|
363 |
|
|
// read parameter 1 |
364 |
|
|
|
365 |
|
|
try { |
366 |
|
|
from = Integer.parseInt(args[0]); |
367 |
|
|
if(from <= 0) { |
368 |
|
|
System.err.println("Parameter 1 has to be positive"); |
369 |
|
|
System.exit(1); |
370 |
|
|
} |
371 |
|
|
} |
372 |
|
|
catch(NumberFormatException e) { |
373 |
|
|
System.err.println("Parameter 1 has to be an integer"); |
374 |
|
|
System.exit(1); |
375 |
|
|
} |
376 |
|
|
|
377 |
|
|
// read parameter 2 |
378 |
|
|
|
379 |
|
|
try { |
380 |
|
|
to = Integer.parseInt(args[1]); |
381 |
|
|
if(to <= 0) { |
382 |
|
|
System.err.println("Parameter 2 has to be positive"); |
383 |
|
|
System.exit(1); |
384 |
|
|
} else |
385 |
|
|
if(from > to) { |
386 |
|
|
System.err.println("Start index has to be smaller or equal than end index"); |
387 |
|
|
System.exit(1); |
388 |
|
|
} |
389 |
|
|
} |
390 |
|
|
catch(NumberFormatException e) { |
391 |
|
|
System.err.println("Parameter 2 has to be an integer"); |
392 |
|
|
System.exit(1); |
393 |
|
|
} |
394 |
|
|
|
395 |
|
|
} |
396 |
|
|
|
397 |
|
|
// execute |
398 |
|
|
openDBConnection(); |
399 |
|
|
createNeighbourhoodTableIfNotExists(); |
400 |
|
|
insertNeighbourhoods(from, to); |
401 |
|
|
createIndicesOnNeighbourhoodTable(); |
402 |
|
|
closeDBConnection(); |
403 |
|
|
|
404 |
|
|
} // end of main |
405 |
|
|
|
406 |
|
|
} // end of class makeNbhTbl |