1 |
package proteinstructure; |
2 |
|
3 |
import java.io.File; |
4 |
import java.io.FileOutputStream; |
5 |
import java.io.IOException; |
6 |
import java.io.PrintStream; |
7 |
import java.sql.ResultSet; |
8 |
import java.sql.SQLException; |
9 |
import java.sql.Statement; |
10 |
import java.util.Collection; |
11 |
import java.util.HashMap; |
12 |
import java.util.Locale; |
13 |
import java.util.TreeMap; |
14 |
|
15 |
import tools.MySQLConnection; |
16 |
|
17 |
import edu.uci.ics.jung.graph.util.Pair; |
18 |
|
19 |
/** |
20 |
* A Residue Interaction Graph |
21 |
* |
22 |
*/ |
23 |
public class RIGraph extends ProtStructGraph<RIGNode,RIGEdge> { |
24 |
|
25 |
private static final long serialVersionUID = 1L; |
26 |
|
27 |
private static final String SINGLEMODELS_DB = "ioannis"; |
28 |
|
29 |
// fields |
30 |
protected double distCutoff; |
31 |
protected String contactType; // use AAinfo.isValidContactType() to test for validity |
32 |
|
33 |
/**
 * Constructs an empty RIGraph: the contact type is unset (null) and the
 * distance cutoff is 0.
 */
public RIGraph() {
    super();
    this.contactType = null;
    this.distCutoff = 0;
}
38 |
|
39 |
/** |
40 |
* Constructs a RIGraph with a sequence but no edges |
41 |
* @param sequence |
42 |
*/ |
43 |
public RIGraph(String sequence) { |
44 |
super(); |
45 |
this.sequence = sequence; |
46 |
this.fullLength = sequence.length(); |
47 |
this.distCutoff=0; |
48 |
this.contactType=null; |
49 |
serials2nodes = new TreeMap<Integer,RIGNode>(); |
50 |
for(int i=0; i < sequence.length(); i++) { |
51 |
RIGNode node = new RIGNode(i+1,AAinfo.oneletter2threeletter(Character.toString(sequence.charAt(i)))); |
52 |
this.addVertex(node); |
53 |
serials2nodes.put(i+1, node); |
54 |
} |
55 |
} |
56 |
|
57 |
/** |
58 |
* Returns the contact type of this RIGraph |
59 |
* @return |
60 |
*/ |
61 |
public String getContactType() { |
62 |
return contactType; |
63 |
} |
64 |
|
65 |
/** |
66 |
* Sets the contact type of this RIGraph |
67 |
* @param ct the contact type |
68 |
*/ |
69 |
public void setContactType(String contactType) { |
70 |
this.contactType=contactType; |
71 |
} |
72 |
|
73 |
/** |
74 |
* Returns the distance cutoff for this RIGraph. |
75 |
* @return the distance cutoff |
76 |
*/ |
77 |
public double getCutoff(){ |
78 |
return distCutoff; |
79 |
} |
80 |
|
81 |
/** |
82 |
* Sets the distance cutoff for this RIGraph. |
83 |
* @param distCutoff the distance cutoff |
84 |
*/ |
85 |
public void setCutoff(double distCutoff) { |
86 |
this.distCutoff = distCutoff; |
87 |
} |
88 |
|
89 |
/** |
90 |
* Returns a RIGNbhood that contains the neighbourhood of given RIGNode |
91 |
* @param node |
92 |
* @return |
93 |
*/ |
94 |
public RIGNbhood getNbhood (RIGNode node) { |
95 |
Collection<RIGNode> nbs = this.getNeighbors(node); |
96 |
RIGNbhood nbhood = new RIGNbhood(node); |
97 |
for (RIGNode nb:nbs) { |
98 |
nbhood.put(nb.getResidueSerial(), nb); |
99 |
} |
100 |
return nbhood; |
101 |
} |
102 |
|
103 |
/** |
104 |
* Returns a RIGNbhood that contains the 2nd shell neighbourhood of given RIGNode |
105 |
* @param node |
106 |
* @return |
107 |
*/ |
108 |
public RIGNbhood getSecondShellNbhood (RIGNode node) { |
109 |
Collection<RIGNode> nbs = this.getNeighbors(node); |
110 |
RIGNbhood nbhood = new RIGNbhood(node); |
111 |
for (RIGNode nb:nbs) { |
112 |
for (RIGNode nb2:this.getNeighbors(nb)) { |
113 |
if (nb2!=node) { |
114 |
// RIGNbhood is a TreeMap that should take care of not inserting duplicates |
115 |
nbhood.put(nb2.getResidueSerial(),nb2); |
116 |
} |
117 |
} |
118 |
} |
119 |
return nbhood; |
120 |
} |
121 |
|
122 |
/** |
123 |
* Returns a RIGCommonNbhood that contains common neighbours of given RIGNodes iNode, jNode |
124 |
* @param iNode |
125 |
* @param jNode |
126 |
* @return |
127 |
*/ |
128 |
public RIGCommonNbhood getCommonNbhood(RIGNode iNode, RIGNode jNode) { |
129 |
Collection<RIGNode> iNbs = this.getNeighbors(iNode); |
130 |
Collection<RIGNode> jNbs = this.getNeighbors(jNode); |
131 |
boolean connected = false; |
132 |
if (this.findEdge(iNode, jNode)!=null) connected = true; |
133 |
RIGCommonNbhood comNbhood = new RIGCommonNbhood(iNode, jNode, connected); |
134 |
for (RIGNode iNb: iNbs) { |
135 |
if (jNbs.contains(iNb)) { |
136 |
comNbhood.put(iNb.getResidueSerial(), iNb); |
137 |
} |
138 |
} |
139 |
return comNbhood; |
140 |
} |
141 |
|
142 |
/** |
143 |
* Returns all common neighborhood sizes for each cell of the contact map (contact or non-contact) |
144 |
* @return |
145 |
*/ |
146 |
public HashMap<Pair<RIGNode>,Integer> getAllCommonNbhSizes() { |
147 |
HashMap<Pair<RIGNode>,Integer> comNbhSizes = new HashMap<Pair<RIGNode>, Integer>(); |
148 |
boolean directed = this.isDirected(); |
149 |
for (RIGNode n1:this.getVertices()) { |
150 |
for (RIGNode n2:this.getVertices()) { |
151 |
if (directed) { |
152 |
if (n1!=n2) { |
153 |
comNbhSizes.put(new Pair<RIGNode>(n1,n2),getCommonNbhood(n1, n2).size()); |
154 |
} |
155 |
} else { |
156 |
if (n1.getResidueSerial()<n2.getResidueSerial()) { |
157 |
comNbhSizes.put(new Pair<RIGNode>(n1,n2),getCommonNbhood(n1, n2).size()); |
158 |
} |
159 |
} |
160 |
} |
161 |
} |
162 |
return comNbhSizes; |
163 |
} |
164 |
|
165 |
public int getContactRange(RIGEdge edge) { |
166 |
Pair<RIGNode> pair = this.getEndpoints(edge); |
167 |
return Math.abs(pair.getFirst().getResidueSerial()-pair.getSecond().getResidueSerial()); |
168 |
} |
169 |
|
170 |
//TODO evaluatePrediction methods should be in ProtStructGraph.
// But to be able to put them there we would need to pass in a Transformer that returns atom or residue serials, depending on whether we are in an AI or RI graph.
172 |
/** |
173 |
* Evaluate this graph (assuming it is a prediction) against an original graph |
174 |
* @param originalGraph |
175 |
* @return |
176 |
*/ |
177 |
public PredEval evaluatePrediction(RIGraph originalGraph) { |
178 |
return evaluatePrediction(originalGraph, 1); |
179 |
} |
180 |
|
181 |
/** |
182 |
* Evaluate this graph (assuming it is a prediction) against an original graph, |
183 |
* considering only edges with sequence separation at least minSeqSep. |
184 |
* @param originalGraph |
185 |
* @param minSeqSep |
186 |
* @return |
187 |
*/ |
188 |
public PredEval evaluatePrediction(RIGraph originalGraph, int minSeqSep) { |
189 |
|
190 |
Collection<RIGEdge> predictedContacts = this.getEdges(); |
191 |
Collection<RIGEdge> origContacts = originalGraph.getEdges(); |
192 |
// total predicted contacts |
193 |
int predicted = 0; |
194 |
for(RIGEdge e:predictedContacts) { |
195 |
if(this.getContactRange(e) >= minSeqSep) { |
196 |
predicted++; |
197 |
} |
198 |
} |
199 |
|
200 |
// total native contacts |
201 |
int original = 0; |
202 |
for(RIGEdge e:origContacts) { |
203 |
if(originalGraph.getContactRange(e) >= minSeqSep) { |
204 |
original++; |
205 |
} |
206 |
} |
207 |
|
208 |
// total size of contact map (potential contacts) |
209 |
int cmtotal = 0; |
210 |
if (originalGraph.isDirected()){ |
211 |
cmtotal = (originalGraph.getFullLength()-(minSeqSep-1))*(originalGraph.getFullLength()-minSeqSep); |
212 |
} else { |
213 |
cmtotal = (int)(((originalGraph.getFullLength()-(minSeqSep-1))*(originalGraph.getFullLength()-minSeqSep))/2); |
214 |
} |
215 |
int TruePos=0, FalsePos=0, TrueNeg=0, FalseNeg=0; |
216 |
|
217 |
// directed/ non-directed graphs should be both fine with this code |
218 |
// the only thing that changes between directed/non-directed is the count of total cells in contact map (taken care for above) |
219 |
for (RIGEdge predictedCont:predictedContacts){ |
220 |
if(this.getContactRange(predictedCont) >= minSeqSep) { |
221 |
Pair<RIGNode> predNodePair = this.getEndpoints(predictedCont); |
222 |
RIGNode node1inOrig = originalGraph.getNodeFromSerial(predNodePair.getFirst().getResidueSerial()); |
223 |
RIGNode node2inOrig = originalGraph.getNodeFromSerial(predNodePair.getSecond().getResidueSerial()); |
224 |
if (originalGraph.findEdge(node1inOrig, node2inOrig)!=null) { |
225 |
TruePos++; |
226 |
} |
227 |
else { |
228 |
FalsePos++; |
229 |
} |
230 |
} |
231 |
} |
232 |
|
233 |
for (RIGEdge origCont:origContacts) { |
234 |
if(originalGraph.getContactRange(origCont) >= minSeqSep) { |
235 |
Pair<RIGNode> origNodePair = originalGraph.getEndpoints(origCont); |
236 |
RIGNode node1inPred = this.getNodeFromSerial(origNodePair.getFirst().getResidueSerial()); |
237 |
RIGNode node2inPred = this.getNodeFromSerial(origNodePair.getSecond().getResidueSerial()); |
238 |
if (this.findEdge(node1inPred,node2inPred)==null) { |
239 |
FalseNeg++; |
240 |
} |
241 |
} |
242 |
} |
243 |
TrueNeg=cmtotal-TruePos-FalsePos-FalseNeg; |
244 |
PredEval eval = new PredEval(TruePos,FalsePos,TrueNeg,FalseNeg,0,predicted,original,cmtotal); |
245 |
return eval; |
246 |
} |
247 |
|
248 |
/** |
249 |
* Write graph to given db, using our db graph aglappe format, |
250 |
* i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge |
251 |
* @param conn |
252 |
* @param db |
253 |
* @throws SQLException |
254 |
*/ |
255 |
//TODO we might want to move this to a graph i/o class |
256 |
public void write_graph_to_db(MySQLConnection conn, String db) throws SQLException{ |
257 |
|
258 |
// values we fix to constant |
259 |
String CW = "1"; |
260 |
String CR = "(true)"; |
261 |
String EXPBB = "0"; |
262 |
String ctStr = contactType; |
263 |
String weightedStr = "0"; |
264 |
String directedStr = isDirected()?"1":"0"; |
265 |
|
266 |
if (contactType.endsWith("_CAGLY")) { |
267 |
ctStr = contactType.replace("_CAGLY", ""); |
268 |
} |
269 |
if (ctStr.equals("ALL")) { |
270 |
ctStr = "BB+SC+BB/SC"; |
271 |
} |
272 |
if (AAinfo.isValidMultiAtomContactType(contactType)) { |
273 |
CW = ctStr; |
274 |
weightedStr = "1"; |
275 |
} |
276 |
if (contactType.endsWith("_CAGLY") || contactType.equals("Cb")) { |
277 |
EXPBB = "-1"; |
278 |
} |
279 |
if (minSeqSep != -1) { |
280 |
CR = "((i_cid!=j_cid)OR(abs(i_num-j_num)>="+minSeqSep+"))"; |
281 |
} |
282 |
|
283 |
int pgraphid=0; |
284 |
int graphid=0; |
285 |
String sql = "SELECT graph_id FROM "+db+".chain_graph " + |
286 |
" WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"'" + |
287 |
" AND model_serial = "+model+" AND dist = "+distCutoff+" AND expBB = '"+EXPBB+"'" + |
288 |
" AND method = 'rc-cutoff';"; |
289 |
Statement stmt = conn.createStatement(); |
290 |
ResultSet rsst = stmt.executeQuery(sql); |
291 |
if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid |
292 |
pgraphid = rsst.getInt(1); |
293 |
} else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid) |
294 |
// we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment) |
295 |
String pdbChainCodeStr = pdbChainCode; |
296 |
if (!pdbChainCode.equals("NULL")) { |
297 |
pdbChainCodeStr="'"+pdbChainCode+"'"; |
298 |
} |
299 |
sql = "INSERT INTO "+db+".chain_graph (accession_code,chain_pdb_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " + |
300 |
"VALUES ('"+pdbCode+"', "+pdbChainCodeStr+",'"+chainCode+"', "+model+", "+distCutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+secondaryStructure.getNumElements()+", now())"; |
301 |
Statement stmt2 = conn.createStatement(); |
302 |
stmt2.executeUpdate(sql); |
303 |
// now we take the newly assigned graph_id as pgraphid |
304 |
sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1"; |
305 |
ResultSet rsst2 = stmt2.executeQuery(sql); |
306 |
if (rsst2.next()){ |
307 |
pgraphid = rsst2.getInt(1); |
308 |
} |
309 |
stmt2.close(); |
310 |
rsst2.close(); |
311 |
} |
312 |
rsst.close(); |
313 |
// now we insert the graph info into single_model_graph |
314 |
// 1st we grab the single_model_id |
315 |
int singlemodelid = 0; |
316 |
sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE "+ |
317 |
" dist="+distCutoff+" AND expBB="+EXPBB+" AND CW='"+CW+"' AND CT='"+ctStr+"' AND CR='"+CR+"';"; |
318 |
rsst = stmt.executeQuery(sql); |
319 |
if (rsst.next()){ |
320 |
singlemodelid = rsst.getInt(1); |
321 |
} |
322 |
rsst.close(); |
323 |
// and then insert to single_model_graph |
324 |
sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,w,d,num_nodes,date) " + |
325 |
" VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+distCutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+weightedStr+", "+directedStr+", "+getObsLength()+", now())"; |
326 |
stmt.executeUpdate(sql); |
327 |
// and we grab the graph_id just assigned in single_model_graph |
328 |
sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1"; |
329 |
rsst = stmt.executeQuery(sql); |
330 |
if (rsst.next()){ |
331 |
graphid = rsst.getInt(1); |
332 |
} |
333 |
rsst.close(); |
334 |
stmt.close(); |
335 |
|
336 |
// inserting nodes |
337 |
// get the max node in db |
338 |
int maxNodeId = 0; |
339 |
sql = "SELECT MAX(node_id) FROM "+db+".single_model_node;"; |
340 |
stmt = conn.createStatement(); |
341 |
rsst = stmt.executeQuery(sql); |
342 |
if (rsst.next()){ |
343 |
maxNodeId = rsst.getInt(1); |
344 |
} |
345 |
rsst.close(); |
346 |
stmt.close(); |
347 |
|
348 |
stmt = conn.createStatement(); |
349 |
for (int resser:serials2nodes.keySet()) { |
350 |
RIGNode node = serials2nodes.get(resser); |
351 |
String res = AAinfo.threeletter2oneletter(node.getResidueType()); |
352 |
RIGNbhood nbh = this.getNbhood(node); |
353 |
String secStructType = null; |
354 |
String secStructId = null; |
355 |
String sheetSerial = null; |
356 |
String turn = null; |
357 |
SecStrucElement sselem = node.getSecStrucElement(); |
358 |
if (sselem!=null){ |
359 |
secStructType = quote(Character.toString(sselem.getType())); |
360 |
secStructId = quote(sselem.getId()); |
361 |
char sheetSerialChar = sselem.getSheetSerial(); |
362 |
if (sheetSerialChar != 0) { |
363 |
sheetSerial = quote(Character.toString(sheetSerialChar)); |
364 |
} |
365 |
turn = sselem.isTurn()?"1":"0"; |
366 |
} |
367 |
if (isDirected()){ // we insert k(=k_in+k_out), k_in and k_out |
368 |
sql = "INSERT INTO "+db+".single_model_node "+ |
369 |
" (graph_id, node_id, cid, num, res, "+ |
370 |
" sstype, ssid, sheet_serial, turn, "+ |
371 |
" k, k_in, k_out, "+ |
372 |
" n, nwg, n_num) " + |
373 |
" VALUES ("+graphid+", "+(maxNodeId+resser)+", '"+chainCode+"', "+resser+", '"+res+"', "+ |
374 |
" "+secStructType+", "+secStructId+", "+sheetSerial+", "+turn+", "+ |
375 |
(getPredecessorCount(node)+getSuccessorCount(node))+", "+getPredecessorCount(node)+", "+getSuccessorCount(node)+", "+ |
376 |
"'"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')"; |
377 |
} else { // we insert k (and no k_in or k_out) |
378 |
sql = "INSERT INTO "+db+".single_model_node "+ |
379 |
" (graph_id, node_id, cid, num, res, "+ |
380 |
" sstype, ssid, sheet_serial, turn, "+ |
381 |
" k, n, nwg, n_num) " + |
382 |
" VALUES ("+graphid+", "+(maxNodeId+resser)+", '"+chainCode+"', "+resser+", '"+res+"', "+ |
383 |
" "+secStructType+", "+secStructId+", "+sheetSerial+", "+turn+", "+ |
384 |
getNeighborCount(node)+", '"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')"; |
385 |
} |
386 |
stmt.executeUpdate(sql); |
387 |
} |
388 |
|
389 |
// inserting edges |
390 |
// get the max weight |
391 |
double maxWeight = 0; |
392 |
for (RIGEdge cont:getEdges()) { |
393 |
maxWeight = (maxWeight<cont.getAtomWeight())?cont.getAtomWeight():maxWeight; |
394 |
} |
395 |
for (RIGEdge cont:getEdges()){ |
396 |
RIGNode i_node = getEndpoints(cont).getFirst(); |
397 |
RIGNode j_node = getEndpoints(cont).getSecond(); |
398 |
String i_res = AAinfo.threeletter2oneletter(i_node.getResidueType()); |
399 |
String j_res = AAinfo.threeletter2oneletter(j_node.getResidueType()); |
400 |
|
401 |
String i_secStructType = null; |
402 |
String i_secStructId = null; |
403 |
String i_sheetSerial = null; |
404 |
String i_turn = null; |
405 |
SecStrucElement i_sselem = i_node.getSecStrucElement(); |
406 |
if (i_sselem!=null){ |
407 |
i_secStructType = quote(Character.toString(i_sselem.getType())); |
408 |
i_secStructId = quote(i_sselem.getId()); |
409 |
char sheetSerialChar = i_sselem.getSheetSerial(); |
410 |
if (sheetSerialChar != 0) { |
411 |
i_sheetSerial = quote(Character.toString(sheetSerialChar)); |
412 |
} |
413 |
i_turn = i_sselem.isTurn()?"1":"0"; |
414 |
} |
415 |
|
416 |
String j_secStructType = null; |
417 |
String j_secStructId = null; |
418 |
String j_sheetSerial = null; |
419 |
String j_turn = null; |
420 |
SecStrucElement j_sselem = j_node.getSecStrucElement(); |
421 |
if (j_sselem!=null){ |
422 |
j_secStructType = quote(Character.toString(j_sselem.getType())); |
423 |
j_secStructId = quote(j_sselem.getId()); |
424 |
char sheetSerialChar = j_sselem.getSheetSerial(); |
425 |
if (sheetSerialChar != 0) { |
426 |
j_sheetSerial = quote(Character.toString(sheetSerialChar)); |
427 |
} |
428 |
j_turn = j_sselem.isTurn()?"1":"0"; |
429 |
} |
430 |
|
431 |
sql = "INSERT INTO "+db+".single_model_edge "+ |
432 |
" (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+ |
433 |
" j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight) " + |
434 |
" VALUES ("+graphid+", "+(maxNodeId+i_node.getResidueSerial())+", '"+chainCode+"', "+i_node.getResidueSerial()+", '"+i_res+"', "+i_secStructType+", "+i_secStructId+", "+i_sheetSerial+", "+i_turn+", "+ |
435 |
(maxNodeId+j_node.getResidueSerial())+", '"+chainCode+"', "+j_node.getResidueSerial()+", '"+j_res+"', "+j_secStructType+", "+j_secStructId+", "+j_sheetSerial+", "+j_turn+", "+ |
436 |
cont.getAtomWeight()+", "+(cont.getAtomWeight()/maxWeight)+")"; |
437 |
stmt.executeUpdate(sql); |
438 |
if(!isDirected()) {// we want both side of the matrix in the table to follow Ioannis' convention |
439 |
// so we insert the reverse contact by swapping i, j in insertion |
440 |
sql = "INSERT INTO "+db+".single_model_edge "+ |
441 |
" (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+ |
442 |
" j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight) " + |
443 |
" VALUES ("+graphid+", "+(maxNodeId+j_node.getResidueSerial())+", '"+chainCode+"', "+j_node.getResidueSerial()+", '"+j_res+"', "+j_secStructType+", "+j_secStructId+", "+j_sheetSerial+", "+j_turn+", "+ |
444 |
(maxNodeId+i_node.getResidueSerial())+", '"+chainCode+"', "+i_node.getResidueSerial()+", '"+i_res+"', "+i_secStructType+", "+i_secStructId+", "+i_sheetSerial+", "+i_turn+", "+ |
445 |
cont.getAtomWeight()+", "+(cont.getAtomWeight()/maxWeight)+")"; |
446 |
stmt.executeUpdate(sql); |
447 |
} |
448 |
} |
449 |
|
450 |
stmt.close(); |
451 |
|
452 |
} |
453 |
|
454 |
/** |
455 |
* Write graph to given db, using our db graph aglappe format, |
456 |
* i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge |
457 |
* @param conn |
458 |
* @param db |
459 |
* @throws SQLException |
460 |
*/ |
461 |
//TODO we might want to move this to a graph i/o class |
462 |
public void write_graph_to_db_fast(MySQLConnection conn, String db) throws SQLException, IOException { |
463 |
|
464 |
// values we fix to constant |
465 |
String CW = "1"; |
466 |
String CR = "(true)"; |
467 |
String EXPBB = "0"; |
468 |
String ctStr = contactType; |
469 |
String weightedStr = "0"; |
470 |
String directedStr = isDirected()?"1":"0"; |
471 |
|
472 |
if (contactType.endsWith("_CAGLY")) { |
473 |
ctStr = contactType.replace("_CAGLY", ""); |
474 |
} |
475 |
if (ctStr.equals("ALL")) { |
476 |
ctStr = "BB+SC+BB/SC"; |
477 |
} |
478 |
if (AAinfo.isValidMultiAtomContactType(contactType)) { |
479 |
CW = ctStr; |
480 |
weightedStr = "1"; |
481 |
} |
482 |
if (contactType.endsWith("_CAGLY") || contactType.equals("Cb")) { |
483 |
EXPBB = "-1"; |
484 |
} |
485 |
if (minSeqSep != -1) { |
486 |
CR = "((i_cid!=j_cid)OR(abs(i_num-j_num)>="+minSeqSep+"))"; |
487 |
} |
488 |
|
489 |
int pgraphid=0; |
490 |
int graphid=0; |
491 |
String sql = "SELECT graph_id FROM "+db+".chain_graph " + |
492 |
" WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"'" + |
493 |
" AND model_serial = "+model+" AND dist = "+distCutoff+" AND expBB = '"+EXPBB+"'" + |
494 |
" AND method = 'rc-cutoff';"; |
495 |
Statement stmt = conn.createStatement(); |
496 |
ResultSet rsst = stmt.executeQuery(sql); |
497 |
if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid |
498 |
pgraphid = rsst.getInt(1); |
499 |
} else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid) |
500 |
// we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment) |
501 |
String pdbChainCodeStr = pdbChainCode; |
502 |
if (!pdbChainCode.equals("NULL")) { |
503 |
pdbChainCodeStr="'"+pdbChainCode+"'"; |
504 |
} |
505 |
sql = "INSERT INTO "+db+".chain_graph (accession_code,chain_pdb_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " + |
506 |
"VALUES ('"+pdbCode+"', "+pdbChainCodeStr+",'"+chainCode+"', "+model+", "+distCutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+secondaryStructure.getNumElements()+", now())"; |
507 |
Statement stmt2 = conn.createStatement(); |
508 |
stmt2.executeUpdate(sql); |
509 |
// now we take the newly assigned graph_id as pgraphid |
510 |
sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1"; |
511 |
ResultSet rsst2 = stmt2.executeQuery(sql); |
512 |
if (rsst2.next()){ |
513 |
pgraphid = rsst2.getInt(1); |
514 |
} |
515 |
stmt2.close(); |
516 |
rsst2.close(); |
517 |
} |
518 |
rsst.close(); |
519 |
// now we insert the graph info into single_model_graph |
520 |
// 1st we grab the single_model_id |
521 |
int singlemodelid = 0; |
522 |
sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE "+ |
523 |
" dist="+distCutoff+" AND expBB="+EXPBB+" AND CW='"+CW+"' AND CT='"+ctStr+"' AND CR='"+CR+"';"; |
524 |
rsst = stmt.executeQuery(sql); |
525 |
if (rsst.next()){ |
526 |
singlemodelid = rsst.getInt(1); |
527 |
} |
528 |
rsst.close(); |
529 |
// and then insert to single_model_graph |
530 |
sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,w,d,num_nodes,date) " + |
531 |
" VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+distCutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+weightedStr+", "+directedStr+", "+getObsLength()+", now())"; |
532 |
stmt.executeUpdate(sql); |
533 |
// and we grab the graph_id just assigned in single_model_graph |
534 |
sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1"; |
535 |
rsst = stmt.executeQuery(sql); |
536 |
if (rsst.next()){ |
537 |
graphid = rsst.getInt(1); |
538 |
} |
539 |
rsst.close(); |
540 |
stmt.close(); |
541 |
|
542 |
// inserting nodes |
543 |
PrintStream nodesOut = new PrintStream(new FileOutputStream(graphid+"_nodes.txt")); |
544 |
// get the max node in db |
545 |
int maxNodeId = 0; |
546 |
sql = "SELECT MAX(node_id) FROM "+db+".single_model_node;"; |
547 |
stmt = conn.createStatement(); |
548 |
rsst = stmt.executeQuery(sql); |
549 |
if (rsst.next()){ |
550 |
maxNodeId = rsst.getInt(1); |
551 |
} |
552 |
rsst.close(); |
553 |
stmt.close(); |
554 |
|
555 |
for (int resser:serials2nodes.keySet()) { |
556 |
|
557 |
RIGNode node = serials2nodes.get(resser); |
558 |
String res = AAinfo.threeletter2oneletter(node.getResidueType()); |
559 |
RIGNbhood nbh = this.getNbhood(node); |
560 |
String secStructType = "\\N"; |
561 |
String secStructId = "\\N"; |
562 |
String sheetSerial = "\\N"; |
563 |
String turn = null; |
564 |
SecStrucElement sselem = node.getSecStrucElement(); |
565 |
if (sselem!=null){ |
566 |
secStructType = Character.toString(sselem.getType()); |
567 |
secStructId = sselem.getId(); |
568 |
char sheetSerialChar = sselem.getSheetSerial(); |
569 |
if (sheetSerialChar != 0) { |
570 |
sheetSerial = Character.toString(sheetSerialChar); |
571 |
} |
572 |
turn = sselem.isTurn()?"1":"0"; |
573 |
} |
574 |
if (isDirected()){ // we insert k(=k_in+k_out), k_in and k_out |
575 |
nodesOut.println(graphid+"\t"+(maxNodeId+resser)+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+ |
576 |
secStructType+"\t"+secStructId+"\t"+sheetSerial+"\t"+turn+"\t"+ |
577 |
(getPredecessorCount(node)+getSuccessorCount(node))+"\t"+getPredecessorCount(node)+"\t"+getSuccessorCount(node)+"\t"+ |
578 |
nbh.getMotifNoGaps()+"\t"+nbh.getMotif()+"\t"+nbh.getCommaSeparatedResSerials()); |
579 |
} else { // we insert k (and no k_in or k_out) |
580 |
nodesOut.println(graphid+"\t"+(maxNodeId+resser)+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+ |
581 |
secStructType+"\t"+secStructId+"\t"+sheetSerial+"\t"+turn+"\t"+ |
582 |
getNeighborCount(node)+"\t"+"\\N"+"\t"+"\\N"+"\t"+ |
583 |
nbh.getMotifNoGaps()+"\t"+nbh.getMotif()+"\t"+nbh.getCommaSeparatedResSerials()); |
584 |
} |
585 |
} |
586 |
nodesOut.close(); |
587 |
stmt = conn.createStatement(); |
588 |
sql = "LOAD DATA LOCAL INFILE '"+graphid+"_nodes.txt' INTO TABLE "+db+".single_model_node "+ |
589 |
" (graph_id, node_id, cid, num, res, "+ |
590 |
" sstype, ssid, sheet_serial, turn, "+ |
591 |
" k, k_in, k_out, n, nwg, n_num);"; |
592 |
stmt.executeUpdate(sql); |
593 |
File fileToDelete = new File(graphid+"_nodes.txt"); |
594 |
if (fileToDelete.exists()) { |
595 |
fileToDelete.delete(); |
596 |
} |
597 |
|
598 |
// inserting edges |
599 |
PrintStream edgesOut = new PrintStream(new FileOutputStream(graphid+"_edges.txt")); |
600 |
// get the max weight |
601 |
double maxWeight = 0; |
602 |
for (RIGEdge cont:getEdges()) { |
603 |
maxWeight = (maxWeight<cont.getAtomWeight())?cont.getAtomWeight():maxWeight; |
604 |
} |
605 |
for (RIGEdge cont:getEdges()){ |
606 |
RIGNode i_node = getEndpoints(cont).getFirst(); |
607 |
RIGNode j_node = getEndpoints(cont).getSecond(); |
608 |
String i_res = AAinfo.threeletter2oneletter(i_node.getResidueType()); |
609 |
String j_res = AAinfo.threeletter2oneletter(j_node.getResidueType()); |
610 |
|
611 |
String i_secStructType = "\\N"; |
612 |
String i_secStructId = "\\N"; |
613 |
String i_sheetSerial = "\\N"; |
614 |
String i_turn = null; |
615 |
SecStrucElement i_sselem = i_node.getSecStrucElement(); |
616 |
if (i_sselem!=null){ |
617 |
i_secStructType = Character.toString(i_sselem.getType()); |
618 |
i_secStructId = i_sselem.getId(); |
619 |
char sheetSerialChar = i_sselem.getSheetSerial(); |
620 |
if (sheetSerialChar != 0) { |
621 |
i_sheetSerial = Character.toString(sheetSerialChar); |
622 |
} |
623 |
i_turn = i_sselem.isTurn()?"1":"0"; |
624 |
} |
625 |
|
626 |
String j_secStructType = "\\N"; |
627 |
String j_secStructId = "\\N"; |
628 |
String j_sheetSerial = "\\N"; |
629 |
String j_turn = null; |
630 |
SecStrucElement j_sselem = j_node.getSecStrucElement(); |
631 |
if (j_sselem!=null){ |
632 |
j_secStructType = Character.toString(j_sselem.getType()); |
633 |
j_secStructId = j_sselem.getId(); |
634 |
char sheetSerialChar = j_sselem.getSheetSerial(); |
635 |
if (sheetSerialChar != 0) { |
636 |
j_sheetSerial = Character.toString(sheetSerialChar); |
637 |
} |
638 |
j_turn = j_sselem.isTurn()?"1":"0"; |
639 |
} |
640 |
|
641 |
edgesOut.println(graphid+"\t"+(maxNodeId+i_node.getResidueSerial())+"\t"+chainCode+"\t"+i_node.getResidueSerial()+"\t"+i_res+"\t"+i_secStructType+"\t"+i_secStructId+"\t"+i_sheetSerial+"\t"+i_turn+"\t"+ |
642 |
(maxNodeId+j_node.getResidueSerial())+"\t"+chainCode+"\t"+j_node.getResidueSerial()+"\t"+j_res+"\t"+j_secStructType+"\t"+j_secStructId+"\t"+j_sheetSerial+"\t"+j_turn+"\t"+ |
643 |
cont.getAtomWeight()+"\t"+(cont.getAtomWeight()/maxWeight)); |
644 |
if(!isDirected()) {// we want both side of the matrix in the table to follow Ioannis' convention |
645 |
// so we insert the reverse contact by swapping i, j in insertion |
646 |
edgesOut.println(graphid+"\t"+(maxNodeId+j_node.getResidueSerial())+"\t"+chainCode+"\t"+j_node.getResidueSerial()+"\t"+j_res+"\t"+j_secStructType+"\t"+j_secStructId+"\t"+j_sheetSerial+"\t"+j_turn+"\t"+ |
647 |
(maxNodeId+i_node.getResidueSerial())+"\t"+chainCode+"\t"+i_node.getResidueSerial()+"\t"+i_res+"\t"+i_secStructType+"\t"+i_secStructId+"\t"+i_sheetSerial+"\t"+i_turn+"\t"+ |
648 |
cont.getAtomWeight()+"\t"+(cont.getAtomWeight()/maxWeight)); |
649 |
} |
650 |
} |
651 |
edgesOut.close(); |
652 |
sql = "LOAD DATA LOCAL INFILE '"+graphid+"_edges.txt' INTO TABLE "+db+".single_model_edge "+ |
653 |
" (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+ |
654 |
" j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight);"; |
655 |
stmt.executeUpdate(sql); |
656 |
stmt.close(); |
657 |
fileToDelete = new File(graphid+"_edges.txt"); |
658 |
if (fileToDelete.exists()) { |
659 |
fileToDelete.delete(); |
660 |
} |
661 |
|
662 |
} |
663 |
|
664 |
/** Single quotes the given string */ |
665 |
private String quote(String s) { |
666 |
return ("'"+s+"'"); |
667 |
} |
668 |
|
669 |
/** |
670 |
* Write graph to given outfile in aglappe format |
671 |
* @param outfile |
672 |
* @throws IOException |
673 |
*/ |
674 |
//TODO we might want to move this to a graph i/o class |
675 |
public void writeToFile (String outfile) throws IOException { |
676 |
PrintStream Out = new PrintStream(new FileOutputStream(outfile)); |
677 |
Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION); |
678 |
Out.println("#SEQUENCE: "+sequence); |
679 |
Out.println("#PDB: "+pdbCode); |
680 |
Out.println("#PDB CHAIN CODE: "+pdbChainCode); |
681 |
Out.println("#CHAIN: "+chainCode); |
682 |
Out.println("#CT: "+contactType); |
683 |
Out.println("#CUTOFF: "+distCutoff); |
684 |
for (RIGEdge cont:getEdges()){ |
685 |
Pair<RIGNode> pair = getEndpoints(cont); |
686 |
int i_resser=pair.getFirst().getResidueSerial(); |
687 |
int j_resser=pair.getSecond().getResidueSerial(); |
688 |
//BEWARE!! here we write weights while in writeToDb we write atomWeights (consistent with what we do in FileRIGraph) TODO do we want this behaviour? |
689 |
double weight=cont.getWeight(); |
690 |
Out.printf(Locale.US,i_resser+"\t"+j_resser+"\t%6.3f\n",weight); |
691 |
} |
692 |
Out.close(); |
693 |
} |
694 |
} |