1 |
package proteinstructure; |
2 |
|
3 |
import java.io.File; |
4 |
import java.io.FileOutputStream; |
5 |
import java.io.IOException; |
6 |
import java.io.PrintStream; |
7 |
import java.sql.ResultSet; |
8 |
import java.sql.SQLException; |
9 |
import java.sql.Statement; |
10 |
import java.util.Collection; |
11 |
import java.util.HashMap; |
12 |
import java.util.Locale; |
13 |
import java.util.TreeMap; |
14 |
|
15 |
import tools.MySQLConnection; |
16 |
|
17 |
import edu.uci.ics.jung.graph.util.Pair; |
18 |
|
19 |
/** |
20 |
* A Residue Interaction Graph |
21 |
* |
22 |
*/ |
23 |
public class RIGraph extends ProtStructGraph<RIGNode,RIGEdge> { |
24 |
|
25 |
private static final long serialVersionUID = 1L; |
26 |
|
27 |
private static final String SINGLEMODELS_DB = "ioannis"; |
28 |
|
29 |
// fields |
30 |
protected double distCutoff; |
31 |
protected String contactType; // use AAinfo.isValidContactType() to test for validity |
32 |
|
33 |
public RIGraph() { |
34 |
super(); |
35 |
this.distCutoff=0; |
36 |
this.contactType=null; |
37 |
} |
38 |
|
39 |
/** |
40 |
* Constructs a RIGraph with a sequence but no edges |
41 |
* @param sequence |
42 |
*/ |
43 |
public RIGraph(String sequence) { |
44 |
super(); |
45 |
this.sequence = sequence; |
46 |
this.fullLength = sequence.length(); |
47 |
this.distCutoff=0; |
48 |
this.contactType=null; |
49 |
serials2nodes = new TreeMap<Integer,RIGNode>(); |
50 |
for(int i=0; i < sequence.length(); i++) { |
51 |
RIGNode node = new RIGNode(i+1,AAinfo.oneletter2threeletter(Character.toString(sequence.charAt(i)))); |
52 |
this.addVertex(node); |
53 |
serials2nodes.put(i+1, node); |
54 |
} |
55 |
} |
56 |
|
57 |
/** |
58 |
* Returns the contact type of this RIGraph |
59 |
* @return |
60 |
*/ |
61 |
public String getContactType() { |
62 |
return contactType; |
63 |
} |
64 |
|
65 |
/** |
66 |
* Sets the contact type of this RIGraph |
67 |
* @param ct the contact type |
68 |
*/ |
69 |
public void setContactType(String contactType) { |
70 |
this.contactType=contactType; |
71 |
} |
72 |
|
73 |
/** |
74 |
* Returns the distance cutoff for this RIGraph. |
75 |
* @return the distance cutoff |
76 |
*/ |
77 |
public double getCutoff(){ |
78 |
return distCutoff; |
79 |
} |
80 |
|
81 |
/** |
82 |
* Sets the distance cutoff for this RIGraph. |
83 |
* @param distCutoff the distance cutoff |
84 |
*/ |
85 |
public void setCutoff(double distCutoff) { |
86 |
this.distCutoff = distCutoff; |
87 |
} |
88 |
|
89 |
/** |
90 |
* Returns a RIGNbhood that contains the neighbourhood of given RIGNode |
91 |
* @param node |
92 |
* @return |
93 |
*/ |
94 |
public RIGNbhood getNbhood (RIGNode node) { |
95 |
Collection<RIGNode> nbs = this.getNeighbors(node); |
96 |
RIGNbhood nbhood = new RIGNbhood(node); |
97 |
for (RIGNode nb:nbs) { |
98 |
nbhood.put(nb.getResidueSerial(), nb); |
99 |
} |
100 |
return nbhood; |
101 |
} |
102 |
|
103 |
/** |
104 |
* Returns a RIGNbhood that contains the 2nd shell neighbourhood of given RIGNode |
105 |
* @param node |
106 |
* @return |
107 |
*/ |
108 |
public RIGNbhood getSecondShellNbhood (RIGNode node) { |
109 |
Collection<RIGNode> nbs = this.getNeighbors(node); |
110 |
RIGNbhood nbhood = new RIGNbhood(node); |
111 |
for (RIGNode nb:nbs) { |
112 |
for (RIGNode nb2:this.getNeighbors(nb)) { |
113 |
if (nb2!=node) { |
114 |
// RIGNbhood is a TreeMap that should take care of not inserting duplicates |
115 |
nbhood.put(nb2.getResidueSerial(),nb2); |
116 |
} |
117 |
} |
118 |
} |
119 |
return nbhood; |
120 |
} |
121 |
|
122 |
/** |
123 |
* Returns a RIGCommonNbhood that contains common neighbours of given RIGNodes iNode, jNode |
124 |
* @param iNode |
125 |
* @param jNode |
126 |
* @return |
127 |
*/ |
128 |
public RIGCommonNbhood getCommonNbhood(RIGNode iNode, RIGNode jNode) { |
129 |
Collection<RIGNode> iNbs = this.getNeighbors(iNode); |
130 |
Collection<RIGNode> jNbs = this.getNeighbors(jNode); |
131 |
boolean connected = false; |
132 |
//NOTE in DIRECTED case this means strictly an edge from iNode to jNode |
133 |
if (this.findEdge(iNode, jNode)!=null) connected = true; |
134 |
RIGCommonNbhood comNbhood = new RIGCommonNbhood(iNode, jNode, connected); |
135 |
for (RIGNode iNb: iNbs) { |
136 |
if (jNbs.contains(iNb)) { |
137 |
comNbhood.put(iNb.getResidueSerial(), iNb); |
138 |
} |
139 |
} |
140 |
return comNbhood; |
141 |
} |
142 |
|
143 |
/** |
144 |
* Returns all common neighborhood sizes for each cell of the contact map (contact or non-contact) |
145 |
* @return |
146 |
*/ |
147 |
public HashMap<Pair<RIGNode>,Integer> getAllCommonNbhSizes() { |
148 |
HashMap<Pair<RIGNode>,Integer> comNbhSizes = new HashMap<Pair<RIGNode>, Integer>(); |
149 |
boolean directed = this.isDirected(); |
150 |
for (RIGNode n1:this.getVertices()) { |
151 |
for (RIGNode n2:this.getVertices()) { |
152 |
if (directed) { |
153 |
if (n1!=n2) { |
154 |
comNbhSizes.put(new Pair<RIGNode>(n1,n2),getCommonNbhood(n1, n2).size()); |
155 |
} |
156 |
} else { |
157 |
if (n1.getResidueSerial()<n2.getResidueSerial()) { |
158 |
comNbhSizes.put(new Pair<RIGNode>(n1,n2),getCommonNbhood(n1, n2).size()); |
159 |
} |
160 |
} |
161 |
} |
162 |
} |
163 |
return comNbhSizes; |
164 |
} |
165 |
|
166 |
public int getContactRange(RIGEdge edge) { |
167 |
Pair<RIGNode> pair = this.getEndpoints(edge); |
168 |
return Math.abs(pair.getFirst().getResidueSerial()-pair.getSecond().getResidueSerial()); |
169 |
} |
170 |
|
171 |
//TODO evaluatePrediction methods should be in ProtStructGraph. |
172 |
// But to be able to put them there we would need to pass here a Transformer that gets atom or residue serials depending if we are in AI or RI Graph |
173 |
/** |
174 |
* Evaluate this graph (assuming it is a prediction) against an original graph |
175 |
* @param originalGraph |
176 |
* @return |
177 |
*/ |
178 |
public PredEval evaluatePrediction(RIGraph originalGraph) { |
179 |
return evaluatePrediction(originalGraph, 1); |
180 |
} |
181 |
|
182 |
/** |
183 |
* Evaluate this graph (assuming it is a prediction) against an original graph, |
184 |
* considering only edges with sequence separation at least minSeqSep. |
185 |
* @param originalGraph |
186 |
* @param minSeqSep |
187 |
* @return |
188 |
*/ |
189 |
public PredEval evaluatePrediction(RIGraph originalGraph, int minSeqSep) { |
190 |
|
191 |
Collection<RIGEdge> predictedContacts = this.getEdges(); |
192 |
Collection<RIGEdge> origContacts = originalGraph.getEdges(); |
193 |
// total predicted contacts |
194 |
int predicted = 0; |
195 |
for(RIGEdge e:predictedContacts) { |
196 |
if(this.getContactRange(e) >= minSeqSep) { |
197 |
predicted++; |
198 |
} |
199 |
} |
200 |
|
201 |
// total native contacts |
202 |
int original = 0; |
203 |
for(RIGEdge e:origContacts) { |
204 |
if(originalGraph.getContactRange(e) >= minSeqSep) { |
205 |
original++; |
206 |
} |
207 |
} |
208 |
|
209 |
// total size of contact map (potential contacts) |
210 |
int cmtotal = 0; |
211 |
if (originalGraph.isDirected()){ |
212 |
cmtotal = (originalGraph.getFullLength()-(minSeqSep-1))*(originalGraph.getFullLength()-minSeqSep); |
213 |
} else { |
214 |
cmtotal = (int)(((originalGraph.getFullLength()-(minSeqSep-1))*(originalGraph.getFullLength()-minSeqSep))/2); |
215 |
} |
216 |
int TruePos=0, FalsePos=0, TrueNeg=0, FalseNeg=0; |
217 |
|
218 |
// directed/ non-directed graphs should be both fine with this code |
219 |
// the only thing that changes between directed/non-directed is the count of total cells in contact map (taken care for above) |
220 |
for (RIGEdge predictedCont:predictedContacts){ |
221 |
if(this.getContactRange(predictedCont) >= minSeqSep) { |
222 |
Pair<RIGNode> predNodePair = this.getEndpoints(predictedCont); |
223 |
RIGNode node1inOrig = originalGraph.getNodeFromSerial(predNodePair.getFirst().getResidueSerial()); |
224 |
RIGNode node2inOrig = originalGraph.getNodeFromSerial(predNodePair.getSecond().getResidueSerial()); |
225 |
//NOTE order of nodes in findEdge doesn't matter if UNDIRECTED. |
226 |
//It does matter if DIRECTED. However even in that case we are fine because we use same order in this graph |
227 |
if (originalGraph.findEdge(node1inOrig, node2inOrig)!=null) { |
228 |
TruePos++; |
229 |
} |
230 |
else { |
231 |
FalsePos++; |
232 |
} |
233 |
} |
234 |
} |
235 |
|
236 |
for (RIGEdge origCont:origContacts) { |
237 |
if(originalGraph.getContactRange(origCont) >= minSeqSep) { |
238 |
Pair<RIGNode> origNodePair = originalGraph.getEndpoints(origCont); |
239 |
RIGNode node1inPred = this.getNodeFromSerial(origNodePair.getFirst().getResidueSerial()); |
240 |
RIGNode node2inPred = this.getNodeFromSerial(origNodePair.getSecond().getResidueSerial()); |
241 |
//NOTE order of nodes in findEdge doesn't matter if UNDIRECTED. |
242 |
//It does matter if DIRECTED. However even in that case we are fine because we use same order in originalGraph |
243 |
if (this.findEdge(node1inPred,node2inPred)==null) { |
244 |
FalseNeg++; |
245 |
} |
246 |
} |
247 |
} |
248 |
TrueNeg=cmtotal-TruePos-FalsePos-FalseNeg; |
249 |
PredEval eval = new PredEval(TruePos,FalsePos,TrueNeg,FalseNeg,0,predicted,original,cmtotal); |
250 |
return eval; |
251 |
} |
252 |
|
253 |
/** |
254 |
* Write graph to given db, using our db graph aglappe format, |
255 |
* i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge |
256 |
* @param conn |
257 |
* @param db |
258 |
* @throws SQLException |
259 |
*/ |
260 |
//TODO we might want to move this to a graph i/o class |
261 |
//TODO refactor to writeToDb. Get rid of this and only keep fast one?? |
262 |
public void write_graph_to_db(MySQLConnection conn, String db) throws SQLException{ |
263 |
|
264 |
// values we fix to constant |
265 |
String CW = "1"; |
266 |
String CR = "(true)"; |
267 |
String EXPBB = "0"; |
268 |
String ctStr = contactType; |
269 |
String weightedStr = "0"; |
270 |
String directedStr = isDirected()?"1":"0"; |
271 |
|
272 |
if (contactType.endsWith("_CAGLY")) { |
273 |
ctStr = contactType.replace("_CAGLY", ""); |
274 |
} |
275 |
if (ctStr.equals("ALL")) { |
276 |
ctStr = "BB+SC+BB/SC"; |
277 |
} |
278 |
if (AAinfo.isValidMultiAtomContactType(contactType)) { |
279 |
CW = ctStr; |
280 |
weightedStr = "1"; |
281 |
} |
282 |
if (contactType.endsWith("_CAGLY") || contactType.equals("Cb")) { |
283 |
EXPBB = "-1"; |
284 |
} |
285 |
if (minSeqSep != -1) { |
286 |
CR = "((i_cid!=j_cid)OR(abs(i_num-j_num)>="+minSeqSep+"))"; |
287 |
} |
288 |
|
289 |
int pgraphid=0; |
290 |
int graphid=0; |
291 |
String sql = "SELECT graph_id FROM "+db+".chain_graph " + |
292 |
" WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"'" + |
293 |
" AND model_serial = "+model+" AND dist = "+distCutoff+" AND expBB = '"+EXPBB+"'" + |
294 |
" AND method = 'rc-cutoff';"; |
295 |
Statement stmt = conn.createStatement(); |
296 |
ResultSet rsst = stmt.executeQuery(sql); |
297 |
if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid |
298 |
pgraphid = rsst.getInt(1); |
299 |
} else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid) |
300 |
// we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment) |
301 |
String pdbChainCodeStr = pdbChainCode; |
302 |
if (!pdbChainCode.equals("NULL")) { |
303 |
pdbChainCodeStr="'"+pdbChainCode+"'"; |
304 |
} |
305 |
sql = "INSERT INTO "+db+".chain_graph (accession_code,chain_pdb_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " + |
306 |
"VALUES ('"+pdbCode+"', "+pdbChainCodeStr+",'"+chainCode+"', "+model+", "+distCutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+secondaryStructure.getNumElements()+", now())"; |
307 |
Statement stmt2 = conn.createStatement(); |
308 |
stmt2.executeUpdate(sql); |
309 |
// now we take the newly assigned graph_id as pgraphid |
310 |
sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1"; |
311 |
ResultSet rsst2 = stmt2.executeQuery(sql); |
312 |
if (rsst2.next()){ |
313 |
pgraphid = rsst2.getInt(1); |
314 |
} |
315 |
stmt2.close(); |
316 |
rsst2.close(); |
317 |
} |
318 |
rsst.close(); |
319 |
// now we insert the graph info into single_model_graph |
320 |
// 1st we grab the single_model_id |
321 |
int singlemodelid = 0; |
322 |
sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE "+ |
323 |
" dist="+distCutoff+" AND expBB="+EXPBB+" AND CW='"+CW+"' AND CT='"+ctStr+"' AND CR='"+CR+"';"; |
324 |
rsst = stmt.executeQuery(sql); |
325 |
if (rsst.next()){ |
326 |
singlemodelid = rsst.getInt(1); |
327 |
} |
328 |
rsst.close(); |
329 |
// and then insert to single_model_graph |
330 |
sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,w,d,num_nodes,date) " + |
331 |
" VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+distCutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+weightedStr+", "+directedStr+", "+getObsLength()+", now())"; |
332 |
stmt.executeUpdate(sql); |
333 |
// and we grab the graph_id just assigned in single_model_graph |
334 |
sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1"; |
335 |
rsst = stmt.executeQuery(sql); |
336 |
if (rsst.next()){ |
337 |
graphid = rsst.getInt(1); |
338 |
} |
339 |
rsst.close(); |
340 |
stmt.close(); |
341 |
|
342 |
// inserting nodes |
343 |
// get the max node in db |
344 |
int maxNodeId = 0; |
345 |
sql = "SELECT MAX(node_id) FROM "+db+".single_model_node;"; |
346 |
stmt = conn.createStatement(); |
347 |
rsst = stmt.executeQuery(sql); |
348 |
if (rsst.next()){ |
349 |
maxNodeId = rsst.getInt(1); |
350 |
} |
351 |
rsst.close(); |
352 |
stmt.close(); |
353 |
|
354 |
stmt = conn.createStatement(); |
355 |
for (int resser:serials2nodes.keySet()) { |
356 |
RIGNode node = serials2nodes.get(resser); |
357 |
String res = AAinfo.threeletter2oneletter(node.getResidueType()); |
358 |
RIGNbhood nbh = this.getNbhood(node); |
359 |
String secStructType = null; |
360 |
String secStructId = null; |
361 |
String sheetSerial = null; |
362 |
String turn = null; |
363 |
SecStrucElement sselem = node.getSecStrucElement(); |
364 |
if (sselem!=null){ |
365 |
secStructType = quote(Character.toString(sselem.getType())); |
366 |
secStructId = quote(sselem.getId()); |
367 |
char sheetSerialChar = sselem.getSheetSerial(); |
368 |
if (sheetSerialChar != 0) { |
369 |
sheetSerial = quote(Character.toString(sheetSerialChar)); |
370 |
} |
371 |
turn = sselem.isTurn()?"1":"0"; |
372 |
} |
373 |
if (isDirected()){ // we insert k(=k_in+k_out), k_in and k_out |
374 |
sql = "INSERT INTO "+db+".single_model_node "+ |
375 |
" (graph_id, node_id, cid, num, res, "+ |
376 |
" sstype, ssid, sheet_serial, turn, "+ |
377 |
" k, k_in, k_out, "+ |
378 |
" n, nwg, n_num) " + |
379 |
" VALUES ("+graphid+", "+(maxNodeId+resser)+", '"+chainCode+"', "+resser+", '"+res+"', "+ |
380 |
" "+secStructType+", "+secStructId+", "+sheetSerial+", "+turn+", "+ |
381 |
(inDegree(node)+outDegree(node))+", "+inDegree(node)+", "+outDegree(node)+", "+ |
382 |
"'"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')"; |
383 |
} else { // we insert k (and no k_in or k_out) |
384 |
sql = "INSERT INTO "+db+".single_model_node "+ |
385 |
" (graph_id, node_id, cid, num, res, "+ |
386 |
" sstype, ssid, sheet_serial, turn, "+ |
387 |
" k, n, nwg, n_num) " + |
388 |
" VALUES ("+graphid+", "+(maxNodeId+resser)+", '"+chainCode+"', "+resser+", '"+res+"', "+ |
389 |
" "+secStructType+", "+secStructId+", "+sheetSerial+", "+turn+", "+ |
390 |
degree(node)+", '"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')"; |
391 |
} |
392 |
stmt.executeUpdate(sql); |
393 |
} |
394 |
|
395 |
// inserting edges |
396 |
// get the max weight |
397 |
double maxWeight = 0; |
398 |
for (RIGEdge cont:getEdges()) { |
399 |
maxWeight = (maxWeight<cont.getAtomWeight())?cont.getAtomWeight():maxWeight; |
400 |
} |
401 |
for (RIGEdge cont:getEdges()){ |
402 |
RIGNode i_node = getEndpoints(cont).getFirst(); |
403 |
RIGNode j_node = getEndpoints(cont).getSecond(); |
404 |
String i_res = AAinfo.threeletter2oneletter(i_node.getResidueType()); |
405 |
String j_res = AAinfo.threeletter2oneletter(j_node.getResidueType()); |
406 |
|
407 |
String i_secStructType = null; |
408 |
String i_secStructId = null; |
409 |
String i_sheetSerial = null; |
410 |
String i_turn = null; |
411 |
SecStrucElement i_sselem = i_node.getSecStrucElement(); |
412 |
if (i_sselem!=null){ |
413 |
i_secStructType = quote(Character.toString(i_sselem.getType())); |
414 |
i_secStructId = quote(i_sselem.getId()); |
415 |
char sheetSerialChar = i_sselem.getSheetSerial(); |
416 |
if (sheetSerialChar != 0) { |
417 |
i_sheetSerial = quote(Character.toString(sheetSerialChar)); |
418 |
} |
419 |
i_turn = i_sselem.isTurn()?"1":"0"; |
420 |
} |
421 |
|
422 |
String j_secStructType = null; |
423 |
String j_secStructId = null; |
424 |
String j_sheetSerial = null; |
425 |
String j_turn = null; |
426 |
SecStrucElement j_sselem = j_node.getSecStrucElement(); |
427 |
if (j_sselem!=null){ |
428 |
j_secStructType = quote(Character.toString(j_sselem.getType())); |
429 |
j_secStructId = quote(j_sselem.getId()); |
430 |
char sheetSerialChar = j_sselem.getSheetSerial(); |
431 |
if (sheetSerialChar != 0) { |
432 |
j_sheetSerial = quote(Character.toString(sheetSerialChar)); |
433 |
} |
434 |
j_turn = j_sselem.isTurn()?"1":"0"; |
435 |
} |
436 |
|
437 |
sql = "INSERT INTO "+db+".single_model_edge "+ |
438 |
" (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+ |
439 |
" j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight) " + |
440 |
" VALUES ("+graphid+", "+(maxNodeId+i_node.getResidueSerial())+", '"+chainCode+"', "+i_node.getResidueSerial()+", '"+i_res+"', "+i_secStructType+", "+i_secStructId+", "+i_sheetSerial+", "+i_turn+", "+ |
441 |
(maxNodeId+j_node.getResidueSerial())+", '"+chainCode+"', "+j_node.getResidueSerial()+", '"+j_res+"', "+j_secStructType+", "+j_secStructId+", "+j_sheetSerial+", "+j_turn+", "+ |
442 |
cont.getAtomWeight()+", "+(cont.getAtomWeight()/maxWeight)+")"; |
443 |
stmt.executeUpdate(sql); |
444 |
if(!isDirected()) {// we want both side of the matrix in the table to follow Ioannis' convention |
445 |
// so we insert the reverse contact by swapping i, j in insertion |
446 |
sql = "INSERT INTO "+db+".single_model_edge "+ |
447 |
" (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+ |
448 |
" j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight) " + |
449 |
" VALUES ("+graphid+", "+(maxNodeId+j_node.getResidueSerial())+", '"+chainCode+"', "+j_node.getResidueSerial()+", '"+j_res+"', "+j_secStructType+", "+j_secStructId+", "+j_sheetSerial+", "+j_turn+", "+ |
450 |
(maxNodeId+i_node.getResidueSerial())+", '"+chainCode+"', "+i_node.getResidueSerial()+", '"+i_res+"', "+i_secStructType+", "+i_secStructId+", "+i_sheetSerial+", "+i_turn+", "+ |
451 |
cont.getAtomWeight()+", "+(cont.getAtomWeight()/maxWeight)+")"; |
452 |
stmt.executeUpdate(sql); |
453 |
} |
454 |
} |
455 |
|
456 |
stmt.close(); |
457 |
|
458 |
} |
459 |
|
460 |
/** |
461 |
* Write graph to given db, using our db graph aglappe format, |
462 |
* i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge |
463 |
* @param conn |
464 |
* @param db |
465 |
* @throws SQLException |
466 |
*/ |
467 |
//TODO we might want to move this to a graph i/o class |
468 |
//TODO refactor to writeToDbFast |
469 |
public void write_graph_to_db_fast(MySQLConnection conn, String db) throws SQLException, IOException { |
470 |
|
471 |
// values we fix to constant |
472 |
String CW = "1"; |
473 |
String CR = "(true)"; |
474 |
String EXPBB = "0"; |
475 |
String ctStr = contactType; |
476 |
String weightedStr = "0"; |
477 |
String directedStr = isDirected()?"1":"0"; |
478 |
|
479 |
if (contactType.endsWith("_CAGLY")) { |
480 |
ctStr = contactType.replace("_CAGLY", ""); |
481 |
} |
482 |
if (ctStr.equals("ALL")) { |
483 |
ctStr = "BB+SC+BB/SC"; |
484 |
} |
485 |
if (AAinfo.isValidMultiAtomContactType(contactType)) { |
486 |
CW = ctStr; |
487 |
weightedStr = "1"; |
488 |
} |
489 |
if (contactType.endsWith("_CAGLY") || contactType.equals("Cb")) { |
490 |
EXPBB = "-1"; |
491 |
} |
492 |
if (minSeqSep != -1) { |
493 |
CR = "((i_cid!=j_cid)OR(abs(i_num-j_num)>="+minSeqSep+"))"; |
494 |
} |
495 |
|
496 |
int pgraphid=0; |
497 |
int graphid=0; |
498 |
String sql = "SELECT graph_id FROM "+db+".chain_graph " + |
499 |
" WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"'" + |
500 |
" AND model_serial = "+model+" AND dist = "+distCutoff+" AND expBB = '"+EXPBB+"'" + |
501 |
" AND method = 'rc-cutoff';"; |
502 |
Statement stmt = conn.createStatement(); |
503 |
ResultSet rsst = stmt.executeQuery(sql); |
504 |
if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid |
505 |
pgraphid = rsst.getInt(1); |
506 |
} else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid) |
507 |
// we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment) |
508 |
String pdbChainCodeStr = pdbChainCode; |
509 |
if (!pdbChainCode.equals("NULL")) { |
510 |
pdbChainCodeStr="'"+pdbChainCode+"'"; |
511 |
} |
512 |
sql = "INSERT INTO "+db+".chain_graph (accession_code,chain_pdb_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " + |
513 |
"VALUES ('"+pdbCode+"', "+pdbChainCodeStr+",'"+chainCode+"', "+model+", "+distCutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+secondaryStructure.getNumElements()+", now())"; |
514 |
Statement stmt2 = conn.createStatement(); |
515 |
stmt2.executeUpdate(sql); |
516 |
// now we take the newly assigned graph_id as pgraphid |
517 |
sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1"; |
518 |
ResultSet rsst2 = stmt2.executeQuery(sql); |
519 |
if (rsst2.next()){ |
520 |
pgraphid = rsst2.getInt(1); |
521 |
} |
522 |
stmt2.close(); |
523 |
rsst2.close(); |
524 |
} |
525 |
rsst.close(); |
526 |
// now we insert the graph info into single_model_graph |
527 |
// 1st we grab the single_model_id |
528 |
int singlemodelid = 0; |
529 |
sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE "+ |
530 |
" dist="+distCutoff+" AND expBB="+EXPBB+" AND CW='"+CW+"' AND CT='"+ctStr+"' AND CR='"+CR+"';"; |
531 |
rsst = stmt.executeQuery(sql); |
532 |
if (rsst.next()){ |
533 |
singlemodelid = rsst.getInt(1); |
534 |
} |
535 |
rsst.close(); |
536 |
// and then insert to single_model_graph |
537 |
sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,w,d,num_nodes,date) " + |
538 |
" VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+distCutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+weightedStr+", "+directedStr+", "+getObsLength()+", now())"; |
539 |
stmt.executeUpdate(sql); |
540 |
// and we grab the graph_id just assigned in single_model_graph |
541 |
sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1"; |
542 |
rsst = stmt.executeQuery(sql); |
543 |
if (rsst.next()){ |
544 |
graphid = rsst.getInt(1); |
545 |
} |
546 |
rsst.close(); |
547 |
stmt.close(); |
548 |
|
549 |
// inserting nodes |
550 |
PrintStream nodesOut = new PrintStream(new FileOutputStream(graphid+"_nodes.txt")); |
551 |
// get the max node in db |
552 |
int maxNodeId = 0; |
553 |
sql = "SELECT MAX(node_id) FROM "+db+".single_model_node;"; |
554 |
stmt = conn.createStatement(); |
555 |
rsst = stmt.executeQuery(sql); |
556 |
if (rsst.next()){ |
557 |
maxNodeId = rsst.getInt(1); |
558 |
} |
559 |
rsst.close(); |
560 |
stmt.close(); |
561 |
|
562 |
for (int resser:serials2nodes.keySet()) { |
563 |
|
564 |
RIGNode node = serials2nodes.get(resser); |
565 |
String res = AAinfo.threeletter2oneletter(node.getResidueType()); |
566 |
RIGNbhood nbh = this.getNbhood(node); |
567 |
String secStructType = "\\N"; |
568 |
String secStructId = "\\N"; |
569 |
String sheetSerial = "\\N"; |
570 |
String turn = null; |
571 |
SecStrucElement sselem = node.getSecStrucElement(); |
572 |
if (sselem!=null){ |
573 |
secStructType = Character.toString(sselem.getType()); |
574 |
secStructId = sselem.getId(); |
575 |
char sheetSerialChar = sselem.getSheetSerial(); |
576 |
if (sheetSerialChar != 0) { |
577 |
sheetSerial = Character.toString(sheetSerialChar); |
578 |
} |
579 |
turn = sselem.isTurn()?"1":"0"; |
580 |
} |
581 |
if (isDirected()){ // we insert k(=k_in+k_out), k_in and k_out |
582 |
nodesOut.println(graphid+"\t"+(maxNodeId+resser)+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+ |
583 |
secStructType+"\t"+secStructId+"\t"+sheetSerial+"\t"+turn+"\t"+ |
584 |
(inDegree(node)+outDegree(node))+"\t"+inDegree(node)+"\t"+outDegree(node)+"\t"+ |
585 |
nbh.getMotifNoGaps()+"\t"+nbh.getMotif()+"\t"+nbh.getCommaSeparatedResSerials()); |
586 |
} else { // we insert k (and no k_in or k_out) |
587 |
nodesOut.println(graphid+"\t"+(maxNodeId+resser)+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+ |
588 |
secStructType+"\t"+secStructId+"\t"+sheetSerial+"\t"+turn+"\t"+ |
589 |
degree(node)+"\t"+"\\N"+"\t"+"\\N"+"\t"+ |
590 |
nbh.getMotifNoGaps()+"\t"+nbh.getMotif()+"\t"+nbh.getCommaSeparatedResSerials()); |
591 |
} |
592 |
} |
593 |
nodesOut.close(); |
594 |
stmt = conn.createStatement(); |
595 |
sql = "LOAD DATA LOCAL INFILE '"+graphid+"_nodes.txt' INTO TABLE "+db+".single_model_node "+ |
596 |
" (graph_id, node_id, cid, num, res, "+ |
597 |
" sstype, ssid, sheet_serial, turn, "+ |
598 |
" k, k_in, k_out, n, nwg, n_num);"; |
599 |
stmt.executeUpdate(sql); |
600 |
File fileToDelete = new File(graphid+"_nodes.txt"); |
601 |
if (fileToDelete.exists()) { |
602 |
fileToDelete.delete(); |
603 |
} |
604 |
|
605 |
// inserting edges |
606 |
PrintStream edgesOut = new PrintStream(new FileOutputStream(graphid+"_edges.txt")); |
607 |
// get the max weight |
608 |
double maxWeight = 0; |
609 |
for (RIGEdge cont:getEdges()) { |
610 |
maxWeight = (maxWeight<cont.getAtomWeight())?cont.getAtomWeight():maxWeight; |
611 |
} |
612 |
for (RIGEdge cont:getEdges()){ |
613 |
RIGNode i_node = getEndpoints(cont).getFirst(); |
614 |
RIGNode j_node = getEndpoints(cont).getSecond(); |
615 |
String i_res = AAinfo.threeletter2oneletter(i_node.getResidueType()); |
616 |
String j_res = AAinfo.threeletter2oneletter(j_node.getResidueType()); |
617 |
|
618 |
String i_secStructType = "\\N"; |
619 |
String i_secStructId = "\\N"; |
620 |
String i_sheetSerial = "\\N"; |
621 |
String i_turn = null; |
622 |
SecStrucElement i_sselem = i_node.getSecStrucElement(); |
623 |
if (i_sselem!=null){ |
624 |
i_secStructType = Character.toString(i_sselem.getType()); |
625 |
i_secStructId = i_sselem.getId(); |
626 |
char sheetSerialChar = i_sselem.getSheetSerial(); |
627 |
if (sheetSerialChar != 0) { |
628 |
i_sheetSerial = Character.toString(sheetSerialChar); |
629 |
} |
630 |
i_turn = i_sselem.isTurn()?"1":"0"; |
631 |
} |
632 |
|
633 |
String j_secStructType = "\\N"; |
634 |
String j_secStructId = "\\N"; |
635 |
String j_sheetSerial = "\\N"; |
636 |
String j_turn = null; |
637 |
SecStrucElement j_sselem = j_node.getSecStrucElement(); |
638 |
if (j_sselem!=null){ |
639 |
j_secStructType = Character.toString(j_sselem.getType()); |
640 |
j_secStructId = j_sselem.getId(); |
641 |
char sheetSerialChar = j_sselem.getSheetSerial(); |
642 |
if (sheetSerialChar != 0) { |
643 |
j_sheetSerial = Character.toString(sheetSerialChar); |
644 |
} |
645 |
j_turn = j_sselem.isTurn()?"1":"0"; |
646 |
} |
647 |
|
648 |
edgesOut.println(graphid+"\t"+(maxNodeId+i_node.getResidueSerial())+"\t"+chainCode+"\t"+i_node.getResidueSerial()+"\t"+i_res+"\t"+i_secStructType+"\t"+i_secStructId+"\t"+i_sheetSerial+"\t"+i_turn+"\t"+ |
649 |
(maxNodeId+j_node.getResidueSerial())+"\t"+chainCode+"\t"+j_node.getResidueSerial()+"\t"+j_res+"\t"+j_secStructType+"\t"+j_secStructId+"\t"+j_sheetSerial+"\t"+j_turn+"\t"+ |
650 |
cont.getAtomWeight()+"\t"+(cont.getAtomWeight()/maxWeight)); |
651 |
if(!isDirected()) {// we want both side of the matrix in the table to follow Ioannis' convention |
652 |
// so we insert the reverse contact by swapping i, j in insertion |
653 |
edgesOut.println(graphid+"\t"+(maxNodeId+j_node.getResidueSerial())+"\t"+chainCode+"\t"+j_node.getResidueSerial()+"\t"+j_res+"\t"+j_secStructType+"\t"+j_secStructId+"\t"+j_sheetSerial+"\t"+j_turn+"\t"+ |
654 |
(maxNodeId+i_node.getResidueSerial())+"\t"+chainCode+"\t"+i_node.getResidueSerial()+"\t"+i_res+"\t"+i_secStructType+"\t"+i_secStructId+"\t"+i_sheetSerial+"\t"+i_turn+"\t"+ |
655 |
cont.getAtomWeight()+"\t"+(cont.getAtomWeight()/maxWeight)); |
656 |
} |
657 |
} |
658 |
edgesOut.close(); |
659 |
sql = "LOAD DATA LOCAL INFILE '"+graphid+"_edges.txt' INTO TABLE "+db+".single_model_edge "+ |
660 |
" (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+ |
661 |
" j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight);"; |
662 |
stmt.executeUpdate(sql); |
663 |
stmt.close(); |
664 |
fileToDelete = new File(graphid+"_edges.txt"); |
665 |
if (fileToDelete.exists()) { |
666 |
fileToDelete.delete(); |
667 |
} |
668 |
|
669 |
} |
670 |
|
671 |
/** Single quotes the given string */ |
672 |
private String quote(String s) { |
673 |
return ("'"+s+"'"); |
674 |
} |
675 |
|
676 |
/** |
677 |
* Write graph to given outfile in aglappe format |
678 |
* @param outfile |
679 |
* @throws IOException |
680 |
*/ |
681 |
//TODO we might want to move this to a graph i/o class |
682 |
//TODO refactor to writeToFile |
683 |
public void write_graph_to_file (String outfile) throws IOException { |
684 |
PrintStream Out = new PrintStream(new FileOutputStream(outfile)); |
685 |
Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION); |
686 |
Out.println("#SEQUENCE: "+sequence); |
687 |
Out.println("#PDB: "+pdbCode); |
688 |
Out.println("#PDB CHAIN CODE: "+pdbChainCode); |
689 |
Out.println("#CHAIN: "+chainCode); |
690 |
Out.println("#CT: "+contactType); |
691 |
Out.println("#CUTOFF: "+distCutoff); |
692 |
for (RIGEdge cont:getEdges()){ |
693 |
Pair<RIGNode> pair = getEndpoints(cont); |
694 |
int i_resser=pair.getFirst().getResidueSerial(); |
695 |
int j_resser=pair.getSecond().getResidueSerial(); |
696 |
//BEWARE!! here we write weights while in writeToDb we write atomWeights (consistent with what we do in FileRIGraph) TODO do we want this behaviour? |
697 |
double weight=cont.getWeight(); |
698 |
Out.printf(Locale.US,i_resser+"\t"+j_resser+"\t%6.3f\n",weight); |
699 |
} |
700 |
Out.close(); |
701 |
} |
702 |
|
703 |
/** |
704 |
* Write graph to given outfile in network(Ioannis) format |
705 |
* @param graphId |
706 |
* @param dirName |
707 |
* @throws IOException |
708 |
*/ |
709 |
//TODO we might want to move this to a graph i/o class |
710 |
//TODO refactor |
711 |
public void writeUndirUnweightGraphToNetworkFiles (int graphId, String dirName) throws IOException { |
712 |
if (isDirected()) { |
713 |
System.err.println("This method is only for undirected graphs!"); |
714 |
return; |
715 |
} |
716 |
|
717 |
String filePrefix = dirName + "/" + String.valueOf(graphId)+"_"+pdbCode+"_"+chainCode+"_"+contactType.replaceAll("/", ".")+"_"+String.valueOf(distCutoff).replaceAll("\\.", "_")+"_"; |
718 |
PrintStream Out = new PrintStream(new FileOutputStream(filePrefix+"edges.txt")); |
719 |
for (RIGEdge cont:getEdges()){ |
720 |
Pair<RIGNode> pair = getEndpoints(cont); |
721 |
int i_resser=pair.getFirst().getResidueSerial(); |
722 |
int j_resser=pair.getSecond().getResidueSerial(); |
723 |
if (i_resser < j_resser) { |
724 |
Out.printf(Locale.US,i_resser+"\t"+j_resser+"\t"+1+"\t"+"1.000"+"\n"); |
725 |
} |
726 |
} |
727 |
Out.close(); |
728 |
Out = new PrintStream(new FileOutputStream(filePrefix+"nodes.txt")); |
729 |
for (int resser:this.serials2nodes.keySet()) { |
730 |
RIGNode node = this.getNodeFromSerial(resser); |
731 |
String res = AAinfo.threeletter2oneletter(node.getResidueType()); |
732 |
Out.printf(Locale.US,resser+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+degree(node)+"\t"+degree(node)+"\n"); |
733 |
|
734 |
} |
735 |
Out.close(); |
736 |
} |
737 |
|
738 |
/** |
739 |
* Compares this RIGraph to given RIGraph returning 3 graphs: common, onlythis, onlyother in a HashMap |
740 |
* @param other |
741 |
* @return |
742 |
* @throws Exception |
743 |
*/ |
744 |
//TODO not sure what kind of return we want, for now is a HashMap with three graph objects |
745 |
public HashMap<String,RIGraph> compare(RIGraph other) throws Exception{ |
746 |
//first check that other has same sequence than this, otherwise throw exception |
747 |
if (this.getFullLength()!=other.getFullLength()) { |
748 |
//TODO throw specific exception |
749 |
throw new Exception("Sequence of 2 graphs to compare differ, can't compare them."); |
750 |
} else { |
751 |
for (int resser:this.serials2nodes.keySet()){ |
752 |
String this_res = getNodeFromSerial(resser).getResidueType(); |
753 |
String other_res = other.getNodeFromSerial(resser).getResidueType(); |
754 |
if (!this_res.equals("X") && !other_res.equals("X") && !this_res.equals(other_res)) { |
755 |
//TODO throw specific exception |
756 |
throw new Exception("Sequence of 2 graphs to compare differ, can't compare them."); |
757 |
} |
758 |
} |
759 |
} |
760 |
//NOTE: the common graph will have same node/edge properties as this graph, |
761 |
// which doesn't make a lot of sense, but anyway one has to choose between this or other, |
762 |
// or otherwise make some kind of merge, e.g. merge the weights by averaging? |
763 |
RIGraph commongraph = this.copy(); |
764 |
RIGraph onlythisgraph = this.copy(); |
765 |
RIGraph onlyothergraph = other.copy(); |
766 |
|
767 |
for (RIGEdge cont:this.getEdges()){ |
768 |
Pair<RIGNode> pair = this.getEndpoints(cont); |
769 |
int i_resser = pair.getFirst().getResidueSerial(); |
770 |
int j_resser = pair.getSecond().getResidueSerial(); |
771 |
if (other.findEdge(other.getNodeFromSerial(i_resser), other.getNodeFromSerial(j_resser))!=null) { |
772 |
onlythisgraph.removeEdge(onlythisgraph.findEdge(onlythisgraph.getNodeFromSerial(i_resser),onlythisgraph.getNodeFromSerial(j_resser))); |
773 |
onlyothergraph.removeEdge(onlyothergraph.findEdge(onlyothergraph.getNodeFromSerial(i_resser),onlyothergraph.getNodeFromSerial(j_resser))); |
774 |
} else { |
775 |
commongraph.removeEdge(commongraph.findEdge(commongraph.getNodeFromSerial(i_resser),commongraph.getNodeFromSerial(j_resser))); |
776 |
} |
777 |
} |
778 |
|
779 |
HashMap<String,RIGraph> result = new HashMap<String,RIGraph>(); |
780 |
result.put("common", commongraph); |
781 |
result.put("onlythis", onlythisgraph); |
782 |
result.put("onlyother",onlyothergraph); |
783 |
return result; |
784 |
} |
785 |
|
786 |
/** |
787 |
* Returns a new RIGraph copy (deep) of this one |
788 |
* @return |
789 |
*/ |
790 |
public RIGraph copy() { |
791 |
RIGraph newGraph = new RIGraph(); |
792 |
newGraph.setPdbCode(pdbCode); |
793 |
newGraph.setPdbChainCode(pdbChainCode); |
794 |
newGraph.setChainCode(chainCode); |
795 |
newGraph.setModel(model); |
796 |
newGraph.setContactType(contactType); |
797 |
newGraph.setCutoff(distCutoff); |
798 |
newGraph.setSequence(sequence); |
799 |
|
800 |
// copying nodes and serials2nodes |
801 |
TreeMap<Integer,RIGNode> newSerials2nodes = new TreeMap<Integer,RIGNode>(); |
802 |
for (RIGNode node:this.getVertices()) { |
803 |
RIGNode newNode = node.copy(); |
804 |
newGraph.addVertex(newNode); |
805 |
newSerials2nodes.put(newNode.getResidueSerial(),newNode); |
806 |
} |
807 |
newGraph.setSerials2NodesMap(newSerials2nodes); |
808 |
|
809 |
// copying edges |
810 |
for (RIGEdge edge:this.getEdges()) { |
811 |
Pair<RIGNode> pair = this.getEndpoints(edge); |
812 |
int i_resser = pair.getFirst().getResidueSerial(); |
813 |
int j_resser = pair.getSecond().getResidueSerial(); |
814 |
// EdgeType enum should copy correctly because enums are treated as ints in copying (always deep copied) |
815 |
newGraph.addEdge(edge.copy(), newGraph.getNodeFromSerial(i_resser), newGraph.getNodeFromSerial(j_resser), this.getEdgeType(edge)); |
816 |
} |
817 |
|
818 |
// copying the SecondaryStructure object by retrieving all references from the new nodes |
819 |
SecondaryStructure secStruct = new SecondaryStructure(); |
820 |
for (RIGNode node:newGraph.getVertices()) { |
821 |
SecStrucElement sselem = node.getSecStrucElement(); |
822 |
if (sselem!=null && !secStruct.contains(sselem)) { |
823 |
secStruct.add(sselem); |
824 |
} |
825 |
} |
826 |
newGraph.setSecondaryStructure(secStruct); |
827 |
|
828 |
return newGraph; |
829 |
} |
830 |
} |