ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/branches/aglappe-jung/proteinstructure/RIGraph.java
Revision: 421
Committed: Thu Nov 22 19:42:11 2007 UTC (16 years, 10 months ago) by duarte
File size: 27285 byte(s)
Log Message:
Now graph generation in Pdb also works when we pass a crossed contact type with overlapping atom sets, e.g. ALL/BB (changes were: added a couple of new conditions in Box and in Pdb's AIGraph getGraph(ct,cutoff))
Loads of better comments
Line File contents
1 package proteinstructure;
2
3 import java.io.File;
4 import java.io.FileOutputStream;
5 import java.io.IOException;
6 import java.io.PrintStream;
7 import java.sql.ResultSet;
8 import java.sql.SQLException;
9 import java.sql.Statement;
10 import java.util.Collection;
11 import java.util.HashMap;
12 import java.util.Locale;
13 import java.util.TreeMap;
14
15 import tools.MySQLConnection;
16
17 import edu.uci.ics.jung.graph.util.Pair;
18
19 /**
20 * A Residue Interaction Graph
21 *
22 */
23 public class RIGraph extends ProtStructGraph<RIGNode,RIGEdge> {
24
25 private static final long serialVersionUID = 1L;
26
27 private static final String SINGLEMODELS_DB = "ioannis";
28
29 // fields
30 protected double distCutoff;
31 protected String contactType; // use AAinfo.isValidContactType() to test for validity
32
/**
 * Constructs an empty RIGraph: the distance cutoff is 0 and no contact type is set.
 */
public RIGraph() {
    super();
    contactType = null;
    distCutoff = 0;
}
38
39 /**
40 * Constructs a RIGraph with a sequence but no edges
41 * @param sequence
42 */
43 public RIGraph(String sequence) {
44 super();
45 this.sequence = sequence;
46 this.fullLength = sequence.length();
47 this.distCutoff=0;
48 this.contactType=null;
49 serials2nodes = new TreeMap<Integer,RIGNode>();
50 for(int i=0; i < sequence.length(); i++) {
51 RIGNode node = new RIGNode(i+1,AAinfo.oneletter2threeletter(Character.toString(sequence.charAt(i))));
52 this.addVertex(node);
53 serials2nodes.put(i+1, node);
54 }
55 }
56
57 /**
58 * Returns the contact type of this RIGraph
59 * @return
60 */
61 public String getContactType() {
62 return contactType;
63 }
64
65 /**
66 * Sets the contact type of this RIGraph
67 * @param ct the contact type
68 */
69 public void setContactType(String contactType) {
70 this.contactType=contactType;
71 }
72
73 /**
74 * Returns the distance cutoff for this RIGraph.
75 * @return the distance cutoff
76 */
77 public double getCutoff(){
78 return distCutoff;
79 }
80
81 /**
82 * Sets the distance cutoff for this RIGraph.
83 * @param distCutoff the distance cutoff
84 */
85 public void setCutoff(double distCutoff) {
86 this.distCutoff = distCutoff;
87 }
88
89 /**
90 * Returns a RIGNbhood that contains the neighbourhood of given RIGNode
91 * @param node
92 * @return
93 */
94 public RIGNbhood getNbhood (RIGNode node) {
95 Collection<RIGNode> nbs = this.getNeighbors(node);
96 RIGNbhood nbhood = new RIGNbhood(node);
97 for (RIGNode nb:nbs) {
98 nbhood.put(nb.getResidueSerial(), nb);
99 }
100 return nbhood;
101 }
102
103 /**
104 * Returns a RIGNbhood that contains the 2nd shell neighbourhood of given RIGNode
105 * @param node
106 * @return
107 */
108 public RIGNbhood getSecondShellNbhood (RIGNode node) {
109 Collection<RIGNode> nbs = this.getNeighbors(node);
110 RIGNbhood nbhood = new RIGNbhood(node);
111 for (RIGNode nb:nbs) {
112 for (RIGNode nb2:this.getNeighbors(nb)) {
113 if (nb2!=node) {
114 // RIGNbhood is a TreeMap that should take care of not inserting duplicates
115 nbhood.put(nb2.getResidueSerial(),nb2);
116 }
117 }
118 }
119 return nbhood;
120 }
121
122 /**
123 * Returns a RIGCommonNbhood that contains common neighbours of given RIGNodes iNode, jNode
124 * @param iNode
125 * @param jNode
126 * @return
127 */
128 public RIGCommonNbhood getCommonNbhood(RIGNode iNode, RIGNode jNode) {
129 Collection<RIGNode> iNbs = this.getNeighbors(iNode);
130 Collection<RIGNode> jNbs = this.getNeighbors(jNode);
131 boolean connected = false;
132 //NOTE in DIRECTED case this means strictly an edge from iNode to jNode
133 if (this.findEdge(iNode, jNode)!=null) connected = true;
134 RIGCommonNbhood comNbhood = new RIGCommonNbhood(iNode, jNode, connected);
135 for (RIGNode iNb: iNbs) {
136 if (jNbs.contains(iNb)) {
137 comNbhood.put(iNb.getResidueSerial(), iNb);
138 }
139 }
140 return comNbhood;
141 }
142
143 /**
144 * Returns all common neighborhood sizes for each cell of the contact map (contact or non-contact)
145 * @return
146 */
147 public HashMap<Pair<RIGNode>,Integer> getAllCommonNbhSizes() {
148 HashMap<Pair<RIGNode>,Integer> comNbhSizes = new HashMap<Pair<RIGNode>, Integer>();
149 boolean directed = this.isDirected();
150 for (RIGNode n1:this.getVertices()) {
151 for (RIGNode n2:this.getVertices()) {
152 if (directed) {
153 if (n1!=n2) {
154 comNbhSizes.put(new Pair<RIGNode>(n1,n2),getCommonNbhood(n1, n2).size());
155 }
156 } else {
157 if (n1.getResidueSerial()<n2.getResidueSerial()) {
158 comNbhSizes.put(new Pair<RIGNode>(n1,n2),getCommonNbhood(n1, n2).size());
159 }
160 }
161 }
162 }
163 return comNbhSizes;
164 }
165
166 public int getContactRange(RIGEdge edge) {
167 Pair<RIGNode> pair = this.getEndpoints(edge);
168 return Math.abs(pair.getFirst().getResidueSerial()-pair.getSecond().getResidueSerial());
169 }
170
171 //TODO evaluatePrediction methods should be in ProtStructGraph.
172 // But to be able to put them there we would need to pass here a Transformer that gets atom or residue serials depending if we are in AI or RI Graph
173 /**
174 * Evaluate this graph (assuming it is a prediction) against an original graph
175 * @param originalGraph
176 * @return
177 */
178 public PredEval evaluatePrediction(RIGraph originalGraph) {
179 return evaluatePrediction(originalGraph, 1);
180 }
181
182 /**
183 * Evaluate this graph (assuming it is a prediction) against an original graph,
184 * considering only edges with sequence separation at least minSeqSep.
185 * @param originalGraph
186 * @param minSeqSep
187 * @return
188 */
189 public PredEval evaluatePrediction(RIGraph originalGraph, int minSeqSep) {
190
191 Collection<RIGEdge> predictedContacts = this.getEdges();
192 Collection<RIGEdge> origContacts = originalGraph.getEdges();
193 // total predicted contacts
194 int predicted = 0;
195 for(RIGEdge e:predictedContacts) {
196 if(this.getContactRange(e) >= minSeqSep) {
197 predicted++;
198 }
199 }
200
201 // total native contacts
202 int original = 0;
203 for(RIGEdge e:origContacts) {
204 if(originalGraph.getContactRange(e) >= minSeqSep) {
205 original++;
206 }
207 }
208
209 // total size of contact map (potential contacts)
210 int cmtotal = 0;
211 if (originalGraph.isDirected()){
212 cmtotal = (originalGraph.getFullLength()-(minSeqSep-1))*(originalGraph.getFullLength()-minSeqSep);
213 } else {
214 cmtotal = (int)(((originalGraph.getFullLength()-(minSeqSep-1))*(originalGraph.getFullLength()-minSeqSep))/2);
215 }
216 int TruePos=0, FalsePos=0, TrueNeg=0, FalseNeg=0;
217
218 // directed/ non-directed graphs should be both fine with this code
219 // the only thing that changes between directed/non-directed is the count of total cells in contact map (taken care for above)
220 for (RIGEdge predictedCont:predictedContacts){
221 if(this.getContactRange(predictedCont) >= minSeqSep) {
222 Pair<RIGNode> predNodePair = this.getEndpoints(predictedCont);
223 RIGNode node1inOrig = originalGraph.getNodeFromSerial(predNodePair.getFirst().getResidueSerial());
224 RIGNode node2inOrig = originalGraph.getNodeFromSerial(predNodePair.getSecond().getResidueSerial());
225 //NOTE order of nodes in findEdge doesn't matter if UNDIRECTED.
226 //It does matter if DIRECTED. However even in that case we are fine because we use same order in this graph
227 if (originalGraph.findEdge(node1inOrig, node2inOrig)!=null) {
228 TruePos++;
229 }
230 else {
231 FalsePos++;
232 }
233 }
234 }
235
236 for (RIGEdge origCont:origContacts) {
237 if(originalGraph.getContactRange(origCont) >= minSeqSep) {
238 Pair<RIGNode> origNodePair = originalGraph.getEndpoints(origCont);
239 RIGNode node1inPred = this.getNodeFromSerial(origNodePair.getFirst().getResidueSerial());
240 RIGNode node2inPred = this.getNodeFromSerial(origNodePair.getSecond().getResidueSerial());
241 //NOTE order of nodes in findEdge doesn't matter if UNDIRECTED.
242 //It does matter if DIRECTED. However even in that case we are fine because we use same order in originalGraph
243 if (this.findEdge(node1inPred,node2inPred)==null) {
244 FalseNeg++;
245 }
246 }
247 }
248 TrueNeg=cmtotal-TruePos-FalsePos-FalseNeg;
249 PredEval eval = new PredEval(TruePos,FalsePos,TrueNeg,FalseNeg,0,predicted,original,cmtotal);
250 return eval;
251 }
252
253 /**
254 * Write graph to given db, using our db graph aglappe format,
255 * i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge
256 * @param conn
257 * @param db
258 * @throws SQLException
259 */
260 //TODO we might want to move this to a graph i/o class
261 public void write_graph_to_db(MySQLConnection conn, String db) throws SQLException{
262
263 // values we fix to constant
264 String CW = "1";
265 String CR = "(true)";
266 String EXPBB = "0";
267 String ctStr = contactType;
268 String weightedStr = "0";
269 String directedStr = isDirected()?"1":"0";
270
271 if (contactType.endsWith("_CAGLY")) {
272 ctStr = contactType.replace("_CAGLY", "");
273 }
274 if (ctStr.equals("ALL")) {
275 ctStr = "BB+SC+BB/SC";
276 }
277 if (AAinfo.isValidMultiAtomContactType(contactType)) {
278 CW = ctStr;
279 weightedStr = "1";
280 }
281 if (contactType.endsWith("_CAGLY") || contactType.equals("Cb")) {
282 EXPBB = "-1";
283 }
284 if (minSeqSep != -1) {
285 CR = "((i_cid!=j_cid)OR(abs(i_num-j_num)>="+minSeqSep+"))";
286 }
287
288 int pgraphid=0;
289 int graphid=0;
290 String sql = "SELECT graph_id FROM "+db+".chain_graph " +
291 " WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"'" +
292 " AND model_serial = "+model+" AND dist = "+distCutoff+" AND expBB = '"+EXPBB+"'" +
293 " AND method = 'rc-cutoff';";
294 Statement stmt = conn.createStatement();
295 ResultSet rsst = stmt.executeQuery(sql);
296 if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid
297 pgraphid = rsst.getInt(1);
298 } else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid)
299 // we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment)
300 String pdbChainCodeStr = pdbChainCode;
301 if (!pdbChainCode.equals("NULL")) {
302 pdbChainCodeStr="'"+pdbChainCode+"'";
303 }
304 sql = "INSERT INTO "+db+".chain_graph (accession_code,chain_pdb_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " +
305 "VALUES ('"+pdbCode+"', "+pdbChainCodeStr+",'"+chainCode+"', "+model+", "+distCutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+secondaryStructure.getNumElements()+", now())";
306 Statement stmt2 = conn.createStatement();
307 stmt2.executeUpdate(sql);
308 // now we take the newly assigned graph_id as pgraphid
309 sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1";
310 ResultSet rsst2 = stmt2.executeQuery(sql);
311 if (rsst2.next()){
312 pgraphid = rsst2.getInt(1);
313 }
314 stmt2.close();
315 rsst2.close();
316 }
317 rsst.close();
318 // now we insert the graph info into single_model_graph
319 // 1st we grab the single_model_id
320 int singlemodelid = 0;
321 sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE "+
322 " dist="+distCutoff+" AND expBB="+EXPBB+" AND CW='"+CW+"' AND CT='"+ctStr+"' AND CR='"+CR+"';";
323 rsst = stmt.executeQuery(sql);
324 if (rsst.next()){
325 singlemodelid = rsst.getInt(1);
326 }
327 rsst.close();
328 // and then insert to single_model_graph
329 sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,w,d,num_nodes,date) " +
330 " VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+distCutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+weightedStr+", "+directedStr+", "+getObsLength()+", now())";
331 stmt.executeUpdate(sql);
332 // and we grab the graph_id just assigned in single_model_graph
333 sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1";
334 rsst = stmt.executeQuery(sql);
335 if (rsst.next()){
336 graphid = rsst.getInt(1);
337 }
338 rsst.close();
339 stmt.close();
340
341 // inserting nodes
342 // get the max node in db
343 int maxNodeId = 0;
344 sql = "SELECT MAX(node_id) FROM "+db+".single_model_node;";
345 stmt = conn.createStatement();
346 rsst = stmt.executeQuery(sql);
347 if (rsst.next()){
348 maxNodeId = rsst.getInt(1);
349 }
350 rsst.close();
351 stmt.close();
352
353 stmt = conn.createStatement();
354 for (int resser:serials2nodes.keySet()) {
355 RIGNode node = serials2nodes.get(resser);
356 String res = AAinfo.threeletter2oneletter(node.getResidueType());
357 RIGNbhood nbh = this.getNbhood(node);
358 String secStructType = null;
359 String secStructId = null;
360 String sheetSerial = null;
361 String turn = null;
362 SecStrucElement sselem = node.getSecStrucElement();
363 if (sselem!=null){
364 secStructType = quote(Character.toString(sselem.getType()));
365 secStructId = quote(sselem.getId());
366 char sheetSerialChar = sselem.getSheetSerial();
367 if (sheetSerialChar != 0) {
368 sheetSerial = quote(Character.toString(sheetSerialChar));
369 }
370 turn = sselem.isTurn()?"1":"0";
371 }
372 if (isDirected()){ // we insert k(=k_in+k_out), k_in and k_out
373 sql = "INSERT INTO "+db+".single_model_node "+
374 " (graph_id, node_id, cid, num, res, "+
375 " sstype, ssid, sheet_serial, turn, "+
376 " k, k_in, k_out, "+
377 " n, nwg, n_num) " +
378 " VALUES ("+graphid+", "+(maxNodeId+resser)+", '"+chainCode+"', "+resser+", '"+res+"', "+
379 " "+secStructType+", "+secStructId+", "+sheetSerial+", "+turn+", "+
380 (getPredecessorCount(node)+getSuccessorCount(node))+", "+getPredecessorCount(node)+", "+getSuccessorCount(node)+", "+
381 "'"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')";
382 } else { // we insert k (and no k_in or k_out)
383 sql = "INSERT INTO "+db+".single_model_node "+
384 " (graph_id, node_id, cid, num, res, "+
385 " sstype, ssid, sheet_serial, turn, "+
386 " k, n, nwg, n_num) " +
387 " VALUES ("+graphid+", "+(maxNodeId+resser)+", '"+chainCode+"', "+resser+", '"+res+"', "+
388 " "+secStructType+", "+secStructId+", "+sheetSerial+", "+turn+", "+
389 getNeighborCount(node)+", '"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')";
390 }
391 stmt.executeUpdate(sql);
392 }
393
394 // inserting edges
395 // get the max weight
396 double maxWeight = 0;
397 for (RIGEdge cont:getEdges()) {
398 maxWeight = (maxWeight<cont.getAtomWeight())?cont.getAtomWeight():maxWeight;
399 }
400 for (RIGEdge cont:getEdges()){
401 RIGNode i_node = getEndpoints(cont).getFirst();
402 RIGNode j_node = getEndpoints(cont).getSecond();
403 String i_res = AAinfo.threeletter2oneletter(i_node.getResidueType());
404 String j_res = AAinfo.threeletter2oneletter(j_node.getResidueType());
405
406 String i_secStructType = null;
407 String i_secStructId = null;
408 String i_sheetSerial = null;
409 String i_turn = null;
410 SecStrucElement i_sselem = i_node.getSecStrucElement();
411 if (i_sselem!=null){
412 i_secStructType = quote(Character.toString(i_sselem.getType()));
413 i_secStructId = quote(i_sselem.getId());
414 char sheetSerialChar = i_sselem.getSheetSerial();
415 if (sheetSerialChar != 0) {
416 i_sheetSerial = quote(Character.toString(sheetSerialChar));
417 }
418 i_turn = i_sselem.isTurn()?"1":"0";
419 }
420
421 String j_secStructType = null;
422 String j_secStructId = null;
423 String j_sheetSerial = null;
424 String j_turn = null;
425 SecStrucElement j_sselem = j_node.getSecStrucElement();
426 if (j_sselem!=null){
427 j_secStructType = quote(Character.toString(j_sselem.getType()));
428 j_secStructId = quote(j_sselem.getId());
429 char sheetSerialChar = j_sselem.getSheetSerial();
430 if (sheetSerialChar != 0) {
431 j_sheetSerial = quote(Character.toString(sheetSerialChar));
432 }
433 j_turn = j_sselem.isTurn()?"1":"0";
434 }
435
436 sql = "INSERT INTO "+db+".single_model_edge "+
437 " (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+
438 " j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight) " +
439 " VALUES ("+graphid+", "+(maxNodeId+i_node.getResidueSerial())+", '"+chainCode+"', "+i_node.getResidueSerial()+", '"+i_res+"', "+i_secStructType+", "+i_secStructId+", "+i_sheetSerial+", "+i_turn+", "+
440 (maxNodeId+j_node.getResidueSerial())+", '"+chainCode+"', "+j_node.getResidueSerial()+", '"+j_res+"', "+j_secStructType+", "+j_secStructId+", "+j_sheetSerial+", "+j_turn+", "+
441 cont.getAtomWeight()+", "+(cont.getAtomWeight()/maxWeight)+")";
442 stmt.executeUpdate(sql);
443 if(!isDirected()) {// we want both side of the matrix in the table to follow Ioannis' convention
444 // so we insert the reverse contact by swapping i, j in insertion
445 sql = "INSERT INTO "+db+".single_model_edge "+
446 " (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+
447 " j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight) " +
448 " VALUES ("+graphid+", "+(maxNodeId+j_node.getResidueSerial())+", '"+chainCode+"', "+j_node.getResidueSerial()+", '"+j_res+"', "+j_secStructType+", "+j_secStructId+", "+j_sheetSerial+", "+j_turn+", "+
449 (maxNodeId+i_node.getResidueSerial())+", '"+chainCode+"', "+i_node.getResidueSerial()+", '"+i_res+"', "+i_secStructType+", "+i_secStructId+", "+i_sheetSerial+", "+i_turn+", "+
450 cont.getAtomWeight()+", "+(cont.getAtomWeight()/maxWeight)+")";
451 stmt.executeUpdate(sql);
452 }
453 }
454
455 stmt.close();
456
457 }
458
459 /**
460 * Write graph to given db, using our db graph aglappe format,
461 * i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge
462 * @param conn
463 * @param db
464 * @throws SQLException
465 */
466 //TODO we might want to move this to a graph i/o class
467 public void write_graph_to_db_fast(MySQLConnection conn, String db) throws SQLException, IOException {
468
469 // values we fix to constant
470 String CW = "1";
471 String CR = "(true)";
472 String EXPBB = "0";
473 String ctStr = contactType;
474 String weightedStr = "0";
475 String directedStr = isDirected()?"1":"0";
476
477 if (contactType.endsWith("_CAGLY")) {
478 ctStr = contactType.replace("_CAGLY", "");
479 }
480 if (ctStr.equals("ALL")) {
481 ctStr = "BB+SC+BB/SC";
482 }
483 if (AAinfo.isValidMultiAtomContactType(contactType)) {
484 CW = ctStr;
485 weightedStr = "1";
486 }
487 if (contactType.endsWith("_CAGLY") || contactType.equals("Cb")) {
488 EXPBB = "-1";
489 }
490 if (minSeqSep != -1) {
491 CR = "((i_cid!=j_cid)OR(abs(i_num-j_num)>="+minSeqSep+"))";
492 }
493
494 int pgraphid=0;
495 int graphid=0;
496 String sql = "SELECT graph_id FROM "+db+".chain_graph " +
497 " WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"'" +
498 " AND model_serial = "+model+" AND dist = "+distCutoff+" AND expBB = '"+EXPBB+"'" +
499 " AND method = 'rc-cutoff';";
500 Statement stmt = conn.createStatement();
501 ResultSet rsst = stmt.executeQuery(sql);
502 if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid
503 pgraphid = rsst.getInt(1);
504 } else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid)
505 // we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment)
506 String pdbChainCodeStr = pdbChainCode;
507 if (!pdbChainCode.equals("NULL")) {
508 pdbChainCodeStr="'"+pdbChainCode+"'";
509 }
510 sql = "INSERT INTO "+db+".chain_graph (accession_code,chain_pdb_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " +
511 "VALUES ('"+pdbCode+"', "+pdbChainCodeStr+",'"+chainCode+"', "+model+", "+distCutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+secondaryStructure.getNumElements()+", now())";
512 Statement stmt2 = conn.createStatement();
513 stmt2.executeUpdate(sql);
514 // now we take the newly assigned graph_id as pgraphid
515 sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1";
516 ResultSet rsst2 = stmt2.executeQuery(sql);
517 if (rsst2.next()){
518 pgraphid = rsst2.getInt(1);
519 }
520 stmt2.close();
521 rsst2.close();
522 }
523 rsst.close();
524 // now we insert the graph info into single_model_graph
525 // 1st we grab the single_model_id
526 int singlemodelid = 0;
527 sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE "+
528 " dist="+distCutoff+" AND expBB="+EXPBB+" AND CW='"+CW+"' AND CT='"+ctStr+"' AND CR='"+CR+"';";
529 rsst = stmt.executeQuery(sql);
530 if (rsst.next()){
531 singlemodelid = rsst.getInt(1);
532 }
533 rsst.close();
534 // and then insert to single_model_graph
535 sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,w,d,num_nodes,date) " +
536 " VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+distCutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+weightedStr+", "+directedStr+", "+getObsLength()+", now())";
537 stmt.executeUpdate(sql);
538 // and we grab the graph_id just assigned in single_model_graph
539 sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1";
540 rsst = stmt.executeQuery(sql);
541 if (rsst.next()){
542 graphid = rsst.getInt(1);
543 }
544 rsst.close();
545 stmt.close();
546
547 // inserting nodes
548 PrintStream nodesOut = new PrintStream(new FileOutputStream(graphid+"_nodes.txt"));
549 // get the max node in db
550 int maxNodeId = 0;
551 sql = "SELECT MAX(node_id) FROM "+db+".single_model_node;";
552 stmt = conn.createStatement();
553 rsst = stmt.executeQuery(sql);
554 if (rsst.next()){
555 maxNodeId = rsst.getInt(1);
556 }
557 rsst.close();
558 stmt.close();
559
560 for (int resser:serials2nodes.keySet()) {
561
562 RIGNode node = serials2nodes.get(resser);
563 String res = AAinfo.threeletter2oneletter(node.getResidueType());
564 RIGNbhood nbh = this.getNbhood(node);
565 String secStructType = "\\N";
566 String secStructId = "\\N";
567 String sheetSerial = "\\N";
568 String turn = null;
569 SecStrucElement sselem = node.getSecStrucElement();
570 if (sselem!=null){
571 secStructType = Character.toString(sselem.getType());
572 secStructId = sselem.getId();
573 char sheetSerialChar = sselem.getSheetSerial();
574 if (sheetSerialChar != 0) {
575 sheetSerial = Character.toString(sheetSerialChar);
576 }
577 turn = sselem.isTurn()?"1":"0";
578 }
579 if (isDirected()){ // we insert k(=k_in+k_out), k_in and k_out
580 nodesOut.println(graphid+"\t"+(maxNodeId+resser)+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+
581 secStructType+"\t"+secStructId+"\t"+sheetSerial+"\t"+turn+"\t"+
582 (getPredecessorCount(node)+getSuccessorCount(node))+"\t"+getPredecessorCount(node)+"\t"+getSuccessorCount(node)+"\t"+
583 nbh.getMotifNoGaps()+"\t"+nbh.getMotif()+"\t"+nbh.getCommaSeparatedResSerials());
584 } else { // we insert k (and no k_in or k_out)
585 nodesOut.println(graphid+"\t"+(maxNodeId+resser)+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+
586 secStructType+"\t"+secStructId+"\t"+sheetSerial+"\t"+turn+"\t"+
587 getNeighborCount(node)+"\t"+"\\N"+"\t"+"\\N"+"\t"+
588 nbh.getMotifNoGaps()+"\t"+nbh.getMotif()+"\t"+nbh.getCommaSeparatedResSerials());
589 }
590 }
591 nodesOut.close();
592 stmt = conn.createStatement();
593 sql = "LOAD DATA LOCAL INFILE '"+graphid+"_nodes.txt' INTO TABLE "+db+".single_model_node "+
594 " (graph_id, node_id, cid, num, res, "+
595 " sstype, ssid, sheet_serial, turn, "+
596 " k, k_in, k_out, n, nwg, n_num);";
597 stmt.executeUpdate(sql);
598 File fileToDelete = new File(graphid+"_nodes.txt");
599 if (fileToDelete.exists()) {
600 fileToDelete.delete();
601 }
602
603 // inserting edges
604 PrintStream edgesOut = new PrintStream(new FileOutputStream(graphid+"_edges.txt"));
605 // get the max weight
606 double maxWeight = 0;
607 for (RIGEdge cont:getEdges()) {
608 maxWeight = (maxWeight<cont.getAtomWeight())?cont.getAtomWeight():maxWeight;
609 }
610 for (RIGEdge cont:getEdges()){
611 RIGNode i_node = getEndpoints(cont).getFirst();
612 RIGNode j_node = getEndpoints(cont).getSecond();
613 String i_res = AAinfo.threeletter2oneletter(i_node.getResidueType());
614 String j_res = AAinfo.threeletter2oneletter(j_node.getResidueType());
615
616 String i_secStructType = "\\N";
617 String i_secStructId = "\\N";
618 String i_sheetSerial = "\\N";
619 String i_turn = null;
620 SecStrucElement i_sselem = i_node.getSecStrucElement();
621 if (i_sselem!=null){
622 i_secStructType = Character.toString(i_sselem.getType());
623 i_secStructId = i_sselem.getId();
624 char sheetSerialChar = i_sselem.getSheetSerial();
625 if (sheetSerialChar != 0) {
626 i_sheetSerial = Character.toString(sheetSerialChar);
627 }
628 i_turn = i_sselem.isTurn()?"1":"0";
629 }
630
631 String j_secStructType = "\\N";
632 String j_secStructId = "\\N";
633 String j_sheetSerial = "\\N";
634 String j_turn = null;
635 SecStrucElement j_sselem = j_node.getSecStrucElement();
636 if (j_sselem!=null){
637 j_secStructType = Character.toString(j_sselem.getType());
638 j_secStructId = j_sselem.getId();
639 char sheetSerialChar = j_sselem.getSheetSerial();
640 if (sheetSerialChar != 0) {
641 j_sheetSerial = Character.toString(sheetSerialChar);
642 }
643 j_turn = j_sselem.isTurn()?"1":"0";
644 }
645
646 edgesOut.println(graphid+"\t"+(maxNodeId+i_node.getResidueSerial())+"\t"+chainCode+"\t"+i_node.getResidueSerial()+"\t"+i_res+"\t"+i_secStructType+"\t"+i_secStructId+"\t"+i_sheetSerial+"\t"+i_turn+"\t"+
647 (maxNodeId+j_node.getResidueSerial())+"\t"+chainCode+"\t"+j_node.getResidueSerial()+"\t"+j_res+"\t"+j_secStructType+"\t"+j_secStructId+"\t"+j_sheetSerial+"\t"+j_turn+"\t"+
648 cont.getAtomWeight()+"\t"+(cont.getAtomWeight()/maxWeight));
649 if(!isDirected()) {// we want both side of the matrix in the table to follow Ioannis' convention
650 // so we insert the reverse contact by swapping i, j in insertion
651 edgesOut.println(graphid+"\t"+(maxNodeId+j_node.getResidueSerial())+"\t"+chainCode+"\t"+j_node.getResidueSerial()+"\t"+j_res+"\t"+j_secStructType+"\t"+j_secStructId+"\t"+j_sheetSerial+"\t"+j_turn+"\t"+
652 (maxNodeId+i_node.getResidueSerial())+"\t"+chainCode+"\t"+i_node.getResidueSerial()+"\t"+i_res+"\t"+i_secStructType+"\t"+i_secStructId+"\t"+i_sheetSerial+"\t"+i_turn+"\t"+
653 cont.getAtomWeight()+"\t"+(cont.getAtomWeight()/maxWeight));
654 }
655 }
656 edgesOut.close();
657 sql = "LOAD DATA LOCAL INFILE '"+graphid+"_edges.txt' INTO TABLE "+db+".single_model_edge "+
658 " (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+
659 " j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight);";
660 stmt.executeUpdate(sql);
661 stmt.close();
662 fileToDelete = new File(graphid+"_edges.txt");
663 if (fileToDelete.exists()) {
664 fileToDelete.delete();
665 }
666
667 }
668
669 /** Single quotes the given string */
670 private String quote(String s) {
671 return ("'"+s+"'");
672 }
673
674 /**
675 * Write graph to given outfile in aglappe format
676 * @param outfile
677 * @throws IOException
678 */
679 //TODO we might want to move this to a graph i/o class
680 public void writeToFile (String outfile) throws IOException {
681 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
682 Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION);
683 Out.println("#SEQUENCE: "+sequence);
684 Out.println("#PDB: "+pdbCode);
685 Out.println("#PDB CHAIN CODE: "+pdbChainCode);
686 Out.println("#CHAIN: "+chainCode);
687 Out.println("#CT: "+contactType);
688 Out.println("#CUTOFF: "+distCutoff);
689 for (RIGEdge cont:getEdges()){
690 Pair<RIGNode> pair = getEndpoints(cont);
691 int i_resser=pair.getFirst().getResidueSerial();
692 int j_resser=pair.getSecond().getResidueSerial();
693 //BEWARE!! here we write weights while in writeToDb we write atomWeights (consistent with what we do in FileRIGraph) TODO do we want this behaviour?
694 double weight=cont.getWeight();
695 Out.printf(Locale.US,i_resser+"\t"+j_resser+"\t%6.3f\n",weight);
696 }
697 Out.close();
698 }
699 }