ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/branches/aglappe-jung/proteinstructure/RIGraph.java
Revision: 424
Committed: Fri Nov 23 14:21:33 2007 UTC (16 years, 10 months ago) by duarte
File size: 32650 byte(s)
Log Message:
Added last i/o method from old Graph (write to file in Ioannis network format)
Using now the more correct degree, indegree, outdegree methods instead of count neighbor methods
Fixed the write_graph_to_file method name in executables in default package

Line File contents
1 package proteinstructure;
2
3 import java.io.File;
4 import java.io.FileOutputStream;
5 import java.io.IOException;
6 import java.io.PrintStream;
7 import java.sql.ResultSet;
8 import java.sql.SQLException;
9 import java.sql.Statement;
10 import java.util.Collection;
11 import java.util.HashMap;
12 import java.util.Locale;
13 import java.util.TreeMap;
14
15 import tools.MySQLConnection;
16
17 import edu.uci.ics.jung.graph.util.Pair;
18
19 /**
20 * A Residue Interaction Graph
21 *
22 */
23 public class RIGraph extends ProtStructGraph<RIGNode,RIGEdge> {
24
25 private static final long serialVersionUID = 1L;
26
27 private static final String SINGLEMODELS_DB = "ioannis";
28
29 // fields
30 protected double distCutoff;
31 protected String contactType; // use AAinfo.isValidContactType() to test for validity
32
33 public RIGraph() {
34 super();
35 this.distCutoff=0;
36 this.contactType=null;
37 }
38
39 /**
40 * Constructs a RIGraph with a sequence but no edges
41 * @param sequence
42 */
43 public RIGraph(String sequence) {
44 super();
45 this.sequence = sequence;
46 this.fullLength = sequence.length();
47 this.distCutoff=0;
48 this.contactType=null;
49 serials2nodes = new TreeMap<Integer,RIGNode>();
50 for(int i=0; i < sequence.length(); i++) {
51 RIGNode node = new RIGNode(i+1,AAinfo.oneletter2threeletter(Character.toString(sequence.charAt(i))));
52 this.addVertex(node);
53 serials2nodes.put(i+1, node);
54 }
55 }
56
57 /**
58 * Returns the contact type of this RIGraph
59 * @return
60 */
61 public String getContactType() {
62 return contactType;
63 }
64
65 /**
66 * Sets the contact type of this RIGraph
67 * @param ct the contact type
68 */
69 public void setContactType(String contactType) {
70 this.contactType=contactType;
71 }
72
73 /**
74 * Returns the distance cutoff for this RIGraph.
75 * @return the distance cutoff
76 */
77 public double getCutoff(){
78 return distCutoff;
79 }
80
81 /**
82 * Sets the distance cutoff for this RIGraph.
83 * @param distCutoff the distance cutoff
84 */
85 public void setCutoff(double distCutoff) {
86 this.distCutoff = distCutoff;
87 }
88
89 /**
90 * Returns a RIGNbhood that contains the neighbourhood of given RIGNode
91 * @param node
92 * @return
93 */
94 public RIGNbhood getNbhood (RIGNode node) {
95 Collection<RIGNode> nbs = this.getNeighbors(node);
96 RIGNbhood nbhood = new RIGNbhood(node);
97 for (RIGNode nb:nbs) {
98 nbhood.put(nb.getResidueSerial(), nb);
99 }
100 return nbhood;
101 }
102
103 /**
104 * Returns a RIGNbhood that contains the 2nd shell neighbourhood of given RIGNode
105 * @param node
106 * @return
107 */
108 public RIGNbhood getSecondShellNbhood (RIGNode node) {
109 Collection<RIGNode> nbs = this.getNeighbors(node);
110 RIGNbhood nbhood = new RIGNbhood(node);
111 for (RIGNode nb:nbs) {
112 for (RIGNode nb2:this.getNeighbors(nb)) {
113 if (nb2!=node) {
114 // RIGNbhood is a TreeMap that should take care of not inserting duplicates
115 nbhood.put(nb2.getResidueSerial(),nb2);
116 }
117 }
118 }
119 return nbhood;
120 }
121
122 /**
123 * Returns a RIGCommonNbhood that contains common neighbours of given RIGNodes iNode, jNode
124 * @param iNode
125 * @param jNode
126 * @return
127 */
128 public RIGCommonNbhood getCommonNbhood(RIGNode iNode, RIGNode jNode) {
129 Collection<RIGNode> iNbs = this.getNeighbors(iNode);
130 Collection<RIGNode> jNbs = this.getNeighbors(jNode);
131 boolean connected = false;
132 //NOTE in DIRECTED case this means strictly an edge from iNode to jNode
133 if (this.findEdge(iNode, jNode)!=null) connected = true;
134 RIGCommonNbhood comNbhood = new RIGCommonNbhood(iNode, jNode, connected);
135 for (RIGNode iNb: iNbs) {
136 if (jNbs.contains(iNb)) {
137 comNbhood.put(iNb.getResidueSerial(), iNb);
138 }
139 }
140 return comNbhood;
141 }
142
143 /**
144 * Returns all common neighborhood sizes for each cell of the contact map (contact or non-contact)
145 * @return
146 */
147 public HashMap<Pair<RIGNode>,Integer> getAllCommonNbhSizes() {
148 HashMap<Pair<RIGNode>,Integer> comNbhSizes = new HashMap<Pair<RIGNode>, Integer>();
149 boolean directed = this.isDirected();
150 for (RIGNode n1:this.getVertices()) {
151 for (RIGNode n2:this.getVertices()) {
152 if (directed) {
153 if (n1!=n2) {
154 comNbhSizes.put(new Pair<RIGNode>(n1,n2),getCommonNbhood(n1, n2).size());
155 }
156 } else {
157 if (n1.getResidueSerial()<n2.getResidueSerial()) {
158 comNbhSizes.put(new Pair<RIGNode>(n1,n2),getCommonNbhood(n1, n2).size());
159 }
160 }
161 }
162 }
163 return comNbhSizes;
164 }
165
166 public int getContactRange(RIGEdge edge) {
167 Pair<RIGNode> pair = this.getEndpoints(edge);
168 return Math.abs(pair.getFirst().getResidueSerial()-pair.getSecond().getResidueSerial());
169 }
170
171 //TODO evaluatePrediction methods should be in ProtStructGraph.
172 // But to be able to put them there we would need to pass here a Transformer that gets atom or residue serials depending if we are in AI or RI Graph
173 /**
174 * Evaluate this graph (assuming it is a prediction) against an original graph
175 * @param originalGraph
176 * @return
177 */
178 public PredEval evaluatePrediction(RIGraph originalGraph) {
179 return evaluatePrediction(originalGraph, 1);
180 }
181
182 /**
183 * Evaluate this graph (assuming it is a prediction) against an original graph,
184 * considering only edges with sequence separation at least minSeqSep.
185 * @param originalGraph
186 * @param minSeqSep
187 * @return
188 */
189 public PredEval evaluatePrediction(RIGraph originalGraph, int minSeqSep) {
190
191 Collection<RIGEdge> predictedContacts = this.getEdges();
192 Collection<RIGEdge> origContacts = originalGraph.getEdges();
193 // total predicted contacts
194 int predicted = 0;
195 for(RIGEdge e:predictedContacts) {
196 if(this.getContactRange(e) >= minSeqSep) {
197 predicted++;
198 }
199 }
200
201 // total native contacts
202 int original = 0;
203 for(RIGEdge e:origContacts) {
204 if(originalGraph.getContactRange(e) >= minSeqSep) {
205 original++;
206 }
207 }
208
209 // total size of contact map (potential contacts)
210 int cmtotal = 0;
211 if (originalGraph.isDirected()){
212 cmtotal = (originalGraph.getFullLength()-(minSeqSep-1))*(originalGraph.getFullLength()-minSeqSep);
213 } else {
214 cmtotal = (int)(((originalGraph.getFullLength()-(minSeqSep-1))*(originalGraph.getFullLength()-minSeqSep))/2);
215 }
216 int TruePos=0, FalsePos=0, TrueNeg=0, FalseNeg=0;
217
218 // directed/ non-directed graphs should be both fine with this code
219 // the only thing that changes between directed/non-directed is the count of total cells in contact map (taken care for above)
220 for (RIGEdge predictedCont:predictedContacts){
221 if(this.getContactRange(predictedCont) >= minSeqSep) {
222 Pair<RIGNode> predNodePair = this.getEndpoints(predictedCont);
223 RIGNode node1inOrig = originalGraph.getNodeFromSerial(predNodePair.getFirst().getResidueSerial());
224 RIGNode node2inOrig = originalGraph.getNodeFromSerial(predNodePair.getSecond().getResidueSerial());
225 //NOTE order of nodes in findEdge doesn't matter if UNDIRECTED.
226 //It does matter if DIRECTED. However even in that case we are fine because we use same order in this graph
227 if (originalGraph.findEdge(node1inOrig, node2inOrig)!=null) {
228 TruePos++;
229 }
230 else {
231 FalsePos++;
232 }
233 }
234 }
235
236 for (RIGEdge origCont:origContacts) {
237 if(originalGraph.getContactRange(origCont) >= minSeqSep) {
238 Pair<RIGNode> origNodePair = originalGraph.getEndpoints(origCont);
239 RIGNode node1inPred = this.getNodeFromSerial(origNodePair.getFirst().getResidueSerial());
240 RIGNode node2inPred = this.getNodeFromSerial(origNodePair.getSecond().getResidueSerial());
241 //NOTE order of nodes in findEdge doesn't matter if UNDIRECTED.
242 //It does matter if DIRECTED. However even in that case we are fine because we use same order in originalGraph
243 if (this.findEdge(node1inPred,node2inPred)==null) {
244 FalseNeg++;
245 }
246 }
247 }
248 TrueNeg=cmtotal-TruePos-FalsePos-FalseNeg;
249 PredEval eval = new PredEval(TruePos,FalsePos,TrueNeg,FalseNeg,0,predicted,original,cmtotal);
250 return eval;
251 }
252
253 /**
254 * Write graph to given db, using our db graph aglappe format,
255 * i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge
256 * @param conn
257 * @param db
258 * @throws SQLException
259 */
260 //TODO we might want to move this to a graph i/o class
261 //TODO refactor to writeToDb. Get rid of this and only keep fast one??
262 public void write_graph_to_db(MySQLConnection conn, String db) throws SQLException{
263
264 // values we fix to constant
265 String CW = "1";
266 String CR = "(true)";
267 String EXPBB = "0";
268 String ctStr = contactType;
269 String weightedStr = "0";
270 String directedStr = isDirected()?"1":"0";
271
272 if (contactType.endsWith("_CAGLY")) {
273 ctStr = contactType.replace("_CAGLY", "");
274 }
275 if (ctStr.equals("ALL")) {
276 ctStr = "BB+SC+BB/SC";
277 }
278 if (AAinfo.isValidMultiAtomContactType(contactType)) {
279 CW = ctStr;
280 weightedStr = "1";
281 }
282 if (contactType.endsWith("_CAGLY") || contactType.equals("Cb")) {
283 EXPBB = "-1";
284 }
285 if (minSeqSep != -1) {
286 CR = "((i_cid!=j_cid)OR(abs(i_num-j_num)>="+minSeqSep+"))";
287 }
288
289 int pgraphid=0;
290 int graphid=0;
291 String sql = "SELECT graph_id FROM "+db+".chain_graph " +
292 " WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"'" +
293 " AND model_serial = "+model+" AND dist = "+distCutoff+" AND expBB = '"+EXPBB+"'" +
294 " AND method = 'rc-cutoff';";
295 Statement stmt = conn.createStatement();
296 ResultSet rsst = stmt.executeQuery(sql);
297 if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid
298 pgraphid = rsst.getInt(1);
299 } else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid)
300 // we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment)
301 String pdbChainCodeStr = pdbChainCode;
302 if (!pdbChainCode.equals("NULL")) {
303 pdbChainCodeStr="'"+pdbChainCode+"'";
304 }
305 sql = "INSERT INTO "+db+".chain_graph (accession_code,chain_pdb_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " +
306 "VALUES ('"+pdbCode+"', "+pdbChainCodeStr+",'"+chainCode+"', "+model+", "+distCutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+secondaryStructure.getNumElements()+", now())";
307 Statement stmt2 = conn.createStatement();
308 stmt2.executeUpdate(sql);
309 // now we take the newly assigned graph_id as pgraphid
310 sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1";
311 ResultSet rsst2 = stmt2.executeQuery(sql);
312 if (rsst2.next()){
313 pgraphid = rsst2.getInt(1);
314 }
315 stmt2.close();
316 rsst2.close();
317 }
318 rsst.close();
319 // now we insert the graph info into single_model_graph
320 // 1st we grab the single_model_id
321 int singlemodelid = 0;
322 sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE "+
323 " dist="+distCutoff+" AND expBB="+EXPBB+" AND CW='"+CW+"' AND CT='"+ctStr+"' AND CR='"+CR+"';";
324 rsst = stmt.executeQuery(sql);
325 if (rsst.next()){
326 singlemodelid = rsst.getInt(1);
327 }
328 rsst.close();
329 // and then insert to single_model_graph
330 sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,w,d,num_nodes,date) " +
331 " VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+distCutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+weightedStr+", "+directedStr+", "+getObsLength()+", now())";
332 stmt.executeUpdate(sql);
333 // and we grab the graph_id just assigned in single_model_graph
334 sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1";
335 rsst = stmt.executeQuery(sql);
336 if (rsst.next()){
337 graphid = rsst.getInt(1);
338 }
339 rsst.close();
340 stmt.close();
341
342 // inserting nodes
343 // get the max node in db
344 int maxNodeId = 0;
345 sql = "SELECT MAX(node_id) FROM "+db+".single_model_node;";
346 stmt = conn.createStatement();
347 rsst = stmt.executeQuery(sql);
348 if (rsst.next()){
349 maxNodeId = rsst.getInt(1);
350 }
351 rsst.close();
352 stmt.close();
353
354 stmt = conn.createStatement();
355 for (int resser:serials2nodes.keySet()) {
356 RIGNode node = serials2nodes.get(resser);
357 String res = AAinfo.threeletter2oneletter(node.getResidueType());
358 RIGNbhood nbh = this.getNbhood(node);
359 String secStructType = null;
360 String secStructId = null;
361 String sheetSerial = null;
362 String turn = null;
363 SecStrucElement sselem = node.getSecStrucElement();
364 if (sselem!=null){
365 secStructType = quote(Character.toString(sselem.getType()));
366 secStructId = quote(sselem.getId());
367 char sheetSerialChar = sselem.getSheetSerial();
368 if (sheetSerialChar != 0) {
369 sheetSerial = quote(Character.toString(sheetSerialChar));
370 }
371 turn = sselem.isTurn()?"1":"0";
372 }
373 if (isDirected()){ // we insert k(=k_in+k_out), k_in and k_out
374 sql = "INSERT INTO "+db+".single_model_node "+
375 " (graph_id, node_id, cid, num, res, "+
376 " sstype, ssid, sheet_serial, turn, "+
377 " k, k_in, k_out, "+
378 " n, nwg, n_num) " +
379 " VALUES ("+graphid+", "+(maxNodeId+resser)+", '"+chainCode+"', "+resser+", '"+res+"', "+
380 " "+secStructType+", "+secStructId+", "+sheetSerial+", "+turn+", "+
381 (inDegree(node)+outDegree(node))+", "+inDegree(node)+", "+outDegree(node)+", "+
382 "'"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')";
383 } else { // we insert k (and no k_in or k_out)
384 sql = "INSERT INTO "+db+".single_model_node "+
385 " (graph_id, node_id, cid, num, res, "+
386 " sstype, ssid, sheet_serial, turn, "+
387 " k, n, nwg, n_num) " +
388 " VALUES ("+graphid+", "+(maxNodeId+resser)+", '"+chainCode+"', "+resser+", '"+res+"', "+
389 " "+secStructType+", "+secStructId+", "+sheetSerial+", "+turn+", "+
390 degree(node)+", '"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')";
391 }
392 stmt.executeUpdate(sql);
393 }
394
395 // inserting edges
396 // get the max weight
397 double maxWeight = 0;
398 for (RIGEdge cont:getEdges()) {
399 maxWeight = (maxWeight<cont.getAtomWeight())?cont.getAtomWeight():maxWeight;
400 }
401 for (RIGEdge cont:getEdges()){
402 RIGNode i_node = getEndpoints(cont).getFirst();
403 RIGNode j_node = getEndpoints(cont).getSecond();
404 String i_res = AAinfo.threeletter2oneletter(i_node.getResidueType());
405 String j_res = AAinfo.threeletter2oneletter(j_node.getResidueType());
406
407 String i_secStructType = null;
408 String i_secStructId = null;
409 String i_sheetSerial = null;
410 String i_turn = null;
411 SecStrucElement i_sselem = i_node.getSecStrucElement();
412 if (i_sselem!=null){
413 i_secStructType = quote(Character.toString(i_sselem.getType()));
414 i_secStructId = quote(i_sselem.getId());
415 char sheetSerialChar = i_sselem.getSheetSerial();
416 if (sheetSerialChar != 0) {
417 i_sheetSerial = quote(Character.toString(sheetSerialChar));
418 }
419 i_turn = i_sselem.isTurn()?"1":"0";
420 }
421
422 String j_secStructType = null;
423 String j_secStructId = null;
424 String j_sheetSerial = null;
425 String j_turn = null;
426 SecStrucElement j_sselem = j_node.getSecStrucElement();
427 if (j_sselem!=null){
428 j_secStructType = quote(Character.toString(j_sselem.getType()));
429 j_secStructId = quote(j_sselem.getId());
430 char sheetSerialChar = j_sselem.getSheetSerial();
431 if (sheetSerialChar != 0) {
432 j_sheetSerial = quote(Character.toString(sheetSerialChar));
433 }
434 j_turn = j_sselem.isTurn()?"1":"0";
435 }
436
437 sql = "INSERT INTO "+db+".single_model_edge "+
438 " (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+
439 " j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight) " +
440 " VALUES ("+graphid+", "+(maxNodeId+i_node.getResidueSerial())+", '"+chainCode+"', "+i_node.getResidueSerial()+", '"+i_res+"', "+i_secStructType+", "+i_secStructId+", "+i_sheetSerial+", "+i_turn+", "+
441 (maxNodeId+j_node.getResidueSerial())+", '"+chainCode+"', "+j_node.getResidueSerial()+", '"+j_res+"', "+j_secStructType+", "+j_secStructId+", "+j_sheetSerial+", "+j_turn+", "+
442 cont.getAtomWeight()+", "+(cont.getAtomWeight()/maxWeight)+")";
443 stmt.executeUpdate(sql);
444 if(!isDirected()) {// we want both side of the matrix in the table to follow Ioannis' convention
445 // so we insert the reverse contact by swapping i, j in insertion
446 sql = "INSERT INTO "+db+".single_model_edge "+
447 " (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+
448 " j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight) " +
449 " VALUES ("+graphid+", "+(maxNodeId+j_node.getResidueSerial())+", '"+chainCode+"', "+j_node.getResidueSerial()+", '"+j_res+"', "+j_secStructType+", "+j_secStructId+", "+j_sheetSerial+", "+j_turn+", "+
450 (maxNodeId+i_node.getResidueSerial())+", '"+chainCode+"', "+i_node.getResidueSerial()+", '"+i_res+"', "+i_secStructType+", "+i_secStructId+", "+i_sheetSerial+", "+i_turn+", "+
451 cont.getAtomWeight()+", "+(cont.getAtomWeight()/maxWeight)+")";
452 stmt.executeUpdate(sql);
453 }
454 }
455
456 stmt.close();
457
458 }
459
460 /**
461 * Write graph to given db, using our db graph aglappe format,
462 * i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge
463 * @param conn
464 * @param db
465 * @throws SQLException
466 */
467 //TODO we might want to move this to a graph i/o class
468 //TODO refactor to writeToDbFast
469 public void write_graph_to_db_fast(MySQLConnection conn, String db) throws SQLException, IOException {
470
471 // values we fix to constant
472 String CW = "1";
473 String CR = "(true)";
474 String EXPBB = "0";
475 String ctStr = contactType;
476 String weightedStr = "0";
477 String directedStr = isDirected()?"1":"0";
478
479 if (contactType.endsWith("_CAGLY")) {
480 ctStr = contactType.replace("_CAGLY", "");
481 }
482 if (ctStr.equals("ALL")) {
483 ctStr = "BB+SC+BB/SC";
484 }
485 if (AAinfo.isValidMultiAtomContactType(contactType)) {
486 CW = ctStr;
487 weightedStr = "1";
488 }
489 if (contactType.endsWith("_CAGLY") || contactType.equals("Cb")) {
490 EXPBB = "-1";
491 }
492 if (minSeqSep != -1) {
493 CR = "((i_cid!=j_cid)OR(abs(i_num-j_num)>="+minSeqSep+"))";
494 }
495
496 int pgraphid=0;
497 int graphid=0;
498 String sql = "SELECT graph_id FROM "+db+".chain_graph " +
499 " WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"'" +
500 " AND model_serial = "+model+" AND dist = "+distCutoff+" AND expBB = '"+EXPBB+"'" +
501 " AND method = 'rc-cutoff';";
502 Statement stmt = conn.createStatement();
503 ResultSet rsst = stmt.executeQuery(sql);
504 if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid
505 pgraphid = rsst.getInt(1);
506 } else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid)
507 // we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment)
508 String pdbChainCodeStr = pdbChainCode;
509 if (!pdbChainCode.equals("NULL")) {
510 pdbChainCodeStr="'"+pdbChainCode+"'";
511 }
512 sql = "INSERT INTO "+db+".chain_graph (accession_code,chain_pdb_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " +
513 "VALUES ('"+pdbCode+"', "+pdbChainCodeStr+",'"+chainCode+"', "+model+", "+distCutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+secondaryStructure.getNumElements()+", now())";
514 Statement stmt2 = conn.createStatement();
515 stmt2.executeUpdate(sql);
516 // now we take the newly assigned graph_id as pgraphid
517 sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1";
518 ResultSet rsst2 = stmt2.executeQuery(sql);
519 if (rsst2.next()){
520 pgraphid = rsst2.getInt(1);
521 }
522 stmt2.close();
523 rsst2.close();
524 }
525 rsst.close();
526 // now we insert the graph info into single_model_graph
527 // 1st we grab the single_model_id
528 int singlemodelid = 0;
529 sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE "+
530 " dist="+distCutoff+" AND expBB="+EXPBB+" AND CW='"+CW+"' AND CT='"+ctStr+"' AND CR='"+CR+"';";
531 rsst = stmt.executeQuery(sql);
532 if (rsst.next()){
533 singlemodelid = rsst.getInt(1);
534 }
535 rsst.close();
536 // and then insert to single_model_graph
537 sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,w,d,num_nodes,date) " +
538 " VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+distCutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+weightedStr+", "+directedStr+", "+getObsLength()+", now())";
539 stmt.executeUpdate(sql);
540 // and we grab the graph_id just assigned in single_model_graph
541 sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1";
542 rsst = stmt.executeQuery(sql);
543 if (rsst.next()){
544 graphid = rsst.getInt(1);
545 }
546 rsst.close();
547 stmt.close();
548
549 // inserting nodes
550 PrintStream nodesOut = new PrintStream(new FileOutputStream(graphid+"_nodes.txt"));
551 // get the max node in db
552 int maxNodeId = 0;
553 sql = "SELECT MAX(node_id) FROM "+db+".single_model_node;";
554 stmt = conn.createStatement();
555 rsst = stmt.executeQuery(sql);
556 if (rsst.next()){
557 maxNodeId = rsst.getInt(1);
558 }
559 rsst.close();
560 stmt.close();
561
562 for (int resser:serials2nodes.keySet()) {
563
564 RIGNode node = serials2nodes.get(resser);
565 String res = AAinfo.threeletter2oneletter(node.getResidueType());
566 RIGNbhood nbh = this.getNbhood(node);
567 String secStructType = "\\N";
568 String secStructId = "\\N";
569 String sheetSerial = "\\N";
570 String turn = null;
571 SecStrucElement sselem = node.getSecStrucElement();
572 if (sselem!=null){
573 secStructType = Character.toString(sselem.getType());
574 secStructId = sselem.getId();
575 char sheetSerialChar = sselem.getSheetSerial();
576 if (sheetSerialChar != 0) {
577 sheetSerial = Character.toString(sheetSerialChar);
578 }
579 turn = sselem.isTurn()?"1":"0";
580 }
581 if (isDirected()){ // we insert k(=k_in+k_out), k_in and k_out
582 nodesOut.println(graphid+"\t"+(maxNodeId+resser)+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+
583 secStructType+"\t"+secStructId+"\t"+sheetSerial+"\t"+turn+"\t"+
584 (inDegree(node)+outDegree(node))+"\t"+inDegree(node)+"\t"+outDegree(node)+"\t"+
585 nbh.getMotifNoGaps()+"\t"+nbh.getMotif()+"\t"+nbh.getCommaSeparatedResSerials());
586 } else { // we insert k (and no k_in or k_out)
587 nodesOut.println(graphid+"\t"+(maxNodeId+resser)+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+
588 secStructType+"\t"+secStructId+"\t"+sheetSerial+"\t"+turn+"\t"+
589 degree(node)+"\t"+"\\N"+"\t"+"\\N"+"\t"+
590 nbh.getMotifNoGaps()+"\t"+nbh.getMotif()+"\t"+nbh.getCommaSeparatedResSerials());
591 }
592 }
593 nodesOut.close();
594 stmt = conn.createStatement();
595 sql = "LOAD DATA LOCAL INFILE '"+graphid+"_nodes.txt' INTO TABLE "+db+".single_model_node "+
596 " (graph_id, node_id, cid, num, res, "+
597 " sstype, ssid, sheet_serial, turn, "+
598 " k, k_in, k_out, n, nwg, n_num);";
599 stmt.executeUpdate(sql);
600 File fileToDelete = new File(graphid+"_nodes.txt");
601 if (fileToDelete.exists()) {
602 fileToDelete.delete();
603 }
604
605 // inserting edges
606 PrintStream edgesOut = new PrintStream(new FileOutputStream(graphid+"_edges.txt"));
607 // get the max weight
608 double maxWeight = 0;
609 for (RIGEdge cont:getEdges()) {
610 maxWeight = (maxWeight<cont.getAtomWeight())?cont.getAtomWeight():maxWeight;
611 }
612 for (RIGEdge cont:getEdges()){
613 RIGNode i_node = getEndpoints(cont).getFirst();
614 RIGNode j_node = getEndpoints(cont).getSecond();
615 String i_res = AAinfo.threeletter2oneletter(i_node.getResidueType());
616 String j_res = AAinfo.threeletter2oneletter(j_node.getResidueType());
617
618 String i_secStructType = "\\N";
619 String i_secStructId = "\\N";
620 String i_sheetSerial = "\\N";
621 String i_turn = null;
622 SecStrucElement i_sselem = i_node.getSecStrucElement();
623 if (i_sselem!=null){
624 i_secStructType = Character.toString(i_sselem.getType());
625 i_secStructId = i_sselem.getId();
626 char sheetSerialChar = i_sselem.getSheetSerial();
627 if (sheetSerialChar != 0) {
628 i_sheetSerial = Character.toString(sheetSerialChar);
629 }
630 i_turn = i_sselem.isTurn()?"1":"0";
631 }
632
633 String j_secStructType = "\\N";
634 String j_secStructId = "\\N";
635 String j_sheetSerial = "\\N";
636 String j_turn = null;
637 SecStrucElement j_sselem = j_node.getSecStrucElement();
638 if (j_sselem!=null){
639 j_secStructType = Character.toString(j_sselem.getType());
640 j_secStructId = j_sselem.getId();
641 char sheetSerialChar = j_sselem.getSheetSerial();
642 if (sheetSerialChar != 0) {
643 j_sheetSerial = Character.toString(sheetSerialChar);
644 }
645 j_turn = j_sselem.isTurn()?"1":"0";
646 }
647
648 edgesOut.println(graphid+"\t"+(maxNodeId+i_node.getResidueSerial())+"\t"+chainCode+"\t"+i_node.getResidueSerial()+"\t"+i_res+"\t"+i_secStructType+"\t"+i_secStructId+"\t"+i_sheetSerial+"\t"+i_turn+"\t"+
649 (maxNodeId+j_node.getResidueSerial())+"\t"+chainCode+"\t"+j_node.getResidueSerial()+"\t"+j_res+"\t"+j_secStructType+"\t"+j_secStructId+"\t"+j_sheetSerial+"\t"+j_turn+"\t"+
650 cont.getAtomWeight()+"\t"+(cont.getAtomWeight()/maxWeight));
651 if(!isDirected()) {// we want both side of the matrix in the table to follow Ioannis' convention
652 // so we insert the reverse contact by swapping i, j in insertion
653 edgesOut.println(graphid+"\t"+(maxNodeId+j_node.getResidueSerial())+"\t"+chainCode+"\t"+j_node.getResidueSerial()+"\t"+j_res+"\t"+j_secStructType+"\t"+j_secStructId+"\t"+j_sheetSerial+"\t"+j_turn+"\t"+
654 (maxNodeId+i_node.getResidueSerial())+"\t"+chainCode+"\t"+i_node.getResidueSerial()+"\t"+i_res+"\t"+i_secStructType+"\t"+i_secStructId+"\t"+i_sheetSerial+"\t"+i_turn+"\t"+
655 cont.getAtomWeight()+"\t"+(cont.getAtomWeight()/maxWeight));
656 }
657 }
658 edgesOut.close();
659 sql = "LOAD DATA LOCAL INFILE '"+graphid+"_edges.txt' INTO TABLE "+db+".single_model_edge "+
660 " (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+
661 " j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight);";
662 stmt.executeUpdate(sql);
663 stmt.close();
664 fileToDelete = new File(graphid+"_edges.txt");
665 if (fileToDelete.exists()) {
666 fileToDelete.delete();
667 }
668
669 }
670
671 /** Single quotes the given string */
672 private String quote(String s) {
673 return ("'"+s+"'");
674 }
675
676 /**
677 * Write graph to given outfile in aglappe format
678 * @param outfile
679 * @throws IOException
680 */
681 //TODO we might want to move this to a graph i/o class
682 //TODO refactor to writeToFile
683 public void write_graph_to_file (String outfile) throws IOException {
684 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
685 Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION);
686 Out.println("#SEQUENCE: "+sequence);
687 Out.println("#PDB: "+pdbCode);
688 Out.println("#PDB CHAIN CODE: "+pdbChainCode);
689 Out.println("#CHAIN: "+chainCode);
690 Out.println("#CT: "+contactType);
691 Out.println("#CUTOFF: "+distCutoff);
692 for (RIGEdge cont:getEdges()){
693 Pair<RIGNode> pair = getEndpoints(cont);
694 int i_resser=pair.getFirst().getResidueSerial();
695 int j_resser=pair.getSecond().getResidueSerial();
696 //BEWARE!! here we write weights while in writeToDb we write atomWeights (consistent with what we do in FileRIGraph) TODO do we want this behaviour?
697 double weight=cont.getWeight();
698 Out.printf(Locale.US,i_resser+"\t"+j_resser+"\t%6.3f\n",weight);
699 }
700 Out.close();
701 }
702
703 /**
704 * Write graph to given outfile in network(Ioannis) format
705 * @param graphId
706 * @param dirName
707 * @throws IOException
708 */
709 //TODO we might want to move this to a graph i/o class
710 //TODO refactor
711 public void writeUndirUnweightGraphToNetworkFiles (int graphId, String dirName) throws IOException {
712 if (isDirected()) {
713 System.err.println("This method is only for undirected graphs!");
714 return;
715 }
716
717 String filePrefix = dirName + "/" + String.valueOf(graphId)+"_"+pdbCode+"_"+chainCode+"_"+contactType.replaceAll("/", ".")+"_"+String.valueOf(distCutoff).replaceAll("\\.", "_")+"_";
718 PrintStream Out = new PrintStream(new FileOutputStream(filePrefix+"edges.txt"));
719 for (RIGEdge cont:getEdges()){
720 Pair<RIGNode> pair = getEndpoints(cont);
721 int i_resser=pair.getFirst().getResidueSerial();
722 int j_resser=pair.getSecond().getResidueSerial();
723 if (i_resser < j_resser) {
724 Out.printf(Locale.US,i_resser+"\t"+j_resser+"\t"+1+"\t"+"1.000"+"\n");
725 }
726 }
727 Out.close();
728 Out = new PrintStream(new FileOutputStream(filePrefix+"nodes.txt"));
729 for (int resser:this.serials2nodes.keySet()) {
730 RIGNode node = this.getNodeFromSerial(resser);
731 String res = AAinfo.threeletter2oneletter(node.getResidueType());
732 Out.printf(Locale.US,resser+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+degree(node)+"\t"+degree(node)+"\n");
733
734 }
735 Out.close();
736 }
737
738 /**
739 * Compares this RIGraph to given RIGraph returning 3 graphs: common, onlythis, onlyother in a HashMap
740 * @param other
741 * @return
742 * @throws Exception
743 */
744 //TODO not sure what kind of return we want, for now is a HashMap with three graph objects
745 public HashMap<String,RIGraph> compare(RIGraph other) throws Exception{
746 //first check that other has same sequence than this, otherwise throw exception
747 if (this.getFullLength()!=other.getFullLength()) {
748 //TODO throw specific exception
749 throw new Exception("Sequence of 2 graphs to compare differ, can't compare them.");
750 } else {
751 for (int resser:this.serials2nodes.keySet()){
752 String this_res = getNodeFromSerial(resser).getResidueType();
753 String other_res = other.getNodeFromSerial(resser).getResidueType();
754 if (!this_res.equals("X") && !other_res.equals("X") && !this_res.equals(other_res)) {
755 //TODO throw specific exception
756 throw new Exception("Sequence of 2 graphs to compare differ, can't compare them.");
757 }
758 }
759 }
760 //NOTE: the common graph will have same node/edge properties as this graph,
761 // which doesn't make a lot of sense, but anyway one has to choose between this or other,
762 // or otherwise make some kind of merge, e.g. merge the weights by averaging?
763 RIGraph commongraph = this.copy();
764 RIGraph onlythisgraph = this.copy();
765 RIGraph onlyothergraph = other.copy();
766
767 for (RIGEdge cont:this.getEdges()){
768 Pair<RIGNode> pair = this.getEndpoints(cont);
769 int i_resser = pair.getFirst().getResidueSerial();
770 int j_resser = pair.getSecond().getResidueSerial();
771 if (other.findEdge(other.getNodeFromSerial(i_resser), other.getNodeFromSerial(j_resser))!=null) {
772 onlythisgraph.removeEdge(onlythisgraph.findEdge(onlythisgraph.getNodeFromSerial(i_resser),onlythisgraph.getNodeFromSerial(j_resser)));
773 onlyothergraph.removeEdge(onlyothergraph.findEdge(onlyothergraph.getNodeFromSerial(i_resser),onlyothergraph.getNodeFromSerial(j_resser)));
774 } else {
775 commongraph.removeEdge(commongraph.findEdge(commongraph.getNodeFromSerial(i_resser),commongraph.getNodeFromSerial(j_resser)));
776 }
777 }
778
779 HashMap<String,RIGraph> result = new HashMap<String,RIGraph>();
780 result.put("common", commongraph);
781 result.put("onlythis", onlythisgraph);
782 result.put("onlyother",onlyothergraph);
783 return result;
784 }
785
786 /**
787 * Returns a new RIGraph copy (deep) of this one
788 * @return
789 */
790 public RIGraph copy() {
791 RIGraph newGraph = new RIGraph();
792 newGraph.setPdbCode(pdbCode);
793 newGraph.setPdbChainCode(pdbChainCode);
794 newGraph.setChainCode(chainCode);
795 newGraph.setModel(model);
796 newGraph.setContactType(contactType);
797 newGraph.setCutoff(distCutoff);
798 newGraph.setSequence(sequence);
799
800 // copying nodes and serials2nodes
801 TreeMap<Integer,RIGNode> newSerials2nodes = new TreeMap<Integer,RIGNode>();
802 for (RIGNode node:this.getVertices()) {
803 RIGNode newNode = node.copy();
804 newGraph.addVertex(newNode);
805 newSerials2nodes.put(newNode.getResidueSerial(),newNode);
806 }
807 newGraph.setSerials2NodesMap(newSerials2nodes);
808
809 // copying edges
810 for (RIGEdge edge:this.getEdges()) {
811 Pair<RIGNode> pair = this.getEndpoints(edge);
812 int i_resser = pair.getFirst().getResidueSerial();
813 int j_resser = pair.getSecond().getResidueSerial();
814 // EdgeType enum should copy correctly because enums are treated as ints in copying (always deep copied)
815 newGraph.addEdge(edge.copy(), newGraph.getNodeFromSerial(i_resser), newGraph.getNodeFromSerial(j_resser), this.getEdgeType(edge));
816 }
817
818 // copying the SecondaryStructure object by retrieving all references from the new nodes
819 SecondaryStructure secStruct = new SecondaryStructure();
820 for (RIGNode node:newGraph.getVertices()) {
821 SecStrucElement sselem = node.getSecStrucElement();
822 if (sselem!=null && !secStruct.contains(sselem)) {
823 secStruct.add(sselem);
824 }
825 }
826 newGraph.setSecondaryStructure(secStruct);
827
828 return newGraph;
829 }
830 }