ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/branches/aglappe-jung/proteinstructure/RIGraph.java
Revision: 438
Committed: Wed Nov 28 15:44:57 2007 UTC (16 years, 10 months ago) by filippis
File size: 34123 byte(s)
Log Message:
AAInfo
-isValidContactType, isValidSingleAtomContactType, isValidMultipleAtomContactType takes into account the directionality, the overlapping atoms and the "+"
-isOverlapping method has been added

AIGEdge
-copy method added

AIGNode
-equals and copy methods has been added

AIGraph
-getRIGraph now takes argument directed and directionality doesn't depend anymore on isCrossed
-distances are now set for RIGNodes in getRIGraph
-for undirected, crossed graphs we take care not to add parallel RIGedges in getRIGraph
-we set now the secondaryStructure object of RIGraph in getRIGraph
-addGraph has been added

Box
-we throw away wrong check in getDistancesWithinBox - read the comment

DbRIGraph
-constructors now take argument directed and directionality doesn't depend anymore on isCrossed
-distances are read also from db and set for RIGEdges

Pdb
-get_graph now takes as argument directed and directionality doesn't depend anymore on isCrossed
-there is check for forbidden cts in get_graph
-cts with + are taken into account in get_graph

RIGEdge
-setAtomWeight and setDistance methods have been added

RIGNode
-equals method has been added

RIGraph
-getObsLength has been deleted
-we take care now of the new cts (+,undirected crossed), of null secondary structure case and of distances in write_graph_to_db and write_graph_to_db_fast

genDbGraph
-argument for directionality is now added


Line File contents
1 package proteinstructure;
2
3 import java.io.File;
4 import java.io.FileOutputStream;
5 import java.io.IOException;
6 import java.io.PrintStream;
7 import java.sql.ResultSet;
8 import java.sql.SQLException;
9 import java.sql.Statement;
10 import java.util.Collection;
11 import java.util.HashMap;
12 import java.util.Locale;
13 import java.util.TreeMap;
14
15 import tools.MySQLConnection;
16
17 import edu.uci.ics.jung.graph.util.Pair;
18
19 /**
20 * A Residue Interaction Graph
21 *
22 */
23 public class RIGraph extends ProtStructGraph<RIGNode,RIGEdge> {
24
25 private static final long serialVersionUID = 1L;
26
27 private static final String SINGLEMODELS_DB = "ioannis";
28
29 // fields
30 protected double distCutoff;
31 protected String contactType; // use AAinfo.isValidContactType() to test for validity
32
33 public RIGraph() {
34 super();
35 this.distCutoff=0;
36 this.contactType=null;
37 }
38
39 /**
40 * Constructs a RIGraph with a sequence but no edges
41 * @param sequence
42 */
43 public RIGraph(String sequence) {
44 super();
45 this.sequence = sequence;
46 this.fullLength = sequence.length();
47 this.distCutoff=0;
48 this.contactType=null;
49 serials2nodes = new TreeMap<Integer,RIGNode>();
50 for(int i=0; i < sequence.length(); i++) {
51 RIGNode node = new RIGNode(i+1,AAinfo.oneletter2threeletter(Character.toString(sequence.charAt(i))));
52 this.addVertex(node);
53 serials2nodes.put(i+1, node);
54 }
55 }
56
57 /**
58 * Returns the contact type of this RIGraph
59 * @return
60 */
61 public String getContactType() {
62 return contactType;
63 }
64
65 /**
66 * Sets the contact type of this RIGraph
67 * @param ct the contact type
68 */
69 public void setContactType(String contactType) {
70 this.contactType=contactType;
71 }
72
73 /**
74 * Returns the distance cutoff for this RIGraph.
75 * @return the distance cutoff
76 */
77 public double getCutoff(){
78 return distCutoff;
79 }
80
81 /**
82 * Sets the distance cutoff for this RIGraph.
83 * @param distCutoff the distance cutoff
84 */
85 public void setCutoff(double distCutoff) {
86 this.distCutoff = distCutoff;
87 }
88
89 /**
90 * Gets the number of observed residues for this RIGraph
91 * @return
92 */
93 public int getObsLength() {
94 return this.getVertexCount();
95 }
96
97 /**
98 * Returns a RIGNbhood that contains the neighbourhood of given RIGNode
99 * @param node
100 * @return
101 */
102 public RIGNbhood getNbhood (RIGNode node) {
103 Collection<RIGNode> nbs = this.getNeighbors(node);
104 RIGNbhood nbhood = new RIGNbhood(node);
105 for (RIGNode nb:nbs) {
106 nbhood.put(nb.getResidueSerial(), nb);
107 }
108 return nbhood;
109 }
110
111 /**
112 * Returns a RIGNbhood that contains the 2nd shell neighbourhood of given RIGNode
113 * @param node
114 * @return
115 */
116 public RIGNbhood getSecondShellNbhood (RIGNode node) {
117 Collection<RIGNode> nbs = this.getNeighbors(node);
118 RIGNbhood nbhood = new RIGNbhood(node);
119 for (RIGNode nb:nbs) {
120 for (RIGNode nb2:this.getNeighbors(nb)) {
121 if (nb2!=node) {
122 // RIGNbhood is a TreeMap that should take care of not inserting duplicates
123 nbhood.put(nb2.getResidueSerial(),nb2);
124 }
125 }
126 }
127 return nbhood;
128 }
129
130 /**
131 * Returns a RIGCommonNbhood that contains common neighbours of given RIGNodes iNode, jNode
132 * @param iNode
133 * @param jNode
134 * @return
135 */
136 public RIGCommonNbhood getCommonNbhood(RIGNode iNode, RIGNode jNode) {
137 Collection<RIGNode> iNbs = this.getNeighbors(iNode);
138 Collection<RIGNode> jNbs = this.getNeighbors(jNode);
139 boolean connected = false;
140 //NOTE in DIRECTED case this means strictly an edge from iNode to jNode
141 if (this.findEdge(iNode, jNode)!=null) connected = true;
142 RIGCommonNbhood comNbhood = new RIGCommonNbhood(iNode, jNode, connected);
143 for (RIGNode iNb: iNbs) {
144 if (jNbs.contains(iNb)) {
145 comNbhood.put(iNb.getResidueSerial(), iNb);
146 }
147 }
148 return comNbhood;
149 }
150
151 /**
152 * Returns all common neighborhood sizes (if they are >0) for each cell of the contact map (contact or non-contact)
153 * @return
154 */
155 public HashMap<Pair<Integer>,Integer> getAllCommonNbhSizes() {
156 HashMap<Pair<Integer>,Integer> comNbhSizes = new HashMap<Pair<Integer>, Integer>();
157 boolean directed = this.isDirected();
158 for (RIGNode n1:this.getVertices()) {
159 for (RIGNode n2:this.getVertices()) {
160 if (directed) {
161 if (n1!=n2) {
162 int size = getCommonNbhood(n1, n2).size();
163 if (size>0) comNbhSizes.put(new Pair<Integer>(n1.getResidueSerial(),n2.getResidueSerial()),size);
164 }
165 } else {
166 if (n1.getResidueSerial()<n2.getResidueSerial()) {
167 int size = getCommonNbhood(n1, n2).size();
168 if (size>0) comNbhSizes.put(new Pair<Integer>(n1.getResidueSerial(),n2.getResidueSerial()),size);
169 }
170 }
171 }
172 }
173 return comNbhSizes;
174 }
175
176 public int getContactRange(RIGEdge edge) {
177 Pair<RIGNode> pair = this.getEndpoints(edge);
178 return Math.abs(pair.getFirst().getResidueSerial()-pair.getSecond().getResidueSerial());
179 }
180
181 //TODO evaluatePrediction methods should be in ProtStructGraph.
182 // But to be able to put them there we would need to pass here a Transformer that gets atom or residue serials depending if we are in AI or RI Graph
183 /**
184 * Evaluate this graph (assuming it is a prediction) against an original graph
185 * @param originalGraph
186 * @return
187 */
188 public PredEval evaluatePrediction(RIGraph originalGraph) {
189 return evaluatePrediction(originalGraph, 1);
190 }
191
192 /**
193 * Evaluate this graph (assuming it is a prediction) against an original graph,
194 * considering only edges with sequence separation at least minSeqSep.
195 * @param originalGraph
196 * @param minSeqSep
197 * @return
198 */
199 public PredEval evaluatePrediction(RIGraph originalGraph, int minSeqSep) {
200
201 Collection<RIGEdge> predictedContacts = this.getEdges();
202 Collection<RIGEdge> origContacts = originalGraph.getEdges();
203 // total predicted contacts
204 int predicted = 0;
205 for(RIGEdge e:predictedContacts) {
206 if(this.getContactRange(e) >= minSeqSep) {
207 predicted++;
208 }
209 }
210
211 // total native contacts
212 int original = 0;
213 for(RIGEdge e:origContacts) {
214 if(originalGraph.getContactRange(e) >= minSeqSep) {
215 original++;
216 }
217 }
218
219 // total size of contact map (potential contacts)
220 int cmtotal = 0;
221 if (originalGraph.isDirected()){
222 cmtotal = (originalGraph.getFullLength()-(minSeqSep-1))*(originalGraph.getFullLength()-minSeqSep);
223 } else {
224 cmtotal = (int)(((originalGraph.getFullLength()-(minSeqSep-1))*(originalGraph.getFullLength()-minSeqSep))/2);
225 }
226 int TruePos=0, FalsePos=0, TrueNeg=0, FalseNeg=0;
227
228 // directed/ non-directed graphs should be both fine with this code
229 // the only thing that changes between directed/non-directed is the count of total cells in contact map (taken care for above)
230 for (RIGEdge predictedCont:predictedContacts){
231 if(this.getContactRange(predictedCont) >= minSeqSep) {
232 Pair<RIGNode> predNodePair = this.getEndpoints(predictedCont);
233 RIGNode node1inOrig = originalGraph.getNodeFromSerial(predNodePair.getFirst().getResidueSerial());
234 RIGNode node2inOrig = originalGraph.getNodeFromSerial(predNodePair.getSecond().getResidueSerial());
235 //NOTE order of nodes in findEdge doesn't matter if UNDIRECTED.
236 //It does matter if DIRECTED. However even in that case we are fine because we use same order in this graph
237 if (originalGraph.findEdge(node1inOrig, node2inOrig)!=null) {
238 TruePos++;
239 }
240 else {
241 FalsePos++;
242 }
243 }
244 }
245
246 for (RIGEdge origCont:origContacts) {
247 if(originalGraph.getContactRange(origCont) >= minSeqSep) {
248 Pair<RIGNode> origNodePair = originalGraph.getEndpoints(origCont);
249 RIGNode node1inPred = this.getNodeFromSerial(origNodePair.getFirst().getResidueSerial());
250 RIGNode node2inPred = this.getNodeFromSerial(origNodePair.getSecond().getResidueSerial());
251 //NOTE order of nodes in findEdge doesn't matter if UNDIRECTED.
252 //It does matter if DIRECTED. However even in that case we are fine because we use same order in originalGraph
253 if (this.findEdge(node1inPred,node2inPred)==null) {
254 FalseNeg++;
255 }
256 }
257 }
258 TrueNeg=cmtotal-TruePos-FalsePos-FalseNeg;
259 PredEval eval = new PredEval(TruePos,FalsePos,TrueNeg,FalseNeg,0,predicted,original,cmtotal);
260 return eval;
261 }
262
263 /**
264 * Write graph to given db, using our db graph aglappe format,
265 * i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge
266 * @param conn
267 * @param db
268 * @throws SQLException
269 */
270 //TODO we might want to move this to a graph i/o class
271 //TODO refactor to writeToDb. Get rid of this and only keep fast one??
272 public void write_graph_to_db(MySQLConnection conn, String db) throws SQLException{
273
274 // values we fix to constant
275 String CW = "1";
276 String CR = "(true)";
277 String EXPBB = "0";
278 String ctStr = contactType;
279 String weightedStr = "0";
280 String directedStr = isDirected()?"1":"0";
281
282 if (contactType.contains("_CAGLY")) {
283 ctStr = contactType.replaceAll("_CAGLY", "");
284 }
285 if (ctStr.equals("ALL")) {
286 ctStr = "BB+SC+BB/SC";
287 }
288
289 if (AAinfo.isValidMultiAtomContactType(contactType, isDirected())) {
290 CW = ctStr;
291 weightedStr = "1";
292 }
293 if (contactType.contains("_CAGLY") || contactType.contains("Cb")) {
294 EXPBB = "-1";
295 }
296 if (minSeqSep != -1) {
297 CR = "((i_cid!=j_cid)OR(abs(i_num-j_num)>="+minSeqSep+"))";
298 }
299
300 int pgraphid=0;
301 int graphid=0;
302 String sql = "SELECT graph_id FROM "+db+".chain_graph " +
303 " WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"'" +
304 " AND model_serial = "+model+" AND dist = "+distCutoff+" AND expBB = "+EXPBB+
305 " AND method = 'rc-cutoff';";
306 Statement stmt = conn.createStatement();
307 ResultSet rsst = stmt.executeQuery(sql);
308 if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid
309 pgraphid = rsst.getInt(1);
310 } else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid)
311 // we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment)
312 String pdbChainCodeStr = pdbChainCode;
313 if (!pdbChainCode.equals("NULL")) {
314 pdbChainCodeStr="'"+pdbChainCode+"'";
315 }
316 sql = "INSERT INTO "+db+".chain_graph (accession_code,chain_pdb_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " +
317 "VALUES ('"+pdbCode+"', "+pdbChainCodeStr+",'"+chainCode+"', "+model+", "+distCutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+((secondaryStructure!=null)?secondaryStructure.getNumElements():0)+", now())";
318 Statement stmt2 = conn.createStatement();
319 stmt2.executeUpdate(sql);
320 // now we take the newly assigned graph_id as pgraphid
321 sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1";
322 ResultSet rsst2 = stmt2.executeQuery(sql);
323 if (rsst2.next()){
324 pgraphid = rsst2.getInt(1);
325 }
326 stmt2.close();
327 rsst2.close();
328 }
329 rsst.close();
330 // now we insert the graph info into single_model_graph
331 // 1st we grab the single_model_id
332 int singlemodelid = 0;
333 sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE "+
334 " dist="+distCutoff+" AND expBB="+EXPBB+" AND CW='"+CW+"' AND CT='"+ctStr+"' AND CR='"+CR+"';";
335 rsst = stmt.executeQuery(sql);
336 if (rsst.next()){
337 singlemodelid = rsst.getInt(1);
338 }
339 rsst.close();
340 // and then insert to single_model_graph
341 sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,w,d,num_nodes,date) " +
342 " VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+distCutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+weightedStr+", "+directedStr+", "+getObsLength()+", now())";
343 stmt.executeUpdate(sql);
344 // and we grab the graph_id just assigned in single_model_graph
345 sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1";
346 rsst = stmt.executeQuery(sql);
347 if (rsst.next()){
348 graphid = rsst.getInt(1);
349 }
350 rsst.close();
351 stmt.close();
352
353 // inserting nodes
354 // get the max node in db
355 int maxNodeId = 0;
356 sql = "SELECT MAX(node_id) FROM "+db+".single_model_node;";
357 stmt = conn.createStatement();
358 rsst = stmt.executeQuery(sql);
359 if (rsst.next()){
360 maxNodeId = rsst.getInt(1);
361 }
362 rsst.close();
363 stmt.close();
364
365 stmt = conn.createStatement();
366 for (int resser:serials2nodes.keySet()) {
367 RIGNode node = serials2nodes.get(resser);
368 String res = AAinfo.threeletter2oneletter(node.getResidueType());
369 RIGNbhood nbh = this.getNbhood(node);
370 String secStructType = null;
371 String secStructId = null;
372 String sheetSerial = null;
373 String turn = null;
374 SecStrucElement sselem = node.getSecStrucElement();
375 if (sselem!=null){
376 secStructType = quote(Character.toString(sselem.getType()));
377 secStructId = quote(sselem.getId());
378 char sheetSerialChar = sselem.getSheetSerial();
379 if (sheetSerialChar != 0) {
380 sheetSerial = quote(Character.toString(sheetSerialChar));
381 }
382 turn = sselem.isTurn()?"1":"0";
383 }
384 if (isDirected()){ // we insert k(=k_in+k_out), k_in and k_out
385 sql = "INSERT INTO "+db+".single_model_node "+
386 " (graph_id, node_id, cid, num, res, "+
387 " sstype, ssid, sheet_serial, turn, "+
388 " k, k_in, k_out, "+
389 " n, nwg, n_num) " +
390 " VALUES ("+graphid+", "+(maxNodeId+resser)+", '"+chainCode+"', "+resser+", '"+res+"', "+
391 " "+secStructType+", "+secStructId+", "+sheetSerial+", "+turn+", "+
392 (inDegree(node)+outDegree(node))+", "+inDegree(node)+", "+outDegree(node)+", "+
393 "'"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')";
394 } else { // we insert k (and no k_in or k_out)
395 sql = "INSERT INTO "+db+".single_model_node "+
396 " (graph_id, node_id, cid, num, res, "+
397 " sstype, ssid, sheet_serial, turn, "+
398 " k, n, nwg, n_num) " +
399 " VALUES ("+graphid+", "+(maxNodeId+resser)+", '"+chainCode+"', "+resser+", '"+res+"', "+
400 " "+secStructType+", "+secStructId+", "+sheetSerial+", "+turn+", "+
401 degree(node)+", '"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')";
402 }
403 stmt.executeUpdate(sql);
404 }
405
406 // inserting edges
407 // get the max weight
408 double maxWeight = 0;
409 for (RIGEdge cont:getEdges()) {
410 maxWeight = (maxWeight<cont.getAtomWeight())?cont.getAtomWeight():maxWeight;
411 }
412 for (RIGEdge cont:getEdges()){
413 RIGNode i_node = getEndpoints(cont).getFirst();
414 RIGNode j_node = getEndpoints(cont).getSecond();
415 String i_res = AAinfo.threeletter2oneletter(i_node.getResidueType());
416 String j_res = AAinfo.threeletter2oneletter(j_node.getResidueType());
417
418 String i_secStructType = null;
419 String i_secStructId = null;
420 String i_sheetSerial = null;
421 String i_turn = null;
422 SecStrucElement i_sselem = i_node.getSecStrucElement();
423 if (i_sselem!=null){
424 i_secStructType = quote(Character.toString(i_sselem.getType()));
425 i_secStructId = quote(i_sselem.getId());
426 char sheetSerialChar = i_sselem.getSheetSerial();
427 if (sheetSerialChar != 0) {
428 i_sheetSerial = quote(Character.toString(sheetSerialChar));
429 }
430 i_turn = i_sselem.isTurn()?"1":"0";
431 }
432
433 String j_secStructType = null;
434 String j_secStructId = null;
435 String j_sheetSerial = null;
436 String j_turn = null;
437 SecStrucElement j_sselem = j_node.getSecStrucElement();
438 if (j_sselem!=null){
439 j_secStructType = quote(Character.toString(j_sselem.getType()));
440 j_secStructId = quote(j_sselem.getId());
441 char sheetSerialChar = j_sselem.getSheetSerial();
442 if (sheetSerialChar != 0) {
443 j_sheetSerial = quote(Character.toString(sheetSerialChar));
444 }
445 j_turn = j_sselem.isTurn()?"1":"0";
446 }
447
448 sql = "INSERT INTO "+db+".single_model_edge "+
449 " (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+
450 " j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight, distance) " +
451 " VALUES ("+graphid+", "+(maxNodeId+i_node.getResidueSerial())+", '"+chainCode+"', "+i_node.getResidueSerial()+", '"+i_res+"', "+i_secStructType+", "+i_secStructId+", "+i_sheetSerial+", "+i_turn+", "+
452 (maxNodeId+j_node.getResidueSerial())+", '"+chainCode+"', "+j_node.getResidueSerial()+", '"+j_res+"', "+j_secStructType+", "+j_secStructId+", "+j_sheetSerial+", "+j_turn+", "+
453 cont.getAtomWeight()+", "+(cont.getAtomWeight()/maxWeight)+", "+cont.getDistance()+")";
454 stmt.executeUpdate(sql);
455 if(!isDirected()) {// we want both side of the matrix in the table to follow Ioannis' convention
456 // so we insert the reverse contact by swapping i, j in insertion
457 sql = "INSERT INTO "+db+".single_model_edge "+
458 " (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+
459 " j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight, distance) " +
460 " VALUES ("+graphid+", "+(maxNodeId+j_node.getResidueSerial())+", '"+chainCode+"', "+j_node.getResidueSerial()+", '"+j_res+"', "+j_secStructType+", "+j_secStructId+", "+j_sheetSerial+", "+j_turn+", "+
461 (maxNodeId+i_node.getResidueSerial())+", '"+chainCode+"', "+i_node.getResidueSerial()+", '"+i_res+"', "+i_secStructType+", "+i_secStructId+", "+i_sheetSerial+", "+i_turn+", "+
462 cont.getAtomWeight()+", "+(cont.getAtomWeight()/maxWeight)+", "+cont.getDistance()+")";
463 stmt.executeUpdate(sql);
464 }
465 }
466
467 stmt.close();
468
469 }
470
471 /**
472 * Write graph to given db, using our db graph aglappe format,
473 * i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge
474 * @param conn
475 * @param db
476 * @throws SQLException
477 */
478 //TODO we might want to move this to a graph i/o class
479 //TODO refactor to writeToDbFast
480 public void write_graph_to_db_fast(MySQLConnection conn, String db) throws SQLException, IOException {
481
482 // values we fix to constant
483 String CW = "1";
484 String CR = "(true)";
485 String EXPBB = "0";
486 String ctStr = contactType;
487 String weightedStr = "0";
488 String directedStr = isDirected()?"1":"0";
489
490 if (contactType.contains("_CAGLY")) {
491 ctStr = contactType.replaceAll("_CAGLY", "");
492 }
493 if (ctStr.equals("ALL")) {
494 ctStr = "BB+SC+BB/SC";
495 }
496 if (AAinfo.isValidMultiAtomContactType(contactType, isDirected())) {
497 CW = ctStr;
498 weightedStr = "1";
499 }
500 if (contactType.contains("_CAGLY") || contactType.contains("Cb")) {
501 EXPBB = "-1";
502 }
503 if (minSeqSep != -1) {
504 CR = "((i_cid!=j_cid)OR(abs(i_num-j_num)>="+minSeqSep+"))";
505 }
506
507 int pgraphid=0;
508 int graphid=0;
509 String sql = "SELECT graph_id FROM "+db+".chain_graph " +
510 " WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"'" +
511 " AND model_serial = "+model+" AND dist = "+distCutoff+" AND expBB = "+EXPBB+
512 " AND method = 'rc-cutoff';";
513 Statement stmt = conn.createStatement();
514 ResultSet rsst = stmt.executeQuery(sql);
515 if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid
516 pgraphid = rsst.getInt(1);
517 } else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid)
518 // we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment)
519 String pdbChainCodeStr = pdbChainCode;
520 if (!pdbChainCode.equals("NULL")) {
521 pdbChainCodeStr="'"+pdbChainCode+"'";
522 }
523 sql = "INSERT INTO "+db+".chain_graph (accession_code,chain_pdb_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " +
524 "VALUES ('"+pdbCode+"', "+pdbChainCodeStr+",'"+chainCode+"', "+model+", "+distCutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+((secondaryStructure!=null)?secondaryStructure.getNumElements():0)+", now())";
525 Statement stmt2 = conn.createStatement();
526 stmt2.executeUpdate(sql);
527 // now we take the newly assigned graph_id as pgraphid
528 sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1";
529 ResultSet rsst2 = stmt2.executeQuery(sql);
530 if (rsst2.next()){
531 pgraphid = rsst2.getInt(1);
532 }
533 stmt2.close();
534 rsst2.close();
535 }
536 rsst.close();
537 // now we insert the graph info into single_model_graph
538 // 1st we grab the single_model_id
539 int singlemodelid = 0;
540 sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE "+
541 " dist="+distCutoff+" AND expBB="+EXPBB+" AND CW='"+CW+"' AND CT='"+ctStr+"' AND CR='"+CR+"';";
542 rsst = stmt.executeQuery(sql);
543 if (rsst.next()){
544 singlemodelid = rsst.getInt(1);
545 }
546 rsst.close();
547 // and then insert to single_model_graph
548 sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,w,d,num_nodes,date) " +
549 " VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+distCutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+weightedStr+", "+directedStr+", "+getObsLength()+", now())";
550 stmt.executeUpdate(sql);
551 // and we grab the graph_id just assigned in single_model_graph
552 sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1";
553 rsst = stmt.executeQuery(sql);
554 if (rsst.next()){
555 graphid = rsst.getInt(1);
556 }
557 rsst.close();
558 stmt.close();
559
560 // inserting nodes
561 PrintStream nodesOut = new PrintStream(new FileOutputStream(graphid+"_nodes.txt"));
562 // get the max node in db
563 int maxNodeId = 0;
564 sql = "SELECT MAX(node_id) FROM "+db+".single_model_node;";
565 stmt = conn.createStatement();
566 rsst = stmt.executeQuery(sql);
567 if (rsst.next()){
568 maxNodeId = rsst.getInt(1);
569 }
570 rsst.close();
571 stmt.close();
572
573 for (int resser:serials2nodes.keySet()) {
574
575 RIGNode node = serials2nodes.get(resser);
576 String res = AAinfo.threeletter2oneletter(node.getResidueType());
577 RIGNbhood nbh = this.getNbhood(node);
578 String secStructType = "\\N";
579 String secStructId = "\\N";
580 String sheetSerial = "\\N";
581 String turn = null;
582 SecStrucElement sselem = node.getSecStrucElement();
583 if (sselem!=null){
584 secStructType = Character.toString(sselem.getType());
585 secStructId = sselem.getId();
586 char sheetSerialChar = sselem.getSheetSerial();
587 if (sheetSerialChar != 0) {
588 sheetSerial = Character.toString(sheetSerialChar);
589 }
590 turn = sselem.isTurn()?"1":"0";
591 }
592 if (isDirected()){ // we insert k(=k_in+k_out), k_in and k_out
593 nodesOut.println(graphid+"\t"+(maxNodeId+resser)+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+
594 secStructType+"\t"+secStructId+"\t"+sheetSerial+"\t"+turn+"\t"+
595 (inDegree(node)+outDegree(node))+"\t"+inDegree(node)+"\t"+outDegree(node)+"\t"+
596 nbh.getMotifNoGaps()+"\t"+nbh.getMotif()+"\t"+nbh.getCommaSeparatedResSerials());
597 } else { // we insert k (and no k_in or k_out)
598 nodesOut.println(graphid+"\t"+(maxNodeId+resser)+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+
599 secStructType+"\t"+secStructId+"\t"+sheetSerial+"\t"+turn+"\t"+
600 degree(node)+"\t"+"\\N"+"\t"+"\\N"+"\t"+
601 nbh.getMotifNoGaps()+"\t"+nbh.getMotif()+"\t"+nbh.getCommaSeparatedResSerials());
602 }
603 }
604 nodesOut.close();
605 stmt = conn.createStatement();
606 sql = "LOAD DATA LOCAL INFILE '"+graphid+"_nodes.txt' INTO TABLE "+db+".single_model_node "+
607 " (graph_id, node_id, cid, num, res, "+
608 " sstype, ssid, sheet_serial, turn, "+
609 " k, k_in, k_out, n, nwg, n_num);";
610 stmt.executeUpdate(sql);
611 File fileToDelete = new File(graphid+"_nodes.txt");
612 if (fileToDelete.exists()) {
613 fileToDelete.delete();
614 }
615
616 // inserting edges
617 PrintStream edgesOut = new PrintStream(new FileOutputStream(graphid+"_edges.txt"));
618 // get the max weight
619 double maxWeight = 0;
620 for (RIGEdge cont:getEdges()) {
621 maxWeight = (maxWeight<cont.getAtomWeight())?cont.getAtomWeight():maxWeight;
622 }
623 for (RIGEdge cont:getEdges()){
624 RIGNode i_node = getEndpoints(cont).getFirst();
625 RIGNode j_node = getEndpoints(cont).getSecond();
626 String i_res = AAinfo.threeletter2oneletter(i_node.getResidueType());
627 String j_res = AAinfo.threeletter2oneletter(j_node.getResidueType());
628
629 String i_secStructType = "\\N";
630 String i_secStructId = "\\N";
631 String i_sheetSerial = "\\N";
632 String i_turn = null;
633 SecStrucElement i_sselem = i_node.getSecStrucElement();
634 if (i_sselem!=null){
635 i_secStructType = Character.toString(i_sselem.getType());
636 i_secStructId = i_sselem.getId();
637 char sheetSerialChar = i_sselem.getSheetSerial();
638 if (sheetSerialChar != 0) {
639 i_sheetSerial = Character.toString(sheetSerialChar);
640 }
641 i_turn = i_sselem.isTurn()?"1":"0";
642 }
643
644 String j_secStructType = "\\N";
645 String j_secStructId = "\\N";
646 String j_sheetSerial = "\\N";
647 String j_turn = null;
648 SecStrucElement j_sselem = j_node.getSecStrucElement();
649 if (j_sselem!=null){
650 j_secStructType = Character.toString(j_sselem.getType());
651 j_secStructId = j_sselem.getId();
652 char sheetSerialChar = j_sselem.getSheetSerial();
653 if (sheetSerialChar != 0) {
654 j_sheetSerial = Character.toString(sheetSerialChar);
655 }
656 j_turn = j_sselem.isTurn()?"1":"0";
657 }
658
659 edgesOut.println(graphid+"\t"+(maxNodeId+i_node.getResidueSerial())+"\t"+chainCode+"\t"+i_node.getResidueSerial()+"\t"+i_res+"\t"+i_secStructType+"\t"+i_secStructId+"\t"+i_sheetSerial+"\t"+i_turn+"\t"+
660 (maxNodeId+j_node.getResidueSerial())+"\t"+chainCode+"\t"+j_node.getResidueSerial()+"\t"+j_res+"\t"+j_secStructType+"\t"+j_secStructId+"\t"+j_sheetSerial+"\t"+j_turn+"\t"+
661 cont.getAtomWeight()+"\t"+(cont.getAtomWeight()/maxWeight)+"\t"+cont.getDistance());
662 if(!isDirected()) {// we want both side of the matrix in the table to follow Ioannis' convention
663 // so we insert the reverse contact by swapping i, j in insertion
664 edgesOut.println(graphid+"\t"+(maxNodeId+j_node.getResidueSerial())+"\t"+chainCode+"\t"+j_node.getResidueSerial()+"\t"+j_res+"\t"+j_secStructType+"\t"+j_secStructId+"\t"+j_sheetSerial+"\t"+j_turn+"\t"+
665 (maxNodeId+i_node.getResidueSerial())+"\t"+chainCode+"\t"+i_node.getResidueSerial()+"\t"+i_res+"\t"+i_secStructType+"\t"+i_secStructId+"\t"+i_sheetSerial+"\t"+i_turn+"\t"+
666 cont.getAtomWeight()+"\t"+(cont.getAtomWeight()/maxWeight)+"\t"+cont.getDistance());
667 }
668 }
669 edgesOut.close();
670 sql = "LOAD DATA LOCAL INFILE '"+graphid+"_edges.txt' INTO TABLE "+db+".single_model_edge "+
671 " (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+
672 " j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight, distance);";
673 stmt.executeUpdate(sql);
674 stmt.close();
675 fileToDelete = new File(graphid+"_edges.txt");
676 if (fileToDelete.exists()) {
677 fileToDelete.delete();
678 }
679
680 }
681
682 /** Single quotes the given string */
683 private String quote(String s) {
684 return ("'"+s+"'");
685 }
686
687 /**
688 * Write graph to given outfile in aglappe format
689 * @param outfile
690 * @throws IOException
691 */
692 //TODO we might want to move this to a graph i/o class
693 //TODO refactor to writeToFile
694 public void write_graph_to_file (String outfile) throws IOException {
695 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
696 Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION);
697 Out.println("#SEQUENCE: "+sequence);
698 Out.println("#PDB: "+pdbCode);
699 Out.println("#PDB CHAIN CODE: "+pdbChainCode);
700 Out.println("#CHAIN: "+chainCode);
701 Out.println("#CT: "+contactType);
702 Out.println("#CUTOFF: "+distCutoff);
703
704 // we use a temp TreeMap to be able to order the output
705 TreeMap<Pair<Integer>,Double> pairs = new TreeMap<Pair<Integer>,Double>(new IntPairComparator());
706 for (RIGEdge cont:getEdges()){
707 Pair<RIGNode> pair = getEndpoints(cont);
708 int i_resser=pair.getFirst().getResidueSerial();
709 int j_resser=pair.getSecond().getResidueSerial();
710 //BEWARE!! here we write weights while in writeToDb we write atomWeights (consistent with what we do in FileRIGraph) TODO do we want this behaviour?
711 double weight=cont.getWeight();
712 pairs.put(new Pair<Integer>(i_resser,j_resser),weight);
713
714 }
715 for (Pair<Integer> pair:pairs.keySet()) {
716 Out.printf(Locale.US,pair.getFirst()+"\t"+pair.getSecond()+"\t%6.3f\n",pairs.get(pair));
717 }
718 Out.close();
719 }
720
721
722 public void writeToSADPFile (String outfile) throws IOException {
723 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
724 Out.println(this.getFullLength());
725 for (RIGEdge cont:getEdges()){
726 Pair<RIGNode> pair = getEndpoints(cont);
727 int i_resser=pair.getFirst().getResidueSerial() - 1;
728 int j_resser=pair.getSecond().getResidueSerial() - 1 ;
729 //BEWARE!! here we write weights while in writeToDb we write atomWeights (consistent with what we do in FileRIGraph) TODO do we want this behaviour?
730 double weight=cont.getWeight();
731 Out.printf(Locale.US,i_resser+"\t"+j_resser+"\t%1.0f\t1\n",weight);
732 }
733 Out.close();
734 }
735
736
737 /**
738 * Write graph to given outfile in network(Ioannis) format
739 * @param graphId
740 * @param dirName
741 * @throws IOException
742 */
743 //TODO we might want to move this to a graph i/o class
744 //TODO refactor
745 public void writeUndirUnweightGraphToNetworkFiles (int graphId, String dirName) throws IOException {
746 if (isDirected()) {
747 System.err.println("This method is only for undirected graphs!");
748 return;
749 }
750
751 String filePrefix = dirName + "/" + String.valueOf(graphId)+"_"+pdbCode+"_"+chainCode+"_"+contactType.replaceAll("/", ".")+"_"+String.valueOf(distCutoff).replaceAll("\\.", "_")+"_";
752 PrintStream Out = new PrintStream(new FileOutputStream(filePrefix+"edges.txt"));
753 for (RIGEdge cont:getEdges()){
754 Pair<RIGNode> pair = getEndpoints(cont);
755 int i_resser=pair.getFirst().getResidueSerial();
756 int j_resser=pair.getSecond().getResidueSerial();
757 if (i_resser < j_resser) {
758 Out.printf(Locale.US,i_resser+"\t"+j_resser+"\t"+1+"\t"+"1.000"+"\n");
759 }
760 }
761 Out.close();
762 Out = new PrintStream(new FileOutputStream(filePrefix+"nodes.txt"));
763 for (int resser:this.serials2nodes.keySet()) {
764 RIGNode node = this.getNodeFromSerial(resser);
765 String res = AAinfo.threeletter2oneletter(node.getResidueType());
766 Out.printf(Locale.US,resser+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+degree(node)+"\t"+degree(node)+"\n");
767
768 }
769 Out.close();
770 }
771
772 /**
773 * Compares this RIGraph to given RIGraph returning 3 graphs: common, onlythis, onlyother in a HashMap
774 * @param other
775 * @return
776 * @throws Exception
777 */
778 //TODO not sure what kind of return we want, for now is a HashMap with three graph objects
779 public HashMap<String,RIGraph> compare(RIGraph other) throws Exception{
780 //first check that other has same sequence than this, otherwise throw exception
781 if (this.getFullLength()!=other.getFullLength()) {
782 //TODO throw specific exception
783 throw new Exception("Sequence of 2 graphs to compare differ, can't compare them.");
784 } else {
785 for (int resser:this.serials2nodes.keySet()){
786 String this_res = getNodeFromSerial(resser).getResidueType();
787 String other_res = other.getNodeFromSerial(resser).getResidueType();
788 if (!this_res.equals("X") && !other_res.equals("X") && !this_res.equals(other_res)) {
789 //TODO throw specific exception
790 throw new Exception("Sequence of 2 graphs to compare differ, can't compare them.");
791 }
792 }
793 }
794 //NOTE: the common graph will have same node/edge properties as this graph,
795 // which doesn't make a lot of sense, but anyway one has to choose between this or other,
796 // or otherwise make some kind of merge, e.g. merge the weights by averaging?
797 RIGraph commongraph = this.copy();
798 RIGraph onlythisgraph = this.copy();
799 RIGraph onlyothergraph = other.copy();
800
801 for (RIGEdge cont:this.getEdges()){
802 Pair<RIGNode> pair = this.getEndpoints(cont);
803 int i_resser = pair.getFirst().getResidueSerial();
804 int j_resser = pair.getSecond().getResidueSerial();
805 if (other.findEdge(other.getNodeFromSerial(i_resser), other.getNodeFromSerial(j_resser))!=null) {
806 onlythisgraph.removeEdge(onlythisgraph.findEdge(onlythisgraph.getNodeFromSerial(i_resser),onlythisgraph.getNodeFromSerial(j_resser)));
807 onlyothergraph.removeEdge(onlyothergraph.findEdge(onlyothergraph.getNodeFromSerial(i_resser),onlyothergraph.getNodeFromSerial(j_resser)));
808 } else {
809 commongraph.removeEdge(commongraph.findEdge(commongraph.getNodeFromSerial(i_resser),commongraph.getNodeFromSerial(j_resser)));
810 }
811 }
812
813 HashMap<String,RIGraph> result = new HashMap<String,RIGraph>();
814 result.put("common", commongraph);
815 result.put("onlythis", onlythisgraph);
816 result.put("onlyother",onlyothergraph);
817 return result;
818 }
819
820 /**
821 * Returns a new RIGraph copy (deep) of this one
822 * @return
823 */
824 public RIGraph copy() {
825 RIGraph newGraph = new RIGraph();
826 newGraph.setPdbCode(pdbCode);
827 newGraph.setPdbChainCode(pdbChainCode);
828 newGraph.setChainCode(chainCode);
829 newGraph.setModel(model);
830 newGraph.setContactType(contactType);
831 newGraph.setCutoff(distCutoff);
832 newGraph.setSequence(sequence);
833
834 // copying nodes and serials2nodes
835 TreeMap<Integer,RIGNode> newSerials2nodes = new TreeMap<Integer,RIGNode>();
836 for (RIGNode node:this.getVertices()) {
837 RIGNode newNode = node.copy();
838 newGraph.addVertex(newNode);
839 newSerials2nodes.put(newNode.getResidueSerial(),newNode);
840 }
841 newGraph.setSerials2NodesMap(newSerials2nodes);
842
843 // copying edges
844 for (RIGEdge edge:this.getEdges()) {
845 Pair<RIGNode> pair = this.getEndpoints(edge);
846 int i_resser = pair.getFirst().getResidueSerial();
847 int j_resser = pair.getSecond().getResidueSerial();
848 // EdgeType enum should copy correctly because enums are treated as ints in copying (always deep copied)
849 newGraph.addEdge(edge.copy(), newGraph.getNodeFromSerial(i_resser), newGraph.getNodeFromSerial(j_resser), this.getEdgeType(edge));
850 }
851
852 // copying the SecondaryStructure object by retrieving all references from the new nodes
853 SecondaryStructure secStruct = new SecondaryStructure();
854 for (RIGNode node:newGraph.getVertices()) {
855 SecStrucElement sselem = node.getSecStrucElement();
856 if (sselem!=null && !secStruct.contains(sselem)) {
857 secStruct.add(sselem);
858 }
859 }
860 newGraph.setSecondaryStructure(secStruct);
861
862 return newGraph;
863 }
864 }