ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 425
Committed: Fri Nov 23 17:55:51 2007 UTC (17 years, 3 months ago) by duarte
File size: 41734 byte(s)
Log Message:
Fixed bug in Graph constructor from sadp's ContactMap
Line User Rev File contents
1 duarte 123 package proteinstructure;
2 duarte 191
3 filippis 407 import java.io.File;
4 duarte 123 import java.io.FileOutputStream;
5     import java.io.PrintStream;
6     import java.io.IOException;
7 duarte 279 import java.util.Collections;
8 lpetzo 372 import java.util.Iterator;
9 duarte 284 import java.util.Locale;
10 duarte 388 import java.util.Set;
11 duarte 129 import java.util.TreeMap;
12 duarte 189 import java.util.HashMap;
13 duarte 123
14 duarte 260 import java.sql.SQLException;
15     import java.sql.Statement;
16    
17     import java.sql.ResultSet;
18    
19 lpetzo 372 import sadp.ContactMap;
20 duarte 260 import tools.MySQLConnection;
21    
22 duarte 207 /**
23     * A residue interaction graph derived from a single chain pdb protein structure
24     *
25     */
26 duarte 123 public class Graph {
27    
28 duarte 144 public final static String GRAPHFILEFORMATVERSION = "1.0";
29 duarte 206
30 stehr 274 private final static String SINGLEMODELS_DB = "ioannis"; //TODO: Is this being used??
31 duarte 260
32 duarte 234 public EdgeSet contacts; // we keep it public to be able to re-reference the object directly (getContacts() copies it)
33 duarte 123
34 duarte 207 protected TreeMap<Integer,String> nodes; // nodes is a TreeMap of residue serials to residue types (3 letter code)
35 stehr 274 protected SecondaryStructure secondaryStructure; // secondary structure annotation for this protein graph
36 duarte 284
37 duarte 207 protected String sequence; // the full sequence (with unobserved residues and non-standard aas ='X')
38     protected String pdbCode;
39     protected String chainCode;
40 duarte 208 protected String pdbChainCode;
41 duarte 260 protected int model;
42 duarte 207 protected double cutoff;
43     protected String ct; // the contact type
44 duarte 208 protected boolean directed;
45 filippis 407 protected int minSeqSep = -1;
46     protected int maxSeqSep = -1;
47 duarte 207
48 duarte 159 // fullLength is length of full sequence or:
49     // -if sequence not provided (when reading from db): length of everything except possible unobserved residues at end of chain
50     // -if sequence and nodes not provided (when reading from file and sequence field missing): length except possible unobserved residues at end of chain and possible nodes without contacts at end of chain
51 duarte 207 protected int fullLength;
52     protected int obsLength; // length without unobserved, non standard aas
53 duarte 159
54 duarte 207 protected int numContacts;
55 duarte 159
56 duarte 207 protected boolean modified;
57 stehr 337
58     /**
59     * Constructs an empty graph object
60     */
61 duarte 207 public Graph() {
62 stehr 337 this.contacts=new EdgeSet();
63     this.cutoff=0;
64     this.nodes=new TreeMap<Integer,String>();
65     this.sequence=null;
66     this.pdbCode=null;
67     this.chainCode=null;
68     this.pdbChainCode=null;
69     this.model=1;
70     this.ct=null;
71     this.fullLength=0;
72     this.obsLength=0;
73     this.numContacts=0;
74     this.modified=false;
75     this.directed=false;
76     this.secondaryStructure = new SecondaryStructure();
77 duarte 207 }
78 stehr 337
79     /**
80     * Constructs a graph with a sequence but no edges
81     */
82     public Graph(String sequence) {
83     this.contacts=new EdgeSet();
84     this.cutoff=0;
85     this.nodes=new TreeMap<Integer,String>();
86     for(int i=0; i < sequence.length(); i++) {
87 stehr 377 nodes.put(i+1, AAinfo.oneletter2threeletter(Character.toString(sequence.charAt(i))));
88 stehr 337 }
89     this.sequence=sequence;
90     this.pdbCode=null;
91     this.chainCode=null;
92     this.pdbChainCode=null;
93     this.model=1;
94     this.ct=null;
95     this.fullLength=sequence.length();
96     this.obsLength=fullLength;
97     this.numContacts=0;
98     this.modified=false;
99     this.directed=false;
100     this.secondaryStructure = new SecondaryStructure();
101     }
102 duarte 374
103 duarte 134 /**
104 duarte 374 * Constructs Graph object by passing EdgeSet with contacts and TreeMap with nodes (res serials and types)
105 duarte 206 * Must also pass contact type, cutoff, pdbCode and chainCode
106 duarte 134 * @param contacts
107     * @param nodes
108     * @param sequence
109     * @param cutoff
110     * @param ct
111 duarte 206 * @param pdbCode
112     * @param chainCode
113 duarte 260 * @param pdbChainCode
114     * @param model
115 duarte 374 * @param secStruct
116 duarte 134 */
117 duarte 374 public Graph (EdgeSet contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String pdbCode, String chainCode, String pdbChainCode, int model, SecondaryStructure secStruct) {
118 duarte 123 this.contacts=contacts;
119     this.cutoff=cutoff;
120 duarte 129 this.nodes=nodes;
121     this.sequence=sequence;
122 duarte 206 this.pdbCode=pdbCode;
123     this.chainCode=chainCode;
124     this.pdbChainCode=pdbChainCode;
125 duarte 260 this.model=model;
126 duarte 123 this.ct=ct;
127 duarte 279 // in case of pdb was read from file and there was no SEQRES field then fullLength here shouldn't be sequence length but maximum observed residue (see Pdb class)
128     this.fullLength=Math.max(sequence.length(),Collections.max(nodes.keySet()));
129 duarte 159 this.obsLength=nodes.size();
130     this.numContacts=contacts.size();
131 duarte 175 this.modified=false;
132 duarte 208 this.directed=false;
133 duarte 129 if (ct.contains("/")){
134     directed=true;
135     }
136 stehr 274 if(secStruct == null) {
137     // we allow null to be passed to simplify graph construction
138     this.secondaryStructure = new SecondaryStructure();
139     } else {
140     this.secondaryStructure = secStruct;
141     }
142 stehr 217
143 stehr 274 // do some verification checks
144     assert(secondaryStructure != null);
145 stehr 217 assert(this.pdbCode.equals(this.pdbCode.toLowerCase())); // pdb codes should be always lower case
146 duarte 123 }
147 duarte 135
148 duarte 260 /**
149 lpetzo 372 * Constructs a Graph from a sadp.ContactMap.
150     * TODO: This constructor might produce erroneous Graph instances.
151     *
152     * @param cm the ContactMap
153     * @param sequence
154     * @param cutoff
155     * @param ct
156     * @param pdbCode
157     * @param chainCode
158     * @param pdbChainCode
159     * @param model
160     * @param secStruct
161     */
162     public Graph( ContactMap cm, String sequence, double cutoff, String ct, String pdbCode, String chainCode, String pdbChainCode, int model, SecondaryStructure secStruct) {
163    
164 duarte 425 EdgeSet contacts = new EdgeSet();
165     int[][] adjList = cm.getAdjacencyList();
166    
167     // construct the contacts
168     for( int i=0; i<adjList.length; ++i ) {
169     for( int j=0; j<adjList[i].length; ++j ) {
170     // store contacts only once for i < j
171     if( i<adjList[i][j] ) {
172     contacts.add(new Edge(i+1,adjList[i][j]+1));
173     }
174     }
175 lpetzo 372 }
176 duarte 425
177     TreeMap<Integer, String> nodes = new TreeMap<Integer, String>();
178    
179     // construct the mapping from node index to 3-lettercode
180     if( sequence != null ) {
181     for( int i=0; i<sequence.length(); ++i ) {
182     nodes.put(i+1,AAinfo.oneletter2threeletter(sequence.substring(i,i)));
183     }
184     } else {
185     // as there is no sequence information available we assign the
186     // non-defined unknown amino acid 'Xaa' to every residue
187     for( int i=0; i<cm.countNodes(); ++i ) {
188     nodes.put(i+1, "Xaa");
189     }
190 lpetzo 372 }
191    
192     // construct dummy sequence if necessary
193     if( sequence == null ) {
194     StringBuffer sb = new StringBuffer(cm.countNodes());
195     for( int i=0; i<sb.capacity(); ++i ) {
196     sb.append('X');
197     }
198     sequence = sb.toString();
199     }
200    
201     this.contacts=contacts;
202    
203     this.cutoff=cutoff;
204     this.nodes=nodes;
205     this.sequence=sequence;
206     this.pdbCode = pdbCode == null ? "" : pdbCode;
207     this.chainCode = chainCode == null ? "" : chainCode;
208     this.pdbChainCode = pdbChainCode == null ? "" : pdbChainCode;
209     this.model=model;
210     this.ct=ct;
211     // in case of pdb was read from file and there was no SEQRES field then fullLength here shouldn't be sequence length but maximum observed residue (see Pdb class)
212     this.fullLength=Math.max(sequence.length(),Collections.max(nodes.keySet()));
213     this.obsLength=nodes.size();
214     this.numContacts=contacts.size();
215     this.modified=false;
216     this.directed=false;
217     if (ct.contains("/")){
218     directed=true;
219     }
220     if(secStruct == null) {
221     // we allow null to be passed to simplify graph construction
222     this.secondaryStructure = new SecondaryStructure();
223     } else {
224     this.secondaryStructure = secStruct;
225     }
226    
227     // do some verification checks
228     assert(secondaryStructure != null);
229     assert(this.pdbCode.equals(this.pdbCode.toLowerCase())); // pdb codes should be always lower case
230     assert(this.pdbChainCode.equals(this.pdbChainCode.toUpperCase())); // pdb chain codes should be always upper case
231     }
232    
233     /**
234 duarte 260 * Write graph to given db, using our db graph aglappe format,
235     * i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge
236     * @param conn
237     * @param db
238     * @throws SQLException
239     */
240     public void write_graph_to_db(MySQLConnection conn, String db) throws SQLException{
241 filippis 407
242     conn.setSqlMode("NO_UNSIGNED_SUBTRACTION,TRADITIONAL");
243    
244 duarte 260 // we are fixing these 3 values to what corresponds to our graphs
245     String CW = "1";
246     String CR = "(true)";
247     String EXPBB = "0";
248 filippis 407 String ctStr = ct;
249     String weightedStr = "0";
250     String directedStr = directed?"1":"0";
251 duarte 135
252 filippis 407 if (ct.endsWith("_CAGLY")) {
253     ctStr = ct.replace("_CAGLY", "");
254     }
255     if (ctStr.equals("ALL")) {
256     ctStr = "BB+SC+BB/SC";
257     }
258     if (AAinfo.isValidMultiAtomContactType(ct)) {
259     CW = ctStr;
260     weightedStr = "1";
261     }
262     if (ct.endsWith("_CAGLY") || ct.equals("Cb")) {
263     EXPBB = "-1";
264     }
265     if (minSeqSep != -1) {
266     CR = "((i_cid!=j_cid)OR(abs(i_num-j_num)>="+minSeqSep+"))";
267     }
268    
269 duarte 260 int pgraphid=0;
270     int graphid=0;
271     String sql = "SELECT graph_id FROM "+db+".chain_graph " +
272 filippis 407 " WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"'" +
273     " AND model_serial = "+model+" AND dist = "+cutoff+" AND expBB = '"+EXPBB+"'" +
274     " AND method = 'rc-cutoff';";
275 duarte 260 Statement stmt = conn.createStatement();
276     ResultSet rsst = stmt.executeQuery(sql);
277     if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid
278     pgraphid = rsst.getInt(1);
279     } else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid)
280     // we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment)
281 duarte 287 String pdbChainCodeStr = pdbChainCode;
282     if (!pdbChainCode.equals("NULL")) {
283     pdbChainCodeStr="'"+pdbChainCode+"'";
284     }
285     sql = "INSERT INTO "+db+".chain_graph (accession_code,chain_pdb_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " +
286     "VALUES ('"+pdbCode+"', "+pdbChainCodeStr+",'"+chainCode+"', "+model+", "+cutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+secondaryStructure.getNumElements()+", now())";
287 duarte 260 Statement stmt2 = conn.createStatement();
288     stmt2.executeUpdate(sql);
289     // now we take the newly assigned graph_id as pgraphid
290     sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1";
291     ResultSet rsst2 = stmt2.executeQuery(sql);
292     if (rsst2.next()){
293     pgraphid = rsst2.getInt(1);
294     }
295     stmt2.close();
296     rsst2.close();
297     }
298     rsst.close();
299     // now we insert the graph info into single_model_graph
300     // 1st we grab the single_model_id
301     int singlemodelid = 0;
302 filippis 407 sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE "+
303     " dist="+cutoff+" AND expBB="+EXPBB+" AND CW='"+CW+"' AND CT='"+ctStr+"' AND CR='"+CR+"';";
304 duarte 260 rsst = stmt.executeQuery(sql);
305     if (rsst.next()){
306     singlemodelid = rsst.getInt(1);
307     }
308     rsst.close();
309     // and then insert to single_model_graph
310 filippis 407 sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,w,d,num_nodes,date) " +
311     " VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+cutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+weightedStr+", "+directedStr+", "+getObsLength()+", now())";
312 duarte 260 stmt.executeUpdate(sql);
313     // and we grab the graph_id just assigned in single_model_graph
314     sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1";
315     rsst = stmt.executeQuery(sql);
316     if (rsst.next()){
317     graphid = rsst.getInt(1);
318     }
319     rsst.close();
320     stmt.close();
321 filippis 407
322     // inserting nodes
323     // get the max node in db
324     int maxNodeId = 0;
325     sql = "SELECT MAX(node_id) FROM "+db+".single_model_node;";
326     stmt = conn.createStatement();
327     rsst = stmt.executeQuery(sql);
328     if (rsst.next()){
329     maxNodeId = rsst.getInt(1);
330     }
331     rsst.close();
332     stmt.close();
333    
334     stmt = conn.createStatement();
335     for (int resser:nodes.keySet()) {
336     String res = AAinfo.threeletter2oneletter(getResType(resser));
337     NodeNbh nbh = getNodeNbh(resser);
338     String secStructType = null;
339     String secStructId = null;
340     String sheetSerial = null;
341     String turn = null;
342     if (secondaryStructure.getSecStrucElement(resser)!=null){
343     secStructType = quote(Character.toString(secondaryStructure.getSecStrucElement(resser).getType()));
344     secStructId = quote(secondaryStructure.getSecStrucElement(resser).getId());
345     char sheetSerialChar = secondaryStructure.getSecStrucElement(resser).getSheetSerial();
346     if (sheetSerialChar != 0) {
347     sheetSerial = quote(Character.toString(sheetSerialChar));
348     }
349     turn = secondaryStructure.getSecStrucElement(resser).isTurn()?"1":"0";
350     }
351     if (directed){ // we insert k(=k_in+k_out), k_in and k_out
352     sql = "INSERT INTO "+db+".single_model_node "+
353     " (graph_id, node_id, cid, num, res, "+
354     " sstype, ssid, sheet_serial, turn, "+
355     " k, k_in, k_out, "+
356     " n, nwg, n_num) " +
357     " VALUES ("+graphid+", "+(maxNodeId+resser)+", '"+chainCode+"', "+resser+", '"+res+"', "+
358     " "+secStructType+", "+secStructId+", "+sheetSerial+", "+turn+", "+
359     (getInDegree(resser)+getOutDegree(resser))+", "+getInDegree(resser)+", "+getOutDegree(resser)+", "+
360     "'"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')";
361     } else { // we insert k (and no k_in or k_out)
362     sql = "INSERT INTO "+db+".single_model_node "+
363     " (graph_id, node_id, cid, num, res, "+
364     " sstype, ssid, sheet_serial, turn, "+
365     " k, n, nwg, n_num) " +
366     " VALUES ("+graphid+", "+(maxNodeId+resser)+", '"+chainCode+"', "+resser+", '"+res+"', "+
367     " "+secStructType+", "+secStructId+", "+sheetSerial+", "+turn+", "+
368     getDegree(resser)+", '"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')";
369     }
370     stmt.executeUpdate(sql);
371     }
372    
373 duarte 260 // inserting edges
374 filippis 407 // get the max weight
375     double maxWeight = 0;
376     for (Edge cont:contacts) {
377     maxWeight = (maxWeight<cont.weight)?cont.weight:maxWeight;
378     }
379 duarte 260 for (Edge cont:contacts){
380 duarte 326 String i_res = AAinfo.threeletter2oneletter(getResType(cont.i));
381     String j_res = AAinfo.threeletter2oneletter(getResType(cont.j));
382 filippis 407
383     String i_secStructType = null;
384     String i_secStructId = null;
385     String i_sheetSerial = null;
386     String i_turn = null;
387 duarte 276 if (secondaryStructure.getSecStrucElement(cont.i)!=null){
388 filippis 407 i_secStructType = quote(Character.toString(secondaryStructure.getSecStrucElement(cont.i).getType()));
389     i_secStructId = quote(secondaryStructure.getSecStrucElement(cont.i).getId());
390     char sheetSerialChar = secondaryStructure.getSecStrucElement(cont.i).getSheetSerial();
391     if (sheetSerialChar != 0) {
392     i_sheetSerial = quote(Character.toString(sheetSerialChar));
393     }
394     i_turn = secondaryStructure.getSecStrucElement(cont.i).isTurn()?"1":"0";
395 duarte 276 }
396 filippis 407
397     String j_secStructType = null;
398     String j_secStructId = null;
399     String j_sheetSerial = null;
400     String j_turn = null;
401 duarte 276 if (secondaryStructure.getSecStrucElement(cont.j)!=null){
402 filippis 407 j_secStructType = quote(Character.toString(secondaryStructure.getSecStrucElement(cont.j).getType()));
403     j_secStructId = quote(secondaryStructure.getSecStrucElement(cont.j).getId());
404     char sheetSerialChar = secondaryStructure.getSecStrucElement(cont.j).getSheetSerial();
405     if (sheetSerialChar != 0) {
406     j_sheetSerial = quote(Character.toString(sheetSerialChar));
407     }
408     j_turn = secondaryStructure.getSecStrucElement(cont.j).isTurn()?"1":"0";
409 duarte 276 }
410 filippis 407
411     sql = "INSERT INTO "+db+".single_model_edge "+
412     " (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+
413     " j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight) " +
414     " VALUES ("+graphid+", "+(maxNodeId+cont.i)+", '"+chainCode+"', "+cont.i+", '"+i_res+"', "+i_secStructType+", "+i_secStructId+", "+i_sheetSerial+", "+i_turn+", "+
415     (maxNodeId+cont.j)+", '"+chainCode+"', "+cont.j+", '"+j_res+"', "+j_secStructType+", "+j_secStructId+", "+j_sheetSerial+", "+j_turn+", "+
416     Math.round(cont.weight)+", "+(cont.weight/maxWeight)+")";
417 duarte 260 stmt.executeUpdate(sql);
418 filippis 407 if(!directed) {// we want both side of the matrix in the table to follow Ioannis' convention
419     // so we insert the reverse contact by swapping i, j in insertion
420     sql = "INSERT INTO "+db+".single_model_edge "+
421     " (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+
422     " j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight) " +
423     " VALUES ("+graphid+", "+(maxNodeId+cont.j)+", '"+chainCode+"', "+cont.j+", '"+j_res+"', "+j_secStructType+", "+j_secStructId+", "+j_sheetSerial+", "+j_turn+", "+
424     (maxNodeId+cont.i)+", '"+chainCode+"', "+cont.i+", '"+i_res+"', "+i_secStructType+", "+i_secStructId+", "+i_sheetSerial+", "+i_turn+", "+
425     Math.round(cont.weight)+", "+(cont.weight/maxWeight)+")";
426 duarte 260 stmt.executeUpdate(sql);
427     }
428     }
429 filippis 407
430     stmt.close();
431     }
432    
433     /**
434     * Write graph to given db, using our db graph aglappe format,
435     * i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge
436     * @param conn
437     * @param db
438     * @throws SQLException
439     */
440     public void write_graph_to_db_fast(MySQLConnection conn, String db) throws SQLException, IOException {
441    
442     conn.setSqlMode("NO_UNSIGNED_SUBTRACTION,TRADITIONAL");
443    
444     // we are fixing these 3 values to what corresponds to our graphs
445     String CW = "1";
446     String CR = "(true)";
447     String EXPBB = "0";
448     String ctStr = ct;
449     String weightedStr = "0";
450     String directedStr = directed?"1":"0";
451    
452     if (ct.endsWith("_CAGLY")) {
453     ctStr = ct.replace("_CAGLY", "");
454     }
455     if (ctStr.equals("ALL")) {
456     ctStr = "BB+SC+BB/SC";
457     }
458     if (AAinfo.isValidMultiAtomContactType(ct)) {
459     CW = ctStr;
460     weightedStr = "1";
461     }
462     if (ct.endsWith("_CAGLY") || ct.equals("Cb")) {
463     EXPBB = "-1";
464     }
465     if (minSeqSep != -1) {
466     CR = "((i_cid!=j_cid)OR(abs(i_num-j_num)>="+minSeqSep+"))";
467     }
468    
469     int pgraphid=0;
470     int graphid=0;
471     String sql = "SELECT graph_id FROM "+db+".chain_graph " +
472     " WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"'" +
473     " AND model_serial = "+model+" AND dist = "+cutoff+" AND expBB = '"+EXPBB+"'" +
474     " AND method = 'rc-cutoff';";
475     Statement stmt = conn.createStatement();
476     ResultSet rsst = stmt.executeQuery(sql);
477     if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid
478     pgraphid = rsst.getInt(1);
479     } else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid)
480     // we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment)
481     String pdbChainCodeStr = pdbChainCode;
482     if (!pdbChainCode.equals("NULL")) {
483     pdbChainCodeStr="'"+pdbChainCode+"'";
484     }
485     sql = "INSERT INTO "+db+".chain_graph (accession_code,chain_pdb_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " +
486     "VALUES ('"+pdbCode+"', "+pdbChainCodeStr+",'"+chainCode+"', "+model+", "+cutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+secondaryStructure.getNumElements()+", now())";
487     Statement stmt2 = conn.createStatement();
488     stmt2.executeUpdate(sql);
489     // now we take the newly assigned graph_id as pgraphid
490     sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1";
491     ResultSet rsst2 = stmt2.executeQuery(sql);
492     if (rsst2.next()){
493     pgraphid = rsst2.getInt(1);
494     }
495     stmt2.close();
496     rsst2.close();
497     }
498     rsst.close();
499     // now we insert the graph info into single_model_graph
500     // 1st we grab the single_model_id
501     int singlemodelid = 0;
502     sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE "+
503     " dist="+cutoff+" AND expBB="+EXPBB+" AND CW='"+CW+"' AND CT='"+ctStr+"' AND CR='"+CR+"';";
504     rsst = stmt.executeQuery(sql);
505     if (rsst.next()){
506     singlemodelid = rsst.getInt(1);
507     }
508     rsst.close();
509     // and then insert to single_model_graph
510     sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,w,d,num_nodes,date) " +
511     " VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+cutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+weightedStr+", "+directedStr+", "+getObsLength()+", now())";
512     stmt.executeUpdate(sql);
513     // and we grab the graph_id just assigned in single_model_graph
514     sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1";
515     rsst = stmt.executeQuery(sql);
516     if (rsst.next()){
517     graphid = rsst.getInt(1);
518     }
519     rsst.close();
520     stmt.close();
521    
522 duarte 260 // inserting nodes
523 filippis 407 PrintStream nodesOut = new PrintStream(new FileOutputStream(graphid+"_nodes.txt"));
524     // get the max node in db
525     int maxNodeId = 0;
526     sql = "SELECT MAX(node_id) FROM "+db+".single_model_node;";
527     stmt = conn.createStatement();
528     rsst = stmt.executeQuery(sql);
529     if (rsst.next()){
530     maxNodeId = rsst.getInt(1);
531     }
532     rsst.close();
533     stmt.close();
534    
535 duarte 260 for (int resser:nodes.keySet()) {
536 duarte 326 String res = AAinfo.threeletter2oneletter(getResType(resser));
537 duarte 260 NodeNbh nbh = getNodeNbh(resser);
538 filippis 407 String secStructType = "\\N";
539     String secStructId = "\\N";
540     String sheetSerial = "\\N";
541     String turn = null;
542 duarte 276 if (secondaryStructure.getSecStrucElement(resser)!=null){
543 filippis 407 secStructType = Character.toString(secondaryStructure.getSecStrucElement(resser).getType());
544     secStructId = secondaryStructure.getSecStrucElement(resser).getId();
545     char sheetSerialChar = secondaryStructure.getSecStrucElement(resser).getSheetSerial();
546     if (sheetSerialChar != 0) {
547     sheetSerial = Character.toString(sheetSerialChar);
548     }
549     turn = secondaryStructure.getSecStrucElement(resser).isTurn()?"1":"0";
550 duarte 276 }
551 filippis 407 if (directed){ // we insert k(=k_in+k_out), k_in and k_out
552     nodesOut.println(graphid+"\t"+(maxNodeId+resser)+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+
553     secStructType+"\t"+secStructId+"\t"+sheetSerial+"\t"+turn+"\t"+
554     (getInDegree(resser)+getOutDegree(resser))+"\t"+getInDegree(resser)+"\t"+getOutDegree(resser)+"\t"+
555     nbh.getMotifNoGaps()+"\t"+nbh.getMotif()+"\t"+nbh.getCommaSeparatedResSerials());
556 duarte 260 } else { // we insert k (and no k_in or k_out)
557 filippis 407 nodesOut.println(graphid+"\t"+(maxNodeId+resser)+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+
558     secStructType+"\t"+secStructId+"\t"+sheetSerial+"\t"+turn+"\t"+
559     getDegree(resser)+"\t"+"\\N"+"\t"+"\\N"+"\t"+
560     nbh.getMotifNoGaps()+"\t"+nbh.getMotif()+"\t"+nbh.getCommaSeparatedResSerials());
561 duarte 260 }
562     }
563 filippis 407 nodesOut.close();
564     stmt = conn.createStatement();
565     sql = "LOAD DATA LOCAL INFILE '"+graphid+"_nodes.txt' INTO TABLE "+db+".single_model_node "+
566     " (graph_id, node_id, cid, num, res, "+
567     " sstype, ssid, sheet_serial, turn, "+
568     " k, k_in, k_out, n, nwg, n_num);";
569     stmt.executeUpdate(sql);
570     File fileToDelete = new File(graphid+"_nodes.txt");
571     if (fileToDelete.exists()) {
572     fileToDelete.delete();
573     }
574    
575     // inserting edges
576     PrintStream edgesOut = new PrintStream(new FileOutputStream(graphid+"_edges.txt"));
577     // get the max weight
578     double maxWeight = 0;
579     for (Edge cont:contacts) {
580     maxWeight = (maxWeight<cont.weight)?cont.weight:maxWeight;
581     }
582     for (Edge cont:contacts){
583     String i_res = AAinfo.threeletter2oneletter(getResType(cont.i));
584     String j_res = AAinfo.threeletter2oneletter(getResType(cont.j));
585    
586     String i_secStructType = "\\N";
587     String i_secStructId = "\\N";
588     String i_sheetSerial = "\\N";
589     String i_turn = null;
590     if (secondaryStructure.getSecStrucElement(cont.i)!=null){
591     i_secStructType = Character.toString(secondaryStructure.getSecStrucElement(cont.i).getType());
592     i_secStructId = secondaryStructure.getSecStrucElement(cont.i).getId();
593     char sheetSerialChar = secondaryStructure.getSecStrucElement(cont.i).getSheetSerial();
594     if (sheetSerialChar != 0) {
595     i_sheetSerial = Character.toString(sheetSerialChar);
596     }
597     i_turn = secondaryStructure.getSecStrucElement(cont.i).isTurn()?"1":"0";
598     }
599    
600     String j_secStructType = "\\N";
601     String j_secStructId = "\\N";
602     String j_sheetSerial = "\\N";
603     String j_turn = null;
604     if (secondaryStructure.getSecStrucElement(cont.j)!=null){
605     j_secStructType = Character.toString(secondaryStructure.getSecStrucElement(cont.j).getType());
606     j_secStructId = secondaryStructure.getSecStrucElement(cont.j).getId();
607     char sheetSerialChar = secondaryStructure.getSecStrucElement(cont.j).getSheetSerial();
608     if (sheetSerialChar != 0) {
609     j_sheetSerial = Character.toString(sheetSerialChar);
610     }
611     j_turn = secondaryStructure.getSecStrucElement(cont.j).isTurn()?"1":"0";
612     }
613    
614     edgesOut.println(graphid+"\t"+(maxNodeId+cont.i)+"\t"+chainCode+"\t"+cont.i+"\t"+i_res+"\t"+i_secStructType+"\t"+i_secStructId+"\t"+i_sheetSerial+"\t"+i_turn+"\t"+
615     (maxNodeId+cont.j)+"\t"+chainCode+"\t"+cont.j+"\t"+j_res+"\t"+j_secStructType+"\t"+j_secStructId+"\t"+j_sheetSerial+"\t"+j_turn+"\t"+
616     Math.round(cont.weight)+"\t"+(cont.weight/maxWeight));
617     if(!directed) {// we want both side of the matrix in the table to follow Ioannis' convention
618     // so we insert the reverse contact by swapping i, j in insertion
619     edgesOut.println(graphid+"\t"+(maxNodeId+cont.j)+"\t"+chainCode+"\t"+cont.j+"\t"+j_res+"\t"+j_secStructType+"\t"+j_secStructId+"\t"+j_sheetSerial+"\t"+j_turn+"\t"+
620     (maxNodeId+cont.i)+"\t"+chainCode+"\t"+cont.i+"\t"+i_res+"\t"+i_secStructType+"\t"+i_secStructId+"\t"+i_sheetSerial+"\t"+i_turn+"\t"+
621     Math.round(cont.weight)+"\t"+(cont.weight/maxWeight));
622     }
623     }
624     edgesOut.close();
625     sql = "LOAD DATA LOCAL INFILE '"+graphid+"_edges.txt' INTO TABLE "+db+".single_model_edge "+
626     " (graph_id, i_node_id, i_cid, i_num, i_res, i_sstype, i_ssid, i_sheet_serial, i_turn, "+
627     " j_node_id, j_cid, j_num, j_res, j_sstype, j_ssid, j_sheet_serial, j_turn, weight, norm_weight);";
628     stmt.executeUpdate(sql);
629 duarte 260 stmt.close();
630 filippis 407 fileToDelete = new File(graphid+"_edges.txt");
631     if (fileToDelete.exists()) {
632     fileToDelete.delete();
633     }
634 duarte 260 }
635 filippis 407
636 duarte 260 /**
637     * Write graph to given outfile in aglappe format
638     * @param outfile
639     * @throws IOException
640     */
641 duarte 144 public void write_graph_to_file (String outfile) throws IOException {
642     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
643 duarte 208 Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION);
644 duarte 144 Out.println("#SEQUENCE: "+sequence);
645 duarte 206 Out.println("#PDB: "+pdbCode);
646     Out.println("#PDB CHAIN CODE: "+pdbChainCode);
647     Out.println("#CHAIN: "+chainCode);
648 duarte 144 Out.println("#CT: "+ct);
649     Out.println("#CUTOFF: "+cutoff);
650 duarte 234 for (Edge pair:contacts){
651 duarte 144 int i_resser=pair.i;
652     int j_resser=pair.j;
653 duarte 361 double weight=pair.weight;
654 duarte 284 Out.printf(Locale.US,i_resser+"\t"+j_resser+"\t%6.3f\n",weight);
655 duarte 144 }
656     Out.close();
657     }
658 duarte 175
659 duarte 159 /**
660 filippis 363 * Write graph to given outfile in network(Ioannis) format
661     * @param graphId
662     * @param dirName
663     * @throws IOException
664     */
665     public void writeUndirUnweightGraphToNetworkFiles (int graphId, String dirName) throws IOException {
666     if (directed) {
667     System.err.println("This method is only for undirected graphs!");
668     return;
669     }
670    
671     String filePrefix = dirName + "/" + String.valueOf(graphId)+"_"+pdbCode+"_"+chainCode+"_"+ct.replaceAll("/", ".")+"_"+String.valueOf(cutoff).replaceAll("\\.", "_")+"_";
672     PrintStream Out = new PrintStream(new FileOutputStream(filePrefix+"edges.txt"));
673     for (Edge pair:contacts){
674     int i_resser=pair.i;
675     int j_resser=pair.j;
676     if (i_resser < j_resser) {
677     Out.printf(Locale.US,i_resser+"\t"+j_resser+"\t"+1+"\t"+"1.000"+"\n");
678     }
679     }
680     Out.close();
681     Out = new PrintStream(new FileOutputStream(filePrefix+"nodes.txt"));
682     for (int resser:nodes.keySet()) {
683     String res = AAinfo.threeletter2oneletter(getResType(resser));
684     Out.printf(Locale.US,resser+"\t"+chainCode+"\t"+resser+"\t"+res+"\t"+getDegree(resser)+"\t"+getDegree(resser)+"\n");
685    
686     }
687     Out.close();
688     }
689    
690    
691     /**
692 duarte 234 * Gets list of contacts as a new EdgeSet (deep copied)
693 duarte 175 *
694 duarte 374 */
695 duarte 234 public EdgeSet getContacts(){
696 duarte 374 return this.contacts.copy();
697 duarte 175 }
698    
699     /**
700 lpetzo 372 * Checks if the graphs has the given edge.
701     *
702     * @return false if there is no such contact, else true
703     * */
704     public boolean hasContact( Edge e ) {
705    
706     return contacts.contains(e);
707     }
708    
709     /**
710     * Gets iterator on the set of edges/contacts.
711     *
712     * @return contact iterator
713     * */
714     public Iterator<Edge> getContactIterator() {
715    
716     return contacts.iterator();
717     }
718    
719     /**
720 duarte 175 * Gets TreeMap of nodes, deep copying
721     *
722     */
723     public TreeMap<Integer,String> getNodes(){
724     TreeMap<Integer,String> newNodes = new TreeMap<Integer,String>();
725     for (int resser:nodes.keySet()){
726     newNodes.put(resser, nodes.get(resser));
727     }
728     return newNodes;
729     }
730    
731     /**
732     * Deep copies this Graph object returning new one
733     * @return
734     */
735     public Graph copy(){
736 duarte 361 return new Graph(getContacts(),getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode,model,secondaryStructure.copy());
737 duarte 175 }
738    
739     /**
740 duarte 159 * Returns an int matrix with 1s for contacts and 0s for non contacts, i.e. the contact map
741     * In non-crossed cases this should give us the upper half matrix (contacts are only j>i)
742     * In crossed cases this gives us a full matrix (contacts are both j>i and i>j since they are directed)
743     * @return
744     */
745     public int[][] getIntMatrix(){
746     // this initialises the matrix to 0 (i.e. no contact)
747     int[][] cm = new int[fullLength][fullLength];
748     // we put a 1 for all given contacts
749 duarte 234 for (Edge cont:contacts){
750 duarte 159 int i_resser = cont.i;
751     int j_resser = cont.j;
752     cm[i_resser-1][j_resser-1]=1;
753 duarte 129 }
754     return cm;
755     }
756 duarte 159
757 duarte 165 /**
758 duarte 179 * Gets a node's residue type given the residue serial
759     * @param resser
760     * @return
761     */
762     public String getResType(int resser){
763     return nodes.get(resser);
764     }
765    
766     /**
767 duarte 165 * Gets node neighbourhood given a residue serial
768     * @param resser
769     * @return
770     */
771 duarte 179 public NodeNbh getNodeNbh(int resser){
772     NodeNbh nbh = new NodeNbh(resser, getResType(resser));
773 duarte 165 //this could be implemented using the contact map matrix and scanning through 1 column/row
774     //it would be just slightly faster, here we do 2*numContacts iterations, using matrix would be only fullLength iterations
775 duarte 179 //however we would then have the overhead of creating the matrix
776 duarte 234 for (Edge cont:contacts){
777 duarte 165 if (cont.i==resser) nbh.put(cont.j, nodes.get(cont.j));
778     if (cont.j==resser) nbh.put(cont.i, nodes.get(cont.i));
779     }
780     return nbh;
781     }
782    
783     /**
784     * Gets edge neighbourhood (common neighbourhood) given a residue serial pair
785     * @param i_resser
786     * @param j_resser
787     * @return
788     */
789 duarte 179 public EdgeNbh getEdgeNbh(int i_resser, int j_resser){
790     EdgeNbh nbh = new EdgeNbh(i_resser, getResType(i_resser), j_resser, getResType(j_resser));
791     NodeNbh i_nbhd = getNodeNbh(i_resser);
792     NodeNbh j_nbhd = getNodeNbh(j_resser);
793 duarte 175 if (j_nbhd.size()>=i_nbhd.size()) { //with this we will be slightly faster, always iterating through smallest TreeMap
794     for (int resser:i_nbhd.keySet()) {
795     if (j_nbhd.containsKey(resser)) nbh.put(resser, i_nbhd.get(resser));
796     }
797     } else {
798     for (int resser:j_nbhd.keySet()) {
799     if (i_nbhd.containsKey(resser)) nbh.put(resser, j_nbhd.get(resser));
800     }
801 duarte 165 }
802     return nbh;
803     }
804 duarte 232
805     /**
806     * Gets 2nd shell node neighbourhood
807     * @param resser
808     */
809     public NodeNbh get2ndshellNodeNbh(int resser){
810     // first we create a NodeNbh object for the second shell, central residue is given resser
811     NodeNbh nbh2ndshell = new NodeNbh(resser,getResType(resser));
812     // we get 1st neighbourhood
813     NodeNbh nbh = this.getNodeNbh(resser);
814     for (int nb:nbh.keySet()){
815     NodeNbh nbh2 = this.getNodeNbh(nb); // for each first neighbour we take its neighbourhood
816     for (int nb2:nbh2.keySet()){
817     if (nb2!=resser && !nbh.containsKey(nb2)){ // if the 2nd neighbour nb2 is not the given resser or is not a 1st neighbour
818     nbh2ndshell.put(nb2, getResType(nb2));
819     }
820     }
821     }
822     return nbh2ndshell;
823     }
824    
825 duarte 234 public void addEdge(Edge cont){
826 duarte 240 if (!directed && cont.i>cont.j){
827 duarte 239 // we invert in case of undirected and i>j because in undirected we have only the half of the matrix j>i
828     // if we added an edge i>j it could happen that the edge was already there but inverted and wouldn't be detected as a duplicate
829 stehr 337 cont = new Edge(cont.j,cont.i);
830 duarte 239 }
831 duarte 234 contacts.add(cont); // contacts is a TreeSet and thus takes care of duplicates
832     int oldNumContacts = numContacts;
833     numContacts=getNumContacts();
834     // if number of contacts changed that means we actually added a new contact and thus we modified the graph
835     if (numContacts!=oldNumContacts) modified=true;
836    
837 duarte 175 }
838    
839 duarte 234 public void delEdge(Edge cont){
840 duarte 240 if (!directed && cont.i>cont.j){
841     // we invert in case of undirected and i>j because in undirected we have only the half of the matrix j>i
842     // if we try to delete an edge i>j it won't be there, we have to invert it and then try to delete
843     cont = new Edge(cont.j,cont.i);
844     }
845 duarte 175 contacts.remove(cont);
846 duarte 240 int oldNumContacts = numContacts;
847     numContacts=getNumContacts();
848     // if number of contacts changed that means we actually added a new contact and thus we modified the graph
849     if (numContacts!=oldNumContacts) modified=true;
850 duarte 175 }
851    
852     public void restrictContactsToMaxRange(int range){
853 duarte 234 EdgeSet edgesToDelete = new EdgeSet();
854     for (Edge cont:contacts){
855 duarte 179 if (cont.getRange()>range) edgesToDelete.add(cont);
856 duarte 175 }
857 duarte 234 for (Edge cont:edgesToDelete){
858 duarte 179 delEdge(cont);
859     }
860 filippis 407 maxSeqSep = range;
861 duarte 175 }
862    
863     public void restrictContactsToMinRange(int range){
864 duarte 234 EdgeSet edgesToDelete = new EdgeSet();
865     for (Edge cont:contacts){
866 duarte 179 if (cont.getRange()<range) edgesToDelete.add(cont);
867 duarte 175 }
868 duarte 234 for (Edge cont:edgesToDelete){
869 duarte 179 delEdge(cont);
870     }
871 filippis 407 minSeqSep = range;
872 duarte 175 }
873 duarte 189
874 duarte 191 /**
875     * Returns a HashMap with all edge neighbourhood sizes (if they are >0) for each cell in the contact map
876     * @return
877     */
878 duarte 234 public HashMap<Edge,Integer> getAllEdgeNbhSizes() {
879     HashMap<Edge,Integer> sizes = new HashMap<Edge, Integer>();
880 duarte 191 if (!directed) {
881     for (int i=1; i<fullLength;i++){
882     for (int j=i+1; j<fullLength;j++){
883     int size = getEdgeNbh(i, j).size();
884 duarte 234 if (size>0) sizes.put(new Edge(i,j), size);
885 duarte 191 }
886     }
887     } else {
888     for (int i=1; i<fullLength;i++){
889     for (int j=1; j<fullLength;j++){
890     if (i!=j){
891     int size = getEdgeNbh(i, j).size();
892 duarte 234 if (size>0) sizes.put(new Edge(i,j), size);
893 duarte 191 }
894     }
895     }
896     }
897     return sizes;
898     }
899    
900 duarte 189 //TODO not sure what kind of return we want, for now is a HashMap with three graph objects
901     public HashMap<String,Graph> compare(Graph other) throws Exception{
902     //first check that other has same sequence than this, otherwise throw exception
903 spriya 288 if (this.getFullLength()!=other.getFullLength()) {
904 duarte 189 //TODO throw specific exception
905 spriya 288 throw new Exception("Sequence of 2 graphs to compare differ, can't compare them.");
906     } else {
907     for (int resser:this.nodes.keySet()){
908     String this_res = this.nodes.get(resser);
909     String other_res = other.nodes.get(resser);
910     if (!this_res.equals("X") && !other_res.equals("X") && !this_res.equals(other_res)) {
911     //TODO throw specific exception
912     throw new Exception("Sequence of 2 graphs to compare differ, can't compare them.");
913     }
914     }
915 duarte 189 }
916 spriya 286
917 duarte 234 EdgeSet common = new EdgeSet();
918     EdgeSet onlythis = new EdgeSet();
919     EdgeSet onlyother = new EdgeSet();
920     for (Edge cont:this.contacts){
921 duarte 189 if (other.contacts.contains(cont)) {
922     common.add(cont);
923 spriya 286 } else {
924 duarte 189 onlythis.add(cont);
925     }
926     }
927 duarte 234 for (Edge cont:other.contacts){
928 duarte 189 if (!this.contacts.contains(cont)){
929     onlyother.add(cont);
930     }
931     }
932 duarte 361 Graph commongraph = new Graph (common,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode,model,secondaryStructure.copy());
933     Graph onlythisgraph = new Graph (onlythis,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode,model,secondaryStructure.copy());
934     Graph onlyothergraph = new Graph (onlyother,getNodes(),sequence,cutoff,ct,other.pdbCode,other.chainCode,other.pdbChainCode,model,secondaryStructure.copy());
935 duarte 189 HashMap<String,Graph> result = new HashMap<String,Graph>();
936     result.put("common", commongraph);
937     result.put("onlythis", onlythisgraph);
938     result.put("onlyother",onlyothergraph);
939     return result;
940     }
941 duarte 206
942     public boolean isModified(){
943     return modified;
944     }
945    
946     public boolean isDirected(){
947     return directed;
948     }
949    
950     public String getPdbCode() {
951     return pdbCode;
952     }
953    
954     public String getPdbChainCode(){
955     return pdbChainCode;
956     }
957    
958     public String getChainCode(){
959     return chainCode;
960     }
961    
962 lpetzo 372 public int getModel() {
963 duarte 374 return model;
964 lpetzo 372 }
965    
966 duarte 206 public String getSequence(){
967     return sequence;
968     }
969    
970 duarte 388 /**
971     * Sets the sequence of this graph to the given one.
972     * It will also set the nodes labels to the new sequence.
973     * At the moment there are no checks on the input, if sequence of
974     * diferrent length than current is passed there will be unexpected
975     * results
976     * @param sequence
977     */
978     public void setSequence(String sequence) {
979     if (sequence.length()!=this.fullLength) {
980     //TODO throw exception
981     System.err.println("Setting sequence to another sequence with different length. This can cause problems!");
982     }
983     this.sequence = sequence;
984     Set<Integer> allnodes = nodes.keySet();
985     for (int node:allnodes) {
986     nodes.put(node,AAinfo.oneletter2threeletter(Character.toString(sequence.charAt(node-1))));
987     }
988     }
989    
990 duarte 206 public int getFullLength(){
991     return fullLength;
992     }
993    
994     public int getObsLength(){
995     return obsLength;
996     }
997    
998     public int getNumContacts(){
999     // in theory we could return just numContacts, because we have taken care of updating it every time contacts changed
1000     // however we call directly contacts.size() as I feel is safer
1001     return contacts.size();
1002     }
1003    
1004 stehr 376 /**
1005     * Returns the contact type of this graph.
1006     * @return the contact type
1007     */
1008 duarte 206 public String getContactType() {
1009     return ct;
1010     }
1011    
1012 stehr 376 /**
1013     * Sets the contact type of this graph.
1014     * @param ct the contact type
1015     */
1016 duarte 374 public void setContactType(String ct) {
1017     this.ct=ct;
1018     }
1019    
1020 stehr 376 /**
1021     * Returns the distance cutoff for this graph.
1022     * @return the distance cutoff
1023     */
1024 duarte 206 public double getCutoff(){
1025     return cutoff;
1026     }
1027 duarte 249
1028 stehr 376 /**
1029     * Sets the distance cutoff for this graph.
1030     * @param distCutoff the distance cutoff
1031     */
1032     public void setCutoff(double distCutoff) {
1033 stehr 378 this.cutoff = distCutoff;
1034 stehr 376 }
1035    
1036 duarte 249 public boolean containsContact(Edge cont){
1037     // be careful with order, this checks strictly whether the cont.i, cont.j is given, strictly in that order!
1038     // in undirected case contacts are stored only in 1 direction (j>i) and thus if wrong order given it won't be found
1039     return contacts.contains(cont);
1040     }
1041    
1042     public void resetContacts(){
1043     this.contacts = new EdgeSet();
1044     }
1045 duarte 260
1046     public int getDegree(int resser){
1047     if (directed) {
1048     System.err.println("Can't get degree for a directed graph, only in or out degree");
1049     return 0;
1050     }
1051     int k = 0;
1052     for (Edge cont:contacts){
1053     if (cont.i==resser || cont.j==resser) {
1054     k++;
1055     }
1056     }
1057     return k;
1058     }
1059    
1060     public int getInDegree(int resser){
1061     if (!directed){
1062     System.err.println("Can't get in degree for an undirected graph");
1063     return 0;
1064     }
1065     int k = 0;
1066     for (Edge cont:contacts){
1067     if (cont.j==resser) {
1068     k++;
1069     }
1070     }
1071     return k;
1072     }
1073    
1074     public int getOutDegree(int resser){
1075     if (!directed){
1076     System.err.println("Can't get out degree for an undirected graph");
1077     return 0;
1078     }
1079     int k = 0;
1080     for (Edge cont:contacts){
1081     if (cont.i==resser) {
1082     k++;
1083     }
1084     }
1085     return k;
1086     }
1087    
1088 stehr 274 // secondary structure related methods
1089    
1090     /**
1091     * Returns true if secondary structure information is available, false otherwise.
1092     */
1093     public boolean hasSecondaryStructure() {
1094     return !this.secondaryStructure.isEmpty();
1095     }
1096    
1097     /**
1098     * Returns the secondary structure annotation object of this graph.
1099     */
1100     public SecondaryStructure getSecondaryStructure() {
1101     return this.secondaryStructure;
1102     }
1103    
1104 duarte 329 /**
1105     * Evaluate this graph (assuming it is a prediction) against an original graph
1106     * @param originalGraph
1107     * @return
1108     */
1109 stehr 360 public PredEval evaluatePrediction(Graph originalGraph) {
1110     return evaluatePrediction(originalGraph, 1);
1111     }
1112    
1113     /**
1114     * Evaluate this graph (assuming it is a prediction) against an original graph,
1115     * considering only edges with sequence separation at least minSeqSep.
1116     * @param originalGraph
1117     * @return
1118     */
1119     public PredEval evaluatePrediction(Graph originalGraph, int minSeqSep) {
1120     // total predicted contacts
1121     int predicted = 0;
1122     for(Edge e:this.getContacts()) {
1123     if(Math.abs(e.j-e.i) >= minSeqSep) {
1124     predicted++;
1125     }
1126     }
1127 duarte 329
1128 stehr 360 // total native contacts
1129     int original = 0;
1130     for(Edge e:originalGraph.getContacts()) {
1131     if(Math.abs(e.j-e.i) >= minSeqSep) {
1132     original++;
1133     }
1134     }
1135    
1136     // total size of contact map (potential contacts)
1137 duarte 329 int cmtotal = 0;
1138     if (originalGraph.isDirected()){
1139 stehr 360 cmtotal = (originalGraph.getFullLength()-(minSeqSep-1))*(originalGraph.getFullLength()-minSeqSep);
1140 duarte 329 } else {
1141 stehr 360 cmtotal = (int)(((originalGraph.getFullLength()-(minSeqSep-1))*(originalGraph.getFullLength()-minSeqSep))/2);
1142 duarte 329 }
1143     int TruePos=0, FalsePos=0, TrueNeg=0, FalseNeg=0;
1144    
1145     EdgeSet origContacts = originalGraph.getContacts();
1146     EdgeSet predictedContacts = this.getContacts();
1147    
1148     // directed/ non-directed graphs should be both fine with this code (as long as in the predicted directed graph we store the contacts as j>i)
1149     // the only thing that changes between directed/non-directed is the count of total cells in contact map (taken care for above)
1150     for (Edge predictedCont:predictedContacts){
1151 stehr 360 if(Math.abs(predictedCont.j-predictedCont.i) >= minSeqSep) {
1152     //System.out.println(predictedCont);
1153     if (origContacts.contains(predictedCont)) {
1154     TruePos++;
1155     }
1156     else {
1157     FalsePos++;
1158     }
1159 duarte 329 }
1160     }
1161    
1162 stehr 360 for (Edge origCont:origContacts) {
1163     if(Math.abs(origCont.j-origCont.i) >= minSeqSep) {
1164 duarte 329 //System.out.println(origCont);
1165 stehr 360 if (!predictedContacts.contains(origCont)) {
1166     //System.out.println(origCont);
1167     FalseNeg++;
1168     }
1169 duarte 329 }
1170     }
1171     TrueNeg=cmtotal-TruePos-FalsePos-FalseNeg;
1172     PredEval eval = new PredEval(TruePos,FalsePos,TrueNeg,FalseNeg,0,predicted,original,cmtotal);
1173     return eval;
1174     }
1175 filippis 407
1176     private static String quote(String s) {
1177     return ("'"+s+"'");
1178     }
1179 duarte 123 }