1 |
duarte |
123 |
package proteinstructure; |
2 |
duarte |
191 |
|
3 |
duarte |
123 |
import java.io.FileOutputStream; |
4 |
|
|
import java.io.PrintStream; |
5 |
|
|
import java.io.IOException; |
6 |
duarte |
279 |
import java.util.Collections; |
7 |
duarte |
284 |
import java.util.Locale; |
8 |
duarte |
129 |
import java.util.TreeMap; |
9 |
duarte |
189 |
import java.util.HashMap; |
10 |
duarte |
123 |
|
11 |
duarte |
260 |
import java.sql.SQLException; |
12 |
|
|
import java.sql.Statement; |
13 |
|
|
|
14 |
|
|
import java.sql.ResultSet; |
15 |
|
|
|
16 |
|
|
import tools.MySQLConnection; |
17 |
|
|
|
18 |
duarte |
207 |
/** |
19 |
|
|
* A residue interaction graph derived from a single chain pdb protein structure |
20 |
|
|
* |
21 |
|
|
* @author Jose Duarte |
22 |
|
|
* Class: Graph |
23 |
|
|
* Package: proteinstructure |
24 |
|
|
*/ |
25 |
duarte |
123 |
public class Graph { |
26 |
|
|
|
27 |
duarte |
144 |
public final static String GRAPHFILEFORMATVERSION = "1.0"; |
28 |
duarte |
206 |
|
29 |
stehr |
274 |
private final static String SINGLEMODELS_DB = "ioannis"; //TODO: Is this being used?? |
30 |
duarte |
260 |
|
31 |
duarte |
234 |
public EdgeSet contacts; // we keep it public to be able to re-reference the object directly (getContacts() copies it) |
32 |
duarte |
123 |
|
33 |
duarte |
284 |
protected TreeMap<Edge,Double> weights; |
34 |
|
|
|
35 |
duarte |
207 |
protected TreeMap<Integer,String> nodes; // nodes is a TreeMap of residue serials to residue types (3 letter code) |
36 |
stehr |
274 |
protected SecondaryStructure secondaryStructure; // secondary structure annotation for this protein graph |
37 |
duarte |
284 |
|
38 |
duarte |
207 |
protected String sequence; // the full sequence (with unobserved residues and non-standard aas ='X') |
39 |
|
|
protected String pdbCode; |
40 |
|
|
protected String chainCode; |
41 |
duarte |
208 |
protected String pdbChainCode; |
42 |
duarte |
260 |
protected int model; |
43 |
duarte |
207 |
protected double cutoff; |
44 |
|
|
protected String ct; // the contact type |
45 |
duarte |
208 |
protected boolean directed; |
46 |
duarte |
207 |
|
47 |
duarte |
159 |
// fullLength is length of full sequence or: |
48 |
|
|
// -if sequence not provided (when reading from db): length of everything except possible unobserved residues at end of chain |
49 |
|
|
// -if sequence and nodes not provided (when reading from file and sequence field missing): length except possible unobserved residues at end of chain and possible nodes without contacts at end of chain |
50 |
duarte |
207 |
protected int fullLength; |
51 |
|
|
protected int obsLength; // length without unobserved, non standard aas |
52 |
duarte |
159 |
|
53 |
duarte |
207 |
protected int numContacts; |
54 |
duarte |
159 |
|
55 |
duarte |
207 |
protected boolean modified; |
56 |
duarte |
175 |
|
57 |
duarte |
207 |
public Graph() { |
58 |
|
|
|
59 |
|
|
} |
60 |
|
|
|
61 |
duarte |
134 |
/** |
62 |
|
|
* Constructs Graph object by passing ArrayList with contacts and TreeMap with nodes (res serials and types) |
63 |
duarte |
206 |
* Must also pass contact type, cutoff, pdbCode and chainCode |
64 |
duarte |
134 |
* @param contacts |
65 |
|
|
* @param nodes |
66 |
|
|
* @param sequence |
67 |
|
|
* @param cutoff |
68 |
|
|
* @param ct |
69 |
duarte |
206 |
* @param pdbCode |
70 |
|
|
* @param chainCode |
71 |
duarte |
260 |
* @param pdbChainCode |
72 |
|
|
* @param model |
73 |
stehr |
274 |
* @param ssElems |
74 |
|
|
* @param rs2ss |
75 |
duarte |
134 |
*/ |
76 |
duarte |
284 |
public Graph (EdgeSet contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String pdbCode, String chainCode, String pdbChainCode, int model, SecondaryStructure secStruct, TreeMap<Edge,Double> weights) { |
77 |
duarte |
123 |
this.contacts=contacts; |
78 |
duarte |
284 |
this.weights=weights; |
79 |
duarte |
123 |
this.cutoff=cutoff; |
80 |
duarte |
129 |
this.nodes=nodes; |
81 |
|
|
this.sequence=sequence; |
82 |
duarte |
206 |
this.pdbCode=pdbCode; |
83 |
|
|
this.chainCode=chainCode; |
84 |
|
|
this.pdbChainCode=pdbChainCode; |
85 |
duarte |
260 |
this.model=model; |
86 |
duarte |
123 |
this.ct=ct; |
87 |
duarte |
279 |
// in case of pdb was read from file and there was no SEQRES field then fullLength here shouldn't be sequence length but maximum observed residue (see Pdb class) |
88 |
|
|
this.fullLength=Math.max(sequence.length(),Collections.max(nodes.keySet())); |
89 |
duarte |
159 |
this.obsLength=nodes.size(); |
90 |
|
|
this.numContacts=contacts.size(); |
91 |
duarte |
175 |
this.modified=false; |
92 |
duarte |
208 |
this.directed=false; |
93 |
duarte |
129 |
if (ct.contains("/")){ |
94 |
|
|
directed=true; |
95 |
|
|
} |
96 |
stehr |
274 |
if(secStruct == null) { |
97 |
|
|
// we allow null to be passed to simplify graph construction |
98 |
|
|
this.secondaryStructure = new SecondaryStructure(); |
99 |
|
|
} else { |
100 |
|
|
this.secondaryStructure = secStruct; |
101 |
|
|
} |
102 |
stehr |
217 |
|
103 |
stehr |
274 |
// do some verification checks |
104 |
|
|
assert(secondaryStructure != null); |
105 |
stehr |
217 |
assert(this.pdbCode.equals(this.pdbCode.toLowerCase())); // pdb codes should be always lower case |
106 |
|
|
assert(this.pdbChainCode.equals(this.pdbChainCode.toUpperCase())); // pdb chain codes should be always upper case |
107 |
duarte |
123 |
} |
108 |
duarte |
135 |
|
109 |
duarte |
260 |
/** |
110 |
|
|
* Write graph to given db, using our db graph aglappe format, |
111 |
|
|
* i.e. tables: chain_graph, single_model_graph, single_model_node, single_model_edge |
112 |
|
|
* @param conn |
113 |
|
|
* @param db |
114 |
|
|
* @throws SQLException |
115 |
|
|
*/ |
116 |
|
|
public void write_graph_to_db(MySQLConnection conn, String db) throws SQLException{ |
117 |
|
|
// we are fixing these 3 values to what corresponds to our graphs |
118 |
|
|
String CW = "1"; |
119 |
|
|
String CR = "(true)"; |
120 |
|
|
String EXPBB = "0"; |
121 |
duarte |
135 |
|
122 |
duarte |
260 |
int pgraphid=0; |
123 |
|
|
int graphid=0; |
124 |
|
|
String sql = "SELECT graph_id FROM "+db+".chain_graph " + |
125 |
|
|
" WHERE accession_code='"+pdbCode+"' AND pchain_code='"+chainCode+"' LIMIT 1"; |
126 |
|
|
Statement stmt = conn.createStatement(); |
127 |
|
|
ResultSet rsst = stmt.executeQuery(sql); |
128 |
|
|
if (rsst.next()){ // if the pdbCode + chainCode were already in chain_graph then we take the graph_id as the pgraphid |
129 |
|
|
pgraphid = rsst.getInt(1); |
130 |
|
|
} else { // no pdbCode + chainCode found, we insert them in chain_graph, thus assigning a new graph_id (pgraphid) |
131 |
|
|
// we are inserting same number for num_obs_res and num_nodes (the difference would be the non-standard aas, but we can't get that number from this object at the moment) |
132 |
|
|
sql = "INSERT INTO "+db+".chain_graph (accession_code,pchain_code,model_serial,dist,expBB,method,num_res,num_obs_res,num_nodes,sses,date) " + |
133 |
duarte |
276 |
"VALUES ('"+pdbCode+"', '"+chainCode+"', "+model+", "+cutoff+", "+EXPBB+", 'rc-cutoff', "+getFullLength()+", "+getObsLength()+", "+getObsLength()+", "+secondaryStructure.getNumElements()+", now())"; |
134 |
duarte |
260 |
Statement stmt2 = conn.createStatement(); |
135 |
|
|
stmt2.executeUpdate(sql); |
136 |
|
|
// now we take the newly assigned graph_id as pgraphid |
137 |
|
|
sql = "SELECT LAST_INSERT_ID() FROM "+db+".chain_graph LIMIT 1"; |
138 |
|
|
ResultSet rsst2 = stmt2.executeQuery(sql); |
139 |
|
|
if (rsst2.next()){ |
140 |
|
|
pgraphid = rsst2.getInt(1); |
141 |
|
|
} |
142 |
|
|
stmt2.close(); |
143 |
|
|
rsst2.close(); |
144 |
|
|
} |
145 |
|
|
rsst.close(); |
146 |
|
|
// now we insert the graph info into single_model_graph |
147 |
|
|
String ctStr = ct; |
148 |
|
|
if (ct.equals("ALL")){ |
149 |
|
|
ctStr = "BB+SC+BB/SC"; |
150 |
|
|
} |
151 |
|
|
// 1st we grab the single_model_id |
152 |
|
|
int singlemodelid = 0; |
153 |
|
|
sql = "SELECT single_model_id FROM "+SINGLEMODELS_DB+".single_model WHERE CR='"+CR+"' AND CW='"+CW+"' AND expBB="+EXPBB+" AND CT='"+ctStr+"' AND dist="+cutoff+";"; |
154 |
|
|
rsst = stmt.executeQuery(sql); |
155 |
|
|
if (rsst.next()){ |
156 |
|
|
singlemodelid = rsst.getInt(1); |
157 |
|
|
} |
158 |
|
|
rsst.close(); |
159 |
|
|
// and then insert to single_model_graph |
160 |
|
|
sql = "INSERT INTO "+db+".single_model_graph (pgraph_id,graph_type,accession_code,single_model_id,dist,expBB,CW,CT,CR,num_nodes,date) " + |
161 |
|
|
" VALUES ("+pgraphid+", 'chain', '"+pdbCode+"', "+singlemodelid+", "+cutoff+", "+EXPBB+", '"+CW+"','"+ctStr+"', '"+CR+"', "+getObsLength()+", now())"; |
162 |
|
|
stmt.executeUpdate(sql); |
163 |
|
|
// and we grab the graph_id just assigned in single_model_graph |
164 |
|
|
sql = "SELECT LAST_INSERT_ID() FROM "+db+".single_model_graph LIMIT 1"; |
165 |
|
|
rsst = stmt.executeQuery(sql); |
166 |
|
|
if (rsst.next()){ |
167 |
|
|
graphid = rsst.getInt(1); |
168 |
|
|
} |
169 |
|
|
rsst.close(); |
170 |
|
|
stmt.close(); |
171 |
|
|
// inserting edges |
172 |
|
|
for (Edge cont:contacts){ |
173 |
|
|
String i_res = AA.threeletter2oneletter(getResType(cont.i)); |
174 |
|
|
String j_res = AA.threeletter2oneletter(getResType(cont.j)); |
175 |
duarte |
276 |
char i_secStructType=SecStrucElement.OTHER; |
176 |
|
|
if (secondaryStructure.getSecStrucElement(cont.i)!=null){ |
177 |
|
|
i_secStructType = secondaryStructure.getSecStrucElement(cont.i).getType(); |
178 |
|
|
} |
179 |
|
|
char j_secStructType=SecStrucElement.OTHER; |
180 |
|
|
if (secondaryStructure.getSecStrucElement(cont.j)!=null){ |
181 |
|
|
j_secStructType = secondaryStructure.getSecStrucElement(cont.j).getType(); |
182 |
|
|
} |
183 |
|
|
sql = "INSERT INTO "+db+".single_model_edge (graph_id,i_num,i_cid,i_res,i_sstype,j_num,j_cid,j_res,j_sstype,weight) " + |
184 |
duarte |
284 |
" VALUES ("+graphid+", "+cont.i+", '"+chainCode+"', '"+i_res+"', '"+i_secStructType+"',"+cont.j+", '"+chainCode+"', '"+j_res+"', '"+j_secStructType+"', "+Math.round(weights.get(cont))+")"; |
185 |
duarte |
260 |
stmt = conn.createStatement(); |
186 |
|
|
stmt.executeUpdate(sql); |
187 |
|
|
} |
188 |
|
|
if (!directed){ // we want both side of the matrix in the table to follow Ioannis' convention |
189 |
|
|
// so we insert the reverse contacts by doing the same but swapping i, j in insertion |
190 |
|
|
for (Edge cont:contacts){ |
191 |
|
|
String i_res = AA.threeletter2oneletter(getResType(cont.i)); |
192 |
|
|
String j_res = AA.threeletter2oneletter(getResType(cont.j)); |
193 |
duarte |
276 |
char i_secStructType=SecStrucElement.OTHER; |
194 |
|
|
if (secondaryStructure.getSecStrucElement(cont.i)!=null){ |
195 |
|
|
i_secStructType = secondaryStructure.getSecStrucElement(cont.i).getType(); |
196 |
|
|
} |
197 |
|
|
char j_secStructType=SecStrucElement.OTHER; |
198 |
|
|
if (secondaryStructure.getSecStrucElement(cont.j)!=null){ |
199 |
|
|
j_secStructType = secondaryStructure.getSecStrucElement(cont.j).getType(); |
200 |
|
|
} |
201 |
|
|
sql = "INSERT INTO "+db+".single_model_edge (graph_id,i_num,i_cid,i_res,i_sstype,j_num,j_cid,j_res,j_sstype,weight) " + |
202 |
duarte |
284 |
" VALUES ("+graphid+", "+cont.j+", '"+chainCode+"', '"+j_res+"', '"+j_secStructType+"',"+cont.i+", '"+chainCode+"', '"+i_res+"', '"+i_secStructType+"', "+Math.round(weights.get(cont))+")"; |
203 |
duarte |
260 |
stmt.executeUpdate(sql); |
204 |
|
|
} |
205 |
|
|
} |
206 |
|
|
// inserting nodes |
207 |
|
|
for (int resser:nodes.keySet()) { |
208 |
|
|
String res = AA.threeletter2oneletter(getResType(resser)); |
209 |
|
|
NodeNbh nbh = getNodeNbh(resser); |
210 |
duarte |
276 |
char secStructType=SecStrucElement.OTHER; |
211 |
|
|
if (secondaryStructure.getSecStrucElement(resser)!=null){ |
212 |
|
|
secStructType = secondaryStructure.getSecStrucElement(resser).getType(); |
213 |
|
|
} |
214 |
duarte |
260 |
if (directed){ // we insert k_in and k_out |
215 |
duarte |
284 |
sql = "INSERT INTO "+db+".single_model_node (graph_id,num,cid,res,sstype,k,k_in,k_out,n,nwg,n_num) " + |
216 |
|
|
" VALUES ("+graphid+", "+resser+", '"+chainCode+"', '"+res+"', '"+secStructType+"', "+0+", "+getInDegree(resser)+", "+getOutDegree(resser)+", '"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')"; |
217 |
duarte |
260 |
} else { // we insert k (and no k_in or k_out) |
218 |
duarte |
276 |
sql = "INSERT INTO "+db+".single_model_node (graph_id,num,cid,res,sstype,k,n,nwg,n_num) " + |
219 |
|
|
" VALUES ("+graphid+", "+resser+", '"+chainCode+"', '"+res+"', '"+secStructType+"',"+getDegree(resser)+", '"+nbh.getMotifNoGaps()+"', '"+nbh.getMotif()+"', '"+nbh.getCommaSeparatedResSerials()+"')"; |
220 |
duarte |
260 |
} |
221 |
|
|
stmt.executeUpdate(sql); |
222 |
|
|
} |
223 |
|
|
stmt.close(); |
224 |
|
|
} |
225 |
|
|
|
226 |
|
|
/** |
227 |
|
|
* Write graph to given outfile in aglappe format |
228 |
|
|
* @param outfile |
229 |
|
|
* @throws IOException |
230 |
|
|
*/ |
231 |
duarte |
144 |
public void write_graph_to_file (String outfile) throws IOException { |
232 |
|
|
PrintStream Out = new PrintStream(new FileOutputStream(outfile)); |
233 |
duarte |
208 |
Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION); |
234 |
duarte |
144 |
Out.println("#SEQUENCE: "+sequence); |
235 |
duarte |
206 |
Out.println("#PDB: "+pdbCode); |
236 |
|
|
Out.println("#PDB CHAIN CODE: "+pdbChainCode); |
237 |
|
|
Out.println("#CHAIN: "+chainCode); |
238 |
duarte |
144 |
Out.println("#CT: "+ct); |
239 |
|
|
Out.println("#CUTOFF: "+cutoff); |
240 |
duarte |
234 |
for (Edge pair:contacts){ |
241 |
duarte |
144 |
int i_resser=pair.i; |
242 |
|
|
int j_resser=pair.j; |
243 |
duarte |
284 |
double weight=weights.get(pair); |
244 |
|
|
Out.printf(Locale.US,i_resser+"\t"+j_resser+"\t%6.3f\n",weight); |
245 |
duarte |
144 |
} |
246 |
|
|
Out.close(); |
247 |
|
|
} |
248 |
duarte |
175 |
|
249 |
duarte |
159 |
/** |
250 |
duarte |
234 |
* Gets list of contacts as a new EdgeSet (deep copied) |
251 |
duarte |
175 |
* |
252 |
|
|
*/ |
253 |
duarte |
234 |
public EdgeSet getContacts(){ |
254 |
|
|
EdgeSet newContacts = new EdgeSet(); |
255 |
|
|
for (Edge cont:contacts){ |
256 |
|
|
newContacts.add(new Edge(cont.i,cont.j)); |
257 |
duarte |
175 |
} |
258 |
|
|
return newContacts; |
259 |
|
|
} |
260 |
|
|
|
261 |
|
|
/** |
262 |
|
|
* Gets TreeMap of nodes, deep copying |
263 |
|
|
* |
264 |
|
|
*/ |
265 |
|
|
public TreeMap<Integer,String> getNodes(){ |
266 |
|
|
TreeMap<Integer,String> newNodes = new TreeMap<Integer,String>(); |
267 |
|
|
for (int resser:nodes.keySet()){ |
268 |
|
|
newNodes.put(resser, nodes.get(resser)); |
269 |
|
|
} |
270 |
|
|
return newNodes; |
271 |
|
|
} |
272 |
|
|
|
273 |
|
|
/** |
274 |
duarte |
284 |
* Gets TreeMap of weights, deep copyingg |
275 |
|
|
* @return |
276 |
|
|
*/ |
277 |
|
|
public TreeMap<Edge,Double> getWeights(){ |
278 |
|
|
TreeMap<Edge,Double> newWeights = new TreeMap<Edge, Double>(); |
279 |
|
|
for (Edge cont:weights.keySet()){ |
280 |
|
|
newWeights.put(new Edge(cont.i,cont.j), weights.get(cont)); |
281 |
|
|
} |
282 |
|
|
return newWeights; |
283 |
|
|
} |
284 |
|
|
|
285 |
|
|
/** |
286 |
duarte |
175 |
* Deep copies this Graph object returning new one |
287 |
|
|
* @return |
288 |
|
|
*/ |
289 |
|
|
public Graph copy(){ |
290 |
duarte |
284 |
return new Graph(getContacts(),getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode,model,secondaryStructure.copy(),getWeights()); |
291 |
duarte |
175 |
} |
292 |
|
|
|
293 |
|
|
/** |
294 |
duarte |
232 |
* Gets a reference to this Graph deep copying contacts but re-referencing nodes |
295 |
|
|
* @return |
296 |
|
|
*/ |
297 |
|
|
public Graph copyKeepingNodes(){ |
298 |
duarte |
284 |
return new Graph(getContacts(),nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode,model,secondaryStructure.copy(),getWeights()); |
299 |
duarte |
232 |
} |
300 |
|
|
|
301 |
|
|
/** |
302 |
duarte |
159 |
* Returns an int matrix with 1s for contacts and 0s for non contacts, i.e. the contact map |
303 |
|
|
* In non-crossed cases this should give us the upper half matrix (contacts are only j>i) |
304 |
|
|
* In crossed cases this gives us a full matrix (contacts are both j>i and i>j since they are directed) |
305 |
|
|
* @return |
306 |
|
|
*/ |
307 |
|
|
public int[][] getIntMatrix(){ |
308 |
|
|
// this initialises the matrix to 0 (i.e. no contact) |
309 |
|
|
int[][] cm = new int[fullLength][fullLength]; |
310 |
|
|
// we put a 1 for all given contacts |
311 |
duarte |
234 |
for (Edge cont:contacts){ |
312 |
duarte |
159 |
int i_resser = cont.i; |
313 |
|
|
int j_resser = cont.j; |
314 |
|
|
cm[i_resser-1][j_resser-1]=1; |
315 |
duarte |
129 |
} |
316 |
|
|
return cm; |
317 |
|
|
} |
318 |
duarte |
159 |
|
319 |
duarte |
165 |
/** |
320 |
duarte |
179 |
* Gets a node's residue type given the residue serial |
321 |
|
|
* @param resser |
322 |
|
|
* @return |
323 |
|
|
*/ |
324 |
|
|
public String getResType(int resser){ |
325 |
|
|
return nodes.get(resser); |
326 |
|
|
} |
327 |
|
|
|
328 |
|
|
/** |
329 |
duarte |
165 |
* Gets node neighbourhood given a residue serial |
330 |
|
|
* @param resser |
331 |
|
|
* @return |
332 |
|
|
*/ |
333 |
duarte |
179 |
public NodeNbh getNodeNbh(int resser){ |
334 |
|
|
NodeNbh nbh = new NodeNbh(resser, getResType(resser)); |
335 |
duarte |
165 |
//this could be implemented using the contact map matrix and scanning through 1 column/row |
336 |
|
|
//it would be just slightly faster, here we do 2*numContacts iterations, using matrix would be only fullLength iterations |
337 |
duarte |
179 |
//however we would then have the overhead of creating the matrix |
338 |
duarte |
234 |
for (Edge cont:contacts){ |
339 |
duarte |
165 |
if (cont.i==resser) nbh.put(cont.j, nodes.get(cont.j)); |
340 |
|
|
if (cont.j==resser) nbh.put(cont.i, nodes.get(cont.i)); |
341 |
|
|
} |
342 |
|
|
return nbh; |
343 |
|
|
} |
344 |
|
|
|
345 |
|
|
/** |
346 |
|
|
* Gets edge neighbourhood (common neighbourhood) given a residue serial pair |
347 |
|
|
* @param i_resser |
348 |
|
|
* @param j_resser |
349 |
|
|
* @return |
350 |
|
|
*/ |
351 |
duarte |
179 |
public EdgeNbh getEdgeNbh(int i_resser, int j_resser){ |
352 |
|
|
EdgeNbh nbh = new EdgeNbh(i_resser, getResType(i_resser), j_resser, getResType(j_resser)); |
353 |
|
|
NodeNbh i_nbhd = getNodeNbh(i_resser); |
354 |
|
|
NodeNbh j_nbhd = getNodeNbh(j_resser); |
355 |
duarte |
175 |
if (j_nbhd.size()>=i_nbhd.size()) { //with this we will be slightly faster, always iterating through smallest TreeMap |
356 |
|
|
for (int resser:i_nbhd.keySet()) { |
357 |
|
|
if (j_nbhd.containsKey(resser)) nbh.put(resser, i_nbhd.get(resser)); |
358 |
|
|
} |
359 |
|
|
} else { |
360 |
|
|
for (int resser:j_nbhd.keySet()) { |
361 |
|
|
if (i_nbhd.containsKey(resser)) nbh.put(resser, j_nbhd.get(resser)); |
362 |
|
|
} |
363 |
duarte |
165 |
} |
364 |
|
|
return nbh; |
365 |
|
|
} |
366 |
duarte |
232 |
|
367 |
|
|
/** |
368 |
|
|
* Gets 2nd shell node neighbourhood |
369 |
|
|
* @param resser |
370 |
|
|
*/ |
371 |
|
|
public NodeNbh get2ndshellNodeNbh(int resser){ |
372 |
|
|
// first we create a NodeNbh object for the second shell, central residue is given resser |
373 |
|
|
NodeNbh nbh2ndshell = new NodeNbh(resser,getResType(resser)); |
374 |
|
|
// we get 1st neighbourhood |
375 |
|
|
NodeNbh nbh = this.getNodeNbh(resser); |
376 |
|
|
for (int nb:nbh.keySet()){ |
377 |
|
|
NodeNbh nbh2 = this.getNodeNbh(nb); // for each first neighbour we take its neighbourhood |
378 |
|
|
for (int nb2:nbh2.keySet()){ |
379 |
|
|
if (nb2!=resser && !nbh.containsKey(nb2)){ // if the 2nd neighbour nb2 is not the given resser or is not a 1st neighbour |
380 |
|
|
nbh2ndshell.put(nb2, getResType(nb2)); |
381 |
|
|
} |
382 |
|
|
} |
383 |
|
|
} |
384 |
|
|
return nbh2ndshell; |
385 |
|
|
} |
386 |
|
|
|
387 |
duarte |
234 |
public void addEdge(Edge cont){ |
388 |
duarte |
240 |
if (!directed && cont.i>cont.j){ |
389 |
duarte |
239 |
// we invert in case of undirected and i>j because in undirected we have only the half of the matrix j>i |
390 |
|
|
// if we added an edge i>j it could happen that the edge was already there but inverted and wouldn't be detected as a duplicate |
391 |
duarte |
240 |
cont = new Edge(cont.j,cont.i); |
392 |
duarte |
239 |
} |
393 |
duarte |
234 |
contacts.add(cont); // contacts is a TreeSet and thus takes care of duplicates |
394 |
|
|
int oldNumContacts = numContacts; |
395 |
|
|
numContacts=getNumContacts(); |
396 |
|
|
// if number of contacts changed that means we actually added a new contact and thus we modified the graph |
397 |
|
|
if (numContacts!=oldNumContacts) modified=true; |
398 |
|
|
|
399 |
duarte |
175 |
} |
400 |
|
|
|
401 |
duarte |
234 |
public void delEdge(Edge cont){ |
402 |
duarte |
240 |
if (!directed && cont.i>cont.j){ |
403 |
|
|
// we invert in case of undirected and i>j because in undirected we have only the half of the matrix j>i |
404 |
|
|
// if we try to delete an edge i>j it won't be there, we have to invert it and then try to delete |
405 |
|
|
cont = new Edge(cont.j,cont.i); |
406 |
|
|
} |
407 |
duarte |
175 |
contacts.remove(cont); |
408 |
duarte |
240 |
int oldNumContacts = numContacts; |
409 |
|
|
numContacts=getNumContacts(); |
410 |
|
|
// if number of contacts changed that means we actually added a new contact and thus we modified the graph |
411 |
|
|
if (numContacts!=oldNumContacts) modified=true; |
412 |
duarte |
175 |
} |
413 |
|
|
|
414 |
|
|
public void restrictContactsToMaxRange(int range){ |
415 |
duarte |
234 |
EdgeSet edgesToDelete = new EdgeSet(); |
416 |
|
|
for (Edge cont:contacts){ |
417 |
duarte |
179 |
if (cont.getRange()>range) edgesToDelete.add(cont); |
418 |
duarte |
175 |
} |
419 |
duarte |
234 |
for (Edge cont:edgesToDelete){ |
420 |
duarte |
179 |
delEdge(cont); |
421 |
|
|
} |
422 |
duarte |
175 |
} |
423 |
|
|
|
424 |
|
|
public void restrictContactsToMinRange(int range){ |
425 |
duarte |
234 |
EdgeSet edgesToDelete = new EdgeSet(); |
426 |
|
|
for (Edge cont:contacts){ |
427 |
duarte |
179 |
if (cont.getRange()<range) edgesToDelete.add(cont); |
428 |
duarte |
175 |
} |
429 |
duarte |
234 |
for (Edge cont:edgesToDelete){ |
430 |
duarte |
179 |
delEdge(cont); |
431 |
|
|
} |
432 |
duarte |
175 |
} |
433 |
duarte |
189 |
|
434 |
duarte |
191 |
/** |
435 |
|
|
* Returns a HashMap with all edge neighbourhood sizes (if they are >0) for each cell in the contact map |
436 |
|
|
* @return |
437 |
|
|
*/ |
438 |
duarte |
234 |
public HashMap<Edge,Integer> getAllEdgeNbhSizes() { |
439 |
|
|
HashMap<Edge,Integer> sizes = new HashMap<Edge, Integer>(); |
440 |
duarte |
191 |
if (!directed) { |
441 |
|
|
for (int i=1; i<fullLength;i++){ |
442 |
|
|
for (int j=i+1; j<fullLength;j++){ |
443 |
|
|
int size = getEdgeNbh(i, j).size(); |
444 |
duarte |
234 |
if (size>0) sizes.put(new Edge(i,j), size); |
445 |
duarte |
191 |
} |
446 |
|
|
} |
447 |
|
|
} else { |
448 |
|
|
for (int i=1; i<fullLength;i++){ |
449 |
|
|
for (int j=1; j<fullLength;j++){ |
450 |
|
|
if (i!=j){ |
451 |
|
|
int size = getEdgeNbh(i, j).size(); |
452 |
duarte |
234 |
if (size>0) sizes.put(new Edge(i,j), size); |
453 |
duarte |
191 |
} |
454 |
|
|
} |
455 |
|
|
} |
456 |
|
|
} |
457 |
|
|
return sizes; |
458 |
|
|
} |
459 |
|
|
|
460 |
duarte |
189 |
//TODO not sure what kind of return we want, for now is a HashMap with three graph objects |
461 |
|
|
public HashMap<String,Graph> compare(Graph other) throws Exception{ |
462 |
|
|
//first check that other has same sequence than this, otherwise throw exception |
463 |
|
|
if (!this.sequence.equals(other.sequence)){ |
464 |
|
|
//TODO throw specific exception |
465 |
|
|
throw new Exception("Sequence of 2 graphs to compare differ, can't compare them."); |
466 |
|
|
} |
467 |
duarte |
234 |
EdgeSet common = new EdgeSet(); |
468 |
|
|
EdgeSet onlythis = new EdgeSet(); |
469 |
|
|
EdgeSet onlyother = new EdgeSet(); |
470 |
|
|
for (Edge cont:this.contacts){ |
471 |
duarte |
189 |
if (other.contacts.contains(cont)) { |
472 |
|
|
common.add(cont); |
473 |
|
|
} else{ |
474 |
|
|
onlythis.add(cont); |
475 |
|
|
} |
476 |
|
|
} |
477 |
duarte |
234 |
for (Edge cont:other.contacts){ |
478 |
duarte |
189 |
if (!this.contacts.contains(cont)){ |
479 |
|
|
onlyother.add(cont); |
480 |
|
|
} |
481 |
|
|
} |
482 |
duarte |
284 |
Graph commongraph = new Graph (common,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode,model,secondaryStructure.copy(),getWeights()); |
483 |
|
|
Graph onlythisgraph = new Graph (onlythis,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode,model,secondaryStructure.copy(),getWeights()); |
484 |
|
|
Graph onlyothergraph = new Graph (onlyother,getNodes(),sequence,cutoff,ct,other.pdbCode,other.chainCode,other.pdbChainCode,model,secondaryStructure.copy(),getWeights()); |
485 |
duarte |
189 |
HashMap<String,Graph> result = new HashMap<String,Graph>(); |
486 |
|
|
result.put("common", commongraph); |
487 |
|
|
result.put("onlythis", onlythisgraph); |
488 |
|
|
result.put("onlyother",onlyothergraph); |
489 |
|
|
return result; |
490 |
|
|
} |
491 |
duarte |
206 |
|
492 |
|
|
public boolean isModified(){ |
493 |
|
|
return modified; |
494 |
|
|
} |
495 |
|
|
|
496 |
|
|
public boolean isDirected(){ |
497 |
|
|
return directed; |
498 |
|
|
} |
499 |
|
|
|
500 |
|
|
public String getPdbCode() { |
501 |
|
|
return pdbCode; |
502 |
|
|
} |
503 |
|
|
|
504 |
|
|
public String getPdbChainCode(){ |
505 |
|
|
return pdbChainCode; |
506 |
|
|
} |
507 |
|
|
|
508 |
|
|
public String getChainCode(){ |
509 |
|
|
return chainCode; |
510 |
|
|
} |
511 |
|
|
|
512 |
|
|
public String getSequence(){ |
513 |
|
|
return sequence; |
514 |
|
|
} |
515 |
|
|
|
516 |
|
|
public int getFullLength(){ |
517 |
|
|
return fullLength; |
518 |
|
|
} |
519 |
|
|
|
520 |
|
|
public int getObsLength(){ |
521 |
|
|
return obsLength; |
522 |
|
|
} |
523 |
|
|
|
524 |
|
|
public int getNumContacts(){ |
525 |
|
|
// in theory we could return just numContacts, because we have taken care of updating it every time contacts changed |
526 |
|
|
// however we call directly contacts.size() as I feel is safer |
527 |
|
|
return contacts.size(); |
528 |
|
|
} |
529 |
|
|
|
530 |
|
|
public String getContactType() { |
531 |
|
|
return ct; |
532 |
|
|
} |
533 |
|
|
|
534 |
|
|
public double getCutoff(){ |
535 |
|
|
return cutoff; |
536 |
|
|
} |
537 |
duarte |
249 |
|
538 |
duarte |
284 |
public double getWeight(Edge cont){ |
539 |
|
|
return this.weights.get(cont); |
540 |
|
|
} |
541 |
|
|
|
542 |
duarte |
249 |
public boolean containsContact(Edge cont){ |
543 |
|
|
// be careful with order, this checks strictly whether the cont.i, cont.j is given, strictly in that order! |
544 |
|
|
// in undirected case contacts are stored only in 1 direction (j>i) and thus if wrong order given it won't be found |
545 |
|
|
return contacts.contains(cont); |
546 |
|
|
} |
547 |
|
|
|
548 |
|
|
public void resetContacts(){ |
549 |
|
|
this.contacts = new EdgeSet(); |
550 |
|
|
} |
551 |
duarte |
260 |
|
552 |
|
|
public int getDegree(int resser){ |
553 |
|
|
if (directed) { |
554 |
|
|
System.err.println("Can't get degree for a directed graph, only in or out degree"); |
555 |
|
|
return 0; |
556 |
|
|
} |
557 |
|
|
int k = 0; |
558 |
|
|
for (Edge cont:contacts){ |
559 |
|
|
if (cont.i==resser || cont.j==resser) { |
560 |
|
|
k++; |
561 |
|
|
} |
562 |
|
|
} |
563 |
|
|
return k; |
564 |
|
|
} |
565 |
|
|
|
566 |
|
|
public int getInDegree(int resser){ |
567 |
|
|
if (!directed){ |
568 |
|
|
System.err.println("Can't get in degree for an undirected graph"); |
569 |
|
|
return 0; |
570 |
|
|
} |
571 |
|
|
int k = 0; |
572 |
|
|
for (Edge cont:contacts){ |
573 |
|
|
if (cont.j==resser) { |
574 |
|
|
k++; |
575 |
|
|
} |
576 |
|
|
} |
577 |
|
|
return k; |
578 |
|
|
} |
579 |
|
|
|
580 |
|
|
public int getOutDegree(int resser){ |
581 |
|
|
if (!directed){ |
582 |
|
|
System.err.println("Can't get out degree for an undirected graph"); |
583 |
|
|
return 0; |
584 |
|
|
} |
585 |
|
|
int k = 0; |
586 |
|
|
for (Edge cont:contacts){ |
587 |
|
|
if (cont.i==resser) { |
588 |
|
|
k++; |
589 |
|
|
} |
590 |
|
|
} |
591 |
|
|
return k; |
592 |
|
|
} |
593 |
|
|
|
594 |
stehr |
274 |
// secondary structure related methods |
595 |
|
|
|
596 |
|
|
/** |
597 |
|
|
* Returns true if secondary structure information is available, false otherwise. |
598 |
|
|
*/ |
599 |
|
|
public boolean hasSecondaryStructure() { |
600 |
|
|
return !this.secondaryStructure.isEmpty(); |
601 |
|
|
} |
602 |
|
|
|
603 |
|
|
/** |
604 |
|
|
* Returns the secondary structure annotation object of this graph. |
605 |
|
|
*/ |
606 |
|
|
public SecondaryStructure getSecondaryStructure() { |
607 |
|
|
return this.secondaryStructure; |
608 |
|
|
} |
609 |
|
|
|
610 |
duarte |
123 |
} |