1 |
duarte |
123 |
package proteinstructure; |
2 |
duarte |
191 |
|
3 |
duarte |
123 |
import java.io.FileOutputStream; |
4 |
|
|
import java.io.PrintStream; |
5 |
|
|
import java.io.IOException; |
6 |
duarte |
129 |
import java.util.TreeMap; |
7 |
duarte |
189 |
import java.util.HashMap; |
8 |
duarte |
123 |
|
9 |
duarte |
207 |
/** |
10 |
|
|
* A residue interaction graph derived from a single chain pdb protein structure |
11 |
|
|
* |
12 |
|
|
* @author Jose Duarte |
13 |
|
|
* Class: Graph |
14 |
|
|
* Package: proteinstructure |
15 |
|
|
*/ |
16 |
duarte |
123 |
public class Graph { |
17 |
|
|
|
18 |
duarte |
144 |
public final static String GRAPHFILEFORMATVERSION = "1.0"; |
19 |
duarte |
206 |
|
20 |
duarte |
234 |
public EdgeSet contacts; // we keep it public to be able to re-reference the object directly (getContacts() copies it) |
21 |
duarte |
123 |
|
22 |
duarte |
207 |
protected TreeMap<Integer,String> nodes; // nodes is a TreeMap of residue serials to residue types (3 letter code) |
23 |
|
|
protected String sequence; // the full sequence (with unobserved residues and non-standard aas ='X') |
24 |
|
|
protected String pdbCode; |
25 |
|
|
protected String chainCode; |
26 |
duarte |
208 |
protected String pdbChainCode; |
27 |
duarte |
207 |
protected double cutoff; |
28 |
|
|
protected String ct; // the contact type |
29 |
duarte |
208 |
protected boolean directed; |
30 |
duarte |
207 |
|
31 |
duarte |
159 |
// fullLength is length of full sequence or: |
32 |
|
|
// -if sequence not provided (when reading from db): length of everything except possible unobserved residues at end of chain |
33 |
|
|
// -if sequence and nodes not provided (when reading from file and sequence field missing): length except possible unobserved residues at end of chain and possible nodes without contacts at end of chain |
34 |
duarte |
207 |
protected int fullLength; |
35 |
|
|
protected int obsLength; // length without unobserved, non standard aas |
36 |
duarte |
159 |
|
37 |
duarte |
207 |
protected int numContacts; |
38 |
duarte |
159 |
|
39 |
duarte |
207 |
protected boolean modified; |
40 |
duarte |
175 |
|
41 |
duarte |
207 |
public Graph() { |
42 |
|
|
|
43 |
|
|
} |
44 |
|
|
|
45 |
duarte |
134 |
/** |
46 |
|
|
* Constructs Graph object by passing ArrayList with contacts and TreeMap with nodes (res serials and types) |
47 |
duarte |
206 |
* Must also pass contact type, cutoff, pdbCode and chainCode |
48 |
duarte |
134 |
* @param contacts |
49 |
|
|
* @param nodes |
50 |
|
|
* @param sequence |
51 |
|
|
* @param cutoff |
52 |
|
|
* @param ct |
53 |
duarte |
206 |
* @param pdbCode |
54 |
|
|
* @param chainCode |
55 |
duarte |
134 |
*/ |
56 |
duarte |
252 |
public Graph (EdgeSet contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String pdbCode, String chainCode, String pdbChainCode) { |
57 |
duarte |
123 |
this.contacts=contacts; |
58 |
|
|
this.cutoff=cutoff; |
59 |
duarte |
129 |
this.nodes=nodes; |
60 |
|
|
this.sequence=sequence; |
61 |
duarte |
206 |
this.pdbCode=pdbCode; |
62 |
|
|
this.chainCode=chainCode; |
63 |
|
|
this.pdbChainCode=pdbChainCode; |
64 |
duarte |
123 |
this.ct=ct; |
65 |
duarte |
159 |
this.fullLength=sequence.length(); |
66 |
|
|
this.obsLength=nodes.size(); |
67 |
|
|
this.numContacts=contacts.size(); |
68 |
duarte |
175 |
this.modified=false; |
69 |
duarte |
208 |
this.directed=false; |
70 |
duarte |
129 |
if (ct.contains("/")){ |
71 |
|
|
directed=true; |
72 |
|
|
} |
73 |
stehr |
217 |
|
74 |
|
|
assert(this.pdbCode.equals(this.pdbCode.toLowerCase())); // pdb codes should be always lower case |
75 |
|
|
assert(this.pdbChainCode.equals(this.pdbChainCode.toUpperCase())); // pdb chain codes should be always upper case |
76 |
duarte |
123 |
} |
77 |
duarte |
135 |
|
78 |
duarte |
129 |
|
79 |
duarte |
135 |
//TODO implement (from python) write_graph_to_db, do we really need it here?? |
80 |
|
|
|
81 |
duarte |
144 |
public void write_graph_to_file (String outfile) throws IOException { |
82 |
|
|
PrintStream Out = new PrintStream(new FileOutputStream(outfile)); |
83 |
duarte |
208 |
Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION); |
84 |
duarte |
144 |
Out.println("#SEQUENCE: "+sequence); |
85 |
duarte |
206 |
Out.println("#PDB: "+pdbCode); |
86 |
|
|
Out.println("#PDB CHAIN CODE: "+pdbChainCode); |
87 |
|
|
Out.println("#CHAIN: "+chainCode); |
88 |
duarte |
144 |
Out.println("#CT: "+ct); |
89 |
|
|
Out.println("#CUTOFF: "+cutoff); |
90 |
duarte |
234 |
for (Edge pair:contacts){ |
91 |
duarte |
144 |
int i_resser=pair.i; |
92 |
|
|
int j_resser=pair.j; |
93 |
|
|
Out.println(i_resser+"\t"+j_resser); |
94 |
|
|
} |
95 |
|
|
Out.close(); |
96 |
|
|
} |
97 |
duarte |
175 |
|
98 |
duarte |
159 |
/** |
99 |
duarte |
234 |
* Gets list of contacts as a new EdgeSet (deep copied) |
100 |
duarte |
175 |
* |
101 |
|
|
*/ |
102 |
duarte |
234 |
public EdgeSet getContacts(){ |
103 |
|
|
EdgeSet newContacts = new EdgeSet(); |
104 |
|
|
for (Edge cont:contacts){ |
105 |
|
|
newContacts.add(new Edge(cont.i,cont.j)); |
106 |
duarte |
175 |
} |
107 |
|
|
return newContacts; |
108 |
|
|
} |
109 |
|
|
|
110 |
|
|
/** |
111 |
|
|
* Gets TreeMap of nodes, deep copying |
112 |
|
|
* |
113 |
|
|
*/ |
114 |
|
|
public TreeMap<Integer,String> getNodes(){ |
115 |
|
|
TreeMap<Integer,String> newNodes = new TreeMap<Integer,String>(); |
116 |
|
|
for (int resser:nodes.keySet()){ |
117 |
|
|
newNodes.put(resser, nodes.get(resser)); |
118 |
|
|
} |
119 |
|
|
return newNodes; |
120 |
|
|
} |
121 |
|
|
|
122 |
|
|
/** |
123 |
|
|
* Deep copies this Graph object returning new one |
124 |
|
|
* @return |
125 |
|
|
*/ |
126 |
|
|
public Graph copy(){ |
127 |
duarte |
206 |
return new Graph(getContacts(),getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode); |
128 |
duarte |
175 |
} |
129 |
|
|
|
130 |
|
|
/** |
131 |
duarte |
232 |
* Gets a reference to this Graph deep copying contacts but re-referencing nodes |
132 |
|
|
* @return |
133 |
|
|
*/ |
134 |
|
|
public Graph copyKeepingNodes(){ |
135 |
|
|
return new Graph(getContacts(),nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode); |
136 |
|
|
} |
137 |
|
|
|
138 |
|
|
/** |
139 |
duarte |
159 |
* Returns an int matrix with 1s for contacts and 0s for non contacts, i.e. the contact map |
140 |
|
|
* In non-crossed cases this should give us the upper half matrix (contacts are only j>i) |
141 |
|
|
* In crossed cases this gives us a full matrix (contacts are both j>i and i>j since they are directed) |
142 |
|
|
* @return |
143 |
|
|
*/ |
144 |
|
|
public int[][] getIntMatrix(){ |
145 |
|
|
// this initialises the matrix to 0 (i.e. no contact) |
146 |
|
|
int[][] cm = new int[fullLength][fullLength]; |
147 |
|
|
// we put a 1 for all given contacts |
148 |
duarte |
234 |
for (Edge cont:contacts){ |
149 |
duarte |
159 |
int i_resser = cont.i; |
150 |
|
|
int j_resser = cont.j; |
151 |
|
|
cm[i_resser-1][j_resser-1]=1; |
152 |
duarte |
129 |
} |
153 |
|
|
return cm; |
154 |
|
|
} |
155 |
duarte |
159 |
|
156 |
duarte |
165 |
/** |
157 |
duarte |
179 |
* Gets a node's residue type given the residue serial |
158 |
|
|
* @param resser |
159 |
|
|
* @return |
160 |
|
|
*/ |
161 |
|
|
public String getResType(int resser){ |
162 |
|
|
return nodes.get(resser); |
163 |
|
|
} |
164 |
|
|
|
165 |
|
|
/** |
166 |
duarte |
165 |
* Gets node neighbourhood given a residue serial |
167 |
|
|
* @param resser |
168 |
|
|
* @return |
169 |
|
|
*/ |
170 |
duarte |
179 |
public NodeNbh getNodeNbh(int resser){ |
171 |
|
|
NodeNbh nbh = new NodeNbh(resser, getResType(resser)); |
172 |
duarte |
165 |
//this could be implemented using the contact map matrix and scanning through 1 column/row |
173 |
|
|
//it would be just slightly faster, here we do 2*numContacts iterations, using matrix would be only fullLength iterations |
174 |
duarte |
179 |
//however we would then have the overhead of creating the matrix |
175 |
duarte |
234 |
for (Edge cont:contacts){ |
176 |
duarte |
165 |
if (cont.i==resser) nbh.put(cont.j, nodes.get(cont.j)); |
177 |
|
|
if (cont.j==resser) nbh.put(cont.i, nodes.get(cont.i)); |
178 |
|
|
} |
179 |
|
|
return nbh; |
180 |
|
|
} |
181 |
|
|
|
182 |
|
|
/** |
183 |
|
|
* Gets edge neighbourhood (common neighbourhood) given a residue serial pair |
184 |
|
|
* @param i_resser |
185 |
|
|
* @param j_resser |
186 |
|
|
* @return |
187 |
|
|
*/ |
188 |
duarte |
179 |
public EdgeNbh getEdgeNbh(int i_resser, int j_resser){ |
189 |
|
|
EdgeNbh nbh = new EdgeNbh(i_resser, getResType(i_resser), j_resser, getResType(j_resser)); |
190 |
|
|
NodeNbh i_nbhd = getNodeNbh(i_resser); |
191 |
|
|
NodeNbh j_nbhd = getNodeNbh(j_resser); |
192 |
duarte |
175 |
if (j_nbhd.size()>=i_nbhd.size()) { //with this we will be slightly faster, always iterating through smallest TreeMap |
193 |
|
|
for (int resser:i_nbhd.keySet()) { |
194 |
|
|
if (j_nbhd.containsKey(resser)) nbh.put(resser, i_nbhd.get(resser)); |
195 |
|
|
} |
196 |
|
|
} else { |
197 |
|
|
for (int resser:j_nbhd.keySet()) { |
198 |
|
|
if (i_nbhd.containsKey(resser)) nbh.put(resser, j_nbhd.get(resser)); |
199 |
|
|
} |
200 |
duarte |
165 |
} |
201 |
|
|
return nbh; |
202 |
|
|
} |
203 |
duarte |
232 |
|
204 |
|
|
/** |
205 |
|
|
* Gets 2nd shell node neighbourhood |
206 |
|
|
* @param resser |
207 |
|
|
*/ |
208 |
|
|
public NodeNbh get2ndshellNodeNbh(int resser){ |
209 |
|
|
// first we create a NodeNbh object for the second shell, central residue is given resser |
210 |
|
|
NodeNbh nbh2ndshell = new NodeNbh(resser,getResType(resser)); |
211 |
|
|
// we get 1st neighbourhood |
212 |
|
|
NodeNbh nbh = this.getNodeNbh(resser); |
213 |
|
|
for (int nb:nbh.keySet()){ |
214 |
|
|
NodeNbh nbh2 = this.getNodeNbh(nb); // for each first neighbour we take its neighbourhood |
215 |
|
|
for (int nb2:nbh2.keySet()){ |
216 |
|
|
if (nb2!=resser && !nbh.containsKey(nb2)){ // if the 2nd neighbour nb2 is not the given resser or is not a 1st neighbour |
217 |
|
|
nbh2ndshell.put(nb2, getResType(nb2)); |
218 |
|
|
} |
219 |
|
|
} |
220 |
|
|
} |
221 |
|
|
return nbh2ndshell; |
222 |
|
|
} |
223 |
|
|
|
224 |
duarte |
234 |
public void addEdge(Edge cont){ |
225 |
duarte |
240 |
if (!directed && cont.i>cont.j){ |
226 |
duarte |
239 |
// we invert in case of undirected and i>j because in undirected we have only the half of the matrix j>i |
227 |
|
|
// if we added an edge i>j it could happen that the edge was already there but inverted and wouldn't be detected as a duplicate |
228 |
duarte |
240 |
cont = new Edge(cont.j,cont.i); |
229 |
duarte |
239 |
} |
230 |
duarte |
234 |
contacts.add(cont); // contacts is a TreeSet and thus takes care of duplicates |
231 |
|
|
int oldNumContacts = numContacts; |
232 |
|
|
numContacts=getNumContacts(); |
233 |
|
|
// if number of contacts changed that means we actually added a new contact and thus we modified the graph |
234 |
|
|
if (numContacts!=oldNumContacts) modified=true; |
235 |
|
|
|
236 |
duarte |
175 |
} |
237 |
|
|
|
238 |
duarte |
234 |
public void delEdge(Edge cont){ |
239 |
duarte |
240 |
if (!directed && cont.i>cont.j){ |
240 |
|
|
// we invert in case of undirected and i>j because in undirected we have only the half of the matrix j>i |
241 |
|
|
// if we try to delete an edge i>j it won't be there, we have to invert it and then try to delete |
242 |
|
|
cont = new Edge(cont.j,cont.i); |
243 |
|
|
} |
244 |
duarte |
175 |
contacts.remove(cont); |
245 |
duarte |
240 |
int oldNumContacts = numContacts; |
246 |
|
|
numContacts=getNumContacts(); |
247 |
|
|
// if number of contacts changed that means we actually added a new contact and thus we modified the graph |
248 |
|
|
if (numContacts!=oldNumContacts) modified=true; |
249 |
duarte |
175 |
} |
250 |
|
|
|
251 |
|
|
public void restrictContactsToMaxRange(int range){ |
252 |
duarte |
234 |
EdgeSet edgesToDelete = new EdgeSet(); |
253 |
|
|
for (Edge cont:contacts){ |
254 |
duarte |
179 |
if (cont.getRange()>range) edgesToDelete.add(cont); |
255 |
duarte |
175 |
} |
256 |
duarte |
234 |
for (Edge cont:edgesToDelete){ |
257 |
duarte |
179 |
delEdge(cont); |
258 |
|
|
} |
259 |
duarte |
175 |
} |
260 |
|
|
|
261 |
|
|
public void restrictContactsToMinRange(int range){ |
262 |
duarte |
234 |
EdgeSet edgesToDelete = new EdgeSet(); |
263 |
|
|
for (Edge cont:contacts){ |
264 |
duarte |
179 |
if (cont.getRange()<range) edgesToDelete.add(cont); |
265 |
duarte |
175 |
} |
266 |
duarte |
234 |
for (Edge cont:edgesToDelete){ |
267 |
duarte |
179 |
delEdge(cont); |
268 |
|
|
} |
269 |
duarte |
175 |
} |
270 |
duarte |
189 |
|
271 |
duarte |
191 |
/** |
272 |
|
|
* Returns a HashMap with all edge neighbourhood sizes (if they are >0) for each cell in the contact map |
273 |
|
|
* @return |
274 |
|
|
*/ |
275 |
duarte |
234 |
public HashMap<Edge,Integer> getAllEdgeNbhSizes() { |
276 |
|
|
HashMap<Edge,Integer> sizes = new HashMap<Edge, Integer>(); |
277 |
duarte |
191 |
if (!directed) { |
278 |
|
|
for (int i=1; i<fullLength;i++){ |
279 |
|
|
for (int j=i+1; j<fullLength;j++){ |
280 |
|
|
int size = getEdgeNbh(i, j).size(); |
281 |
duarte |
234 |
if (size>0) sizes.put(new Edge(i,j), size); |
282 |
duarte |
191 |
} |
283 |
|
|
} |
284 |
|
|
} else { |
285 |
|
|
for (int i=1; i<fullLength;i++){ |
286 |
|
|
for (int j=1; j<fullLength;j++){ |
287 |
|
|
if (i!=j){ |
288 |
|
|
int size = getEdgeNbh(i, j).size(); |
289 |
duarte |
234 |
if (size>0) sizes.put(new Edge(i,j), size); |
290 |
duarte |
191 |
} |
291 |
|
|
} |
292 |
|
|
} |
293 |
|
|
} |
294 |
|
|
return sizes; |
295 |
|
|
} |
296 |
|
|
|
297 |
duarte |
189 |
//TODO not sure what kind of return we want, for now is a HashMap with three graph objects |
298 |
|
|
public HashMap<String,Graph> compare(Graph other) throws Exception{ |
299 |
|
|
//first check that other has same sequence than this, otherwise throw exception |
300 |
|
|
if (!this.sequence.equals(other.sequence)){ |
301 |
|
|
//TODO throw specific exception |
302 |
|
|
throw new Exception("Sequence of 2 graphs to compare differ, can't compare them."); |
303 |
|
|
} |
304 |
duarte |
234 |
EdgeSet common = new EdgeSet(); |
305 |
|
|
EdgeSet onlythis = new EdgeSet(); |
306 |
|
|
EdgeSet onlyother = new EdgeSet(); |
307 |
|
|
for (Edge cont:this.contacts){ |
308 |
duarte |
189 |
if (other.contacts.contains(cont)) { |
309 |
|
|
common.add(cont); |
310 |
|
|
} else{ |
311 |
|
|
onlythis.add(cont); |
312 |
|
|
} |
313 |
|
|
} |
314 |
duarte |
234 |
for (Edge cont:other.contacts){ |
315 |
duarte |
189 |
if (!this.contacts.contains(cont)){ |
316 |
|
|
onlyother.add(cont); |
317 |
|
|
} |
318 |
|
|
} |
319 |
duarte |
206 |
Graph commongraph = new Graph (common,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode); |
320 |
|
|
Graph onlythisgraph = new Graph (onlythis,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode); |
321 |
|
|
Graph onlyothergraph = new Graph (onlyother,getNodes(),sequence,cutoff,ct,other.pdbCode,other.chainCode,other.pdbChainCode); |
322 |
duarte |
189 |
HashMap<String,Graph> result = new HashMap<String,Graph>(); |
323 |
|
|
result.put("common", commongraph); |
324 |
|
|
result.put("onlythis", onlythisgraph); |
325 |
|
|
result.put("onlyother",onlyothergraph); |
326 |
|
|
return result; |
327 |
|
|
} |
328 |
duarte |
206 |
|
329 |
|
|
public boolean isModified(){ |
330 |
|
|
return modified; |
331 |
|
|
} |
332 |
|
|
|
333 |
|
|
public boolean isDirected(){ |
334 |
|
|
return directed; |
335 |
|
|
} |
336 |
|
|
|
337 |
|
|
public String getPdbCode() { |
338 |
|
|
return pdbCode; |
339 |
|
|
} |
340 |
|
|
|
341 |
|
|
public String getPdbChainCode(){ |
342 |
|
|
return pdbChainCode; |
343 |
|
|
} |
344 |
|
|
|
345 |
|
|
public String getChainCode(){ |
346 |
|
|
return chainCode; |
347 |
|
|
} |
348 |
|
|
|
349 |
|
|
public String getSequence(){ |
350 |
|
|
return sequence; |
351 |
|
|
} |
352 |
|
|
|
353 |
|
|
public int getFullLength(){ |
354 |
|
|
return fullLength; |
355 |
|
|
} |
356 |
|
|
|
357 |
|
|
public int getObsLength(){ |
358 |
|
|
return obsLength; |
359 |
|
|
} |
360 |
|
|
|
361 |
|
|
public int getNumContacts(){ |
362 |
|
|
// in theory we could return just numContacts, because we have taken care of updating it every time contacts changed |
363 |
|
|
// however we call directly contacts.size() as I feel is safer |
364 |
|
|
return contacts.size(); |
365 |
|
|
} |
366 |
|
|
|
367 |
|
|
public String getContactType() { |
368 |
|
|
return ct; |
369 |
|
|
} |
370 |
|
|
|
371 |
|
|
public double getCutoff(){ |
372 |
|
|
return cutoff; |
373 |
|
|
} |
374 |
duarte |
249 |
|
375 |
|
|
public boolean containsContact(Edge cont){ |
376 |
|
|
// be careful with order, this checks strictly whether the cont.i, cont.j is given, strictly in that order! |
377 |
|
|
// in undirected case contacts are stored only in 1 direction (j>i) and thus if wrong order given it won't be found |
378 |
|
|
return contacts.contains(cont); |
379 |
|
|
} |
380 |
|
|
|
381 |
|
|
public void resetContacts(){ |
382 |
|
|
this.contacts = new EdgeSet(); |
383 |
|
|
} |
384 |
duarte |
123 |
} |