1 |
duarte |
123 |
package proteinstructure; |
2 |
duarte |
191 |
|
3 |
duarte |
123 |
import java.io.FileOutputStream; |
4 |
|
|
import java.io.PrintStream; |
5 |
|
|
import java.io.IOException; |
6 |
duarte |
129 |
import java.util.TreeMap; |
7 |
duarte |
189 |
import java.util.HashMap; |
8 |
duarte |
123 |
|
9 |
duarte |
207 |
/** |
10 |
|
|
* A residue interaction graph derived from a single chain pdb protein structure |
11 |
|
|
* |
12 |
|
|
* @author Jose Duarte |
13 |
|
|
* Class: Graph |
14 |
|
|
* Package: proteinstructure |
15 |
|
|
*/ |
16 |
duarte |
123 |
public class Graph { |
17 |
|
|
|
18 |
duarte |
144 |
public final static String GRAPHFILEFORMATVERSION = "1.0"; |
19 |
duarte |
206 |
|
20 |
duarte |
207 |
public ContactList contacts; // we keep it public to be able to re-reference the object directly (getContacts() copies it) |
21 |
duarte |
123 |
|
22 |
duarte |
207 |
protected TreeMap<Integer,String> nodes; // nodes is a TreeMap of residue serials to residue types (3 letter code) |
23 |
|
|
protected String sequence; // the full sequence (with unobserved residues and non-standard aas ='X') |
24 |
|
|
protected String pdbCode; |
25 |
|
|
protected String chainCode; |
26 |
duarte |
208 |
protected String pdbChainCode; |
27 |
duarte |
207 |
protected double cutoff; |
28 |
|
|
protected String ct; // the contact type |
29 |
duarte |
208 |
protected boolean directed; |
30 |
duarte |
207 |
|
31 |
duarte |
159 |
// fullLength is length of full sequence or: |
32 |
|
|
// -if sequence not provided (when reading from db): length of everything except possible unobserved residues at end of chain |
33 |
|
|
// -if sequence and nodes not provided (when reading from file and sequence field missing): length except possible unobserved residues at end of chain and possible nodes without contacts at end of chain |
34 |
duarte |
207 |
protected int fullLength; |
35 |
|
|
protected int obsLength; // length without unobserved, non standard aas |
36 |
duarte |
159 |
|
37 |
duarte |
207 |
protected int numContacts; |
38 |
duarte |
159 |
|
39 |
duarte |
207 |
protected boolean modified; |
40 |
duarte |
175 |
|
41 |
duarte |
207 |
public Graph() { |
42 |
|
|
|
43 |
|
|
} |
44 |
|
|
|
45 |
duarte |
134 |
/** |
46 |
|
|
* Constructs Graph object by passing ArrayList with contacts and TreeMap with nodes (res serials and types) |
47 |
duarte |
206 |
* Must also pass contact type, cutoff, pdbCode and chainCode |
48 |
duarte |
134 |
* @param contacts |
49 |
|
|
* @param nodes |
50 |
|
|
* @param sequence |
51 |
|
|
* @param cutoff |
52 |
|
|
* @param ct |
53 |
duarte |
206 |
* @param pdbCode |
54 |
|
|
* @param chainCode |
55 |
duarte |
134 |
*/ |
56 |
duarte |
206 |
public Graph (ContactList contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String pdbCode, String chainCode, String pdbChainCode) { |
57 |
duarte |
123 |
this.contacts=contacts; |
58 |
|
|
this.cutoff=cutoff; |
59 |
duarte |
129 |
this.nodes=nodes; |
60 |
|
|
this.sequence=sequence; |
61 |
duarte |
206 |
this.pdbCode=pdbCode; |
62 |
|
|
this.chainCode=chainCode; |
63 |
|
|
this.pdbChainCode=pdbChainCode; |
64 |
duarte |
123 |
this.ct=ct; |
65 |
duarte |
159 |
this.fullLength=sequence.length(); |
66 |
|
|
this.obsLength=nodes.size(); |
67 |
|
|
this.numContacts=contacts.size(); |
68 |
duarte |
175 |
this.modified=false; |
69 |
duarte |
208 |
this.directed=false; |
70 |
duarte |
129 |
if (ct.contains("/")){ |
71 |
|
|
directed=true; |
72 |
|
|
} |
73 |
duarte |
123 |
} |
74 |
duarte |
135 |
|
75 |
duarte |
129 |
|
76 |
duarte |
135 |
//TODO implement (from python) write_graph_to_db, do we really need it here?? |
77 |
|
|
|
78 |
duarte |
144 |
public void write_graph_to_file (String outfile) throws IOException { |
79 |
|
|
PrintStream Out = new PrintStream(new FileOutputStream(outfile)); |
80 |
duarte |
208 |
Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION); |
81 |
duarte |
144 |
Out.println("#SEQUENCE: "+sequence); |
82 |
duarte |
206 |
Out.println("#PDB: "+pdbCode); |
83 |
|
|
Out.println("#PDB CHAIN CODE: "+pdbChainCode); |
84 |
|
|
Out.println("#CHAIN: "+chainCode); |
85 |
duarte |
144 |
Out.println("#CT: "+ct); |
86 |
|
|
Out.println("#CUTOFF: "+cutoff); |
87 |
|
|
for (Contact pair:contacts){ |
88 |
|
|
int i_resser=pair.i; |
89 |
|
|
int j_resser=pair.j; |
90 |
|
|
Out.println(i_resser+"\t"+j_resser); |
91 |
|
|
} |
92 |
|
|
Out.close(); |
93 |
|
|
} |
94 |
duarte |
175 |
|
95 |
duarte |
159 |
/** |
96 |
duarte |
175 |
* Gets list of contacts as a new ContactList (deep copied) |
97 |
|
|
* |
98 |
|
|
*/ |
99 |
|
|
public ContactList getContacts(){ |
100 |
|
|
ContactList newContacts = new ContactList(); |
101 |
|
|
for (Contact cont:contacts){ |
102 |
|
|
newContacts.add(new Contact(cont.i,cont.j)); |
103 |
|
|
} |
104 |
|
|
return newContacts; |
105 |
|
|
} |
106 |
|
|
|
107 |
|
|
/** |
108 |
|
|
* Gets TreeMap of nodes, deep copying |
109 |
|
|
* |
110 |
|
|
*/ |
111 |
|
|
public TreeMap<Integer,String> getNodes(){ |
112 |
|
|
TreeMap<Integer,String> newNodes = new TreeMap<Integer,String>(); |
113 |
|
|
for (int resser:nodes.keySet()){ |
114 |
|
|
newNodes.put(resser, nodes.get(resser)); |
115 |
|
|
} |
116 |
|
|
return newNodes; |
117 |
|
|
} |
118 |
|
|
|
119 |
|
|
/** |
120 |
|
|
* Deep copies this Graph object returning new one |
121 |
|
|
* @return |
122 |
|
|
*/ |
123 |
|
|
public Graph copy(){ |
124 |
duarte |
206 |
return new Graph(getContacts(),getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode); |
125 |
duarte |
175 |
} |
126 |
|
|
|
127 |
|
|
/** |
128 |
duarte |
159 |
* Returns an int matrix with 1s for contacts and 0s for non contacts, i.e. the contact map |
129 |
|
|
* In non-crossed cases this should give us the upper half matrix (contacts are only j>i) |
130 |
|
|
* In crossed cases this gives us a full matrix (contacts are both j>i and i>j since they are directed) |
131 |
|
|
* @return |
132 |
|
|
*/ |
133 |
|
|
public int[][] getIntMatrix(){ |
134 |
|
|
// this initialises the matrix to 0 (i.e. no contact) |
135 |
|
|
int[][] cm = new int[fullLength][fullLength]; |
136 |
|
|
// we put a 1 for all given contacts |
137 |
|
|
for (Contact cont:contacts){ |
138 |
|
|
int i_resser = cont.i; |
139 |
|
|
int j_resser = cont.j; |
140 |
|
|
cm[i_resser-1][j_resser-1]=1; |
141 |
duarte |
129 |
} |
142 |
|
|
return cm; |
143 |
|
|
} |
144 |
duarte |
159 |
|
145 |
duarte |
165 |
/** |
146 |
duarte |
179 |
* Gets a node's residue type given the residue serial |
147 |
|
|
* @param resser |
148 |
|
|
* @return |
149 |
|
|
*/ |
150 |
|
|
public String getResType(int resser){ |
151 |
|
|
return nodes.get(resser); |
152 |
|
|
} |
153 |
|
|
|
154 |
|
|
/** |
155 |
duarte |
165 |
* Gets node neighbourhood given a residue serial |
156 |
|
|
* @param resser |
157 |
|
|
* @return |
158 |
|
|
*/ |
159 |
duarte |
179 |
public NodeNbh getNodeNbh(int resser){ |
160 |
|
|
NodeNbh nbh = new NodeNbh(resser, getResType(resser)); |
161 |
duarte |
165 |
//this could be implemented using the contact map matrix and scanning through 1 column/row |
162 |
|
|
//it would be just slightly faster, here we do 2*numContacts iterations, using matrix would be only fullLength iterations |
163 |
duarte |
179 |
//however we would then have the overhead of creating the matrix |
164 |
duarte |
165 |
for (Contact cont:contacts){ |
165 |
|
|
if (cont.i==resser) nbh.put(cont.j, nodes.get(cont.j)); |
166 |
|
|
if (cont.j==resser) nbh.put(cont.i, nodes.get(cont.i)); |
167 |
|
|
} |
168 |
|
|
return nbh; |
169 |
|
|
} |
170 |
|
|
|
171 |
|
|
/** |
172 |
|
|
* Gets edge neighbourhood (common neighbourhood) given a residue serial pair |
173 |
|
|
* @param i_resser |
174 |
|
|
* @param j_resser |
175 |
|
|
* @return |
176 |
|
|
*/ |
177 |
duarte |
179 |
public EdgeNbh getEdgeNbh(int i_resser, int j_resser){ |
178 |
|
|
EdgeNbh nbh = new EdgeNbh(i_resser, getResType(i_resser), j_resser, getResType(j_resser)); |
179 |
|
|
NodeNbh i_nbhd = getNodeNbh(i_resser); |
180 |
|
|
NodeNbh j_nbhd = getNodeNbh(j_resser); |
181 |
duarte |
175 |
if (j_nbhd.size()>=i_nbhd.size()) { //with this we will be slightly faster, always iterating through smallest TreeMap |
182 |
|
|
for (int resser:i_nbhd.keySet()) { |
183 |
|
|
if (j_nbhd.containsKey(resser)) nbh.put(resser, i_nbhd.get(resser)); |
184 |
|
|
} |
185 |
|
|
} else { |
186 |
|
|
for (int resser:j_nbhd.keySet()) { |
187 |
|
|
if (i_nbhd.containsKey(resser)) nbh.put(resser, j_nbhd.get(resser)); |
188 |
|
|
} |
189 |
duarte |
165 |
} |
190 |
|
|
return nbh; |
191 |
|
|
} |
192 |
|
|
|
193 |
duarte |
175 |
public void addEdge(Contact cont){ |
194 |
|
|
contacts.add(cont); |
195 |
|
|
numContacts++; |
196 |
|
|
modified=true; |
197 |
|
|
} |
198 |
|
|
|
199 |
|
|
public void delEdge(Contact cont){ |
200 |
|
|
contacts.remove(cont); |
201 |
|
|
numContacts--; |
202 |
|
|
modified=true; |
203 |
|
|
} |
204 |
|
|
|
205 |
|
|
public void restrictContactsToMaxRange(int range){ |
206 |
duarte |
179 |
ContactList edgesToDelete = new ContactList(); |
207 |
duarte |
175 |
for (Contact cont:contacts){ |
208 |
duarte |
179 |
if (cont.getRange()>range) edgesToDelete.add(cont); |
209 |
duarte |
175 |
} |
210 |
duarte |
179 |
for (Contact cont:edgesToDelete){ |
211 |
|
|
delEdge(cont); |
212 |
|
|
} |
213 |
duarte |
175 |
} |
214 |
|
|
|
215 |
|
|
public void restrictContactsToMinRange(int range){ |
216 |
duarte |
179 |
ContactList edgesToDelete = new ContactList(); |
217 |
duarte |
175 |
for (Contact cont:contacts){ |
218 |
duarte |
179 |
if (cont.getRange()<range) edgesToDelete.add(cont); |
219 |
duarte |
175 |
} |
220 |
duarte |
179 |
for (Contact cont:edgesToDelete){ |
221 |
|
|
delEdge(cont); |
222 |
|
|
} |
223 |
duarte |
175 |
} |
224 |
duarte |
189 |
|
225 |
duarte |
191 |
/** |
226 |
|
|
* Returns a HashMap with all edge neighbourhood sizes (if they are >0) for each cell in the contact map |
227 |
|
|
* @return |
228 |
|
|
*/ |
229 |
|
|
public HashMap<Contact,Integer> getAllEdgeNbhSizes() { |
230 |
|
|
HashMap<Contact,Integer> sizes = new HashMap<Contact, Integer>(); |
231 |
|
|
if (!directed) { |
232 |
|
|
for (int i=1; i<fullLength;i++){ |
233 |
|
|
for (int j=i+1; j<fullLength;j++){ |
234 |
|
|
int size = getEdgeNbh(i, j).size(); |
235 |
|
|
if (size>0) sizes.put(new Contact(i,j), size); |
236 |
|
|
} |
237 |
|
|
} |
238 |
|
|
} else { |
239 |
|
|
for (int i=1; i<fullLength;i++){ |
240 |
|
|
for (int j=1; j<fullLength;j++){ |
241 |
|
|
if (i!=j){ |
242 |
|
|
int size = getEdgeNbh(i, j).size(); |
243 |
|
|
if (size>0) sizes.put(new Contact(i,j), size); |
244 |
|
|
} |
245 |
|
|
} |
246 |
|
|
} |
247 |
|
|
} |
248 |
|
|
return sizes; |
249 |
|
|
} |
250 |
|
|
|
251 |
duarte |
189 |
//TODO not sure what kind of return we want, for now is a HashMap with three graph objects |
252 |
|
|
public HashMap<String,Graph> compare(Graph other) throws Exception{ |
253 |
|
|
//first check that other has same sequence than this, otherwise throw exception |
254 |
|
|
if (!this.sequence.equals(other.sequence)){ |
255 |
|
|
//TODO throw specific exception |
256 |
|
|
throw new Exception("Sequence of 2 graphs to compare differ, can't compare them."); |
257 |
|
|
} |
258 |
|
|
ContactList common = new ContactList(); |
259 |
|
|
ContactList onlythis = new ContactList(); |
260 |
|
|
ContactList onlyother = new ContactList(); |
261 |
|
|
for (Contact cont:this.contacts){ |
262 |
|
|
if (other.contacts.contains(cont)) { |
263 |
|
|
common.add(cont); |
264 |
|
|
} else{ |
265 |
|
|
onlythis.add(cont); |
266 |
|
|
} |
267 |
|
|
} |
268 |
|
|
for (Contact cont:other.contacts){ |
269 |
|
|
if (!this.contacts.contains(cont)){ |
270 |
|
|
onlyother.add(cont); |
271 |
|
|
} |
272 |
|
|
} |
273 |
duarte |
206 |
Graph commongraph = new Graph (common,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode); |
274 |
|
|
Graph onlythisgraph = new Graph (onlythis,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode); |
275 |
|
|
Graph onlyothergraph = new Graph (onlyother,getNodes(),sequence,cutoff,ct,other.pdbCode,other.chainCode,other.pdbChainCode); |
276 |
duarte |
189 |
HashMap<String,Graph> result = new HashMap<String,Graph>(); |
277 |
|
|
result.put("common", commongraph); |
278 |
|
|
result.put("onlythis", onlythisgraph); |
279 |
|
|
result.put("onlyother",onlyothergraph); |
280 |
|
|
return result; |
281 |
|
|
} |
282 |
duarte |
206 |
|
283 |
|
|
public boolean isModified(){ |
284 |
|
|
return modified; |
285 |
|
|
} |
286 |
|
|
|
287 |
|
|
public boolean isDirected(){ |
288 |
|
|
return directed; |
289 |
|
|
} |
290 |
|
|
|
291 |
|
|
public String getPdbCode() { |
292 |
|
|
return pdbCode; |
293 |
|
|
} |
294 |
|
|
|
295 |
|
|
public String getPdbChainCode(){ |
296 |
|
|
return pdbChainCode; |
297 |
|
|
} |
298 |
|
|
|
299 |
|
|
public String getChainCode(){ |
300 |
|
|
return chainCode; |
301 |
|
|
} |
302 |
|
|
|
303 |
|
|
public String getSequence(){ |
304 |
|
|
return sequence; |
305 |
|
|
} |
306 |
|
|
|
307 |
|
|
public int getFullLength(){ |
308 |
|
|
return fullLength; |
309 |
|
|
} |
310 |
|
|
|
311 |
|
|
public int getObsLength(){ |
312 |
|
|
return obsLength; |
313 |
|
|
} |
314 |
|
|
|
315 |
|
|
public int getNumContacts(){ |
316 |
|
|
// in theory we could return just numContacts, because we have taken care of updating it every time contacts changed |
317 |
|
|
// however we call directly contacts.size() as I feel is safer |
318 |
|
|
return contacts.size(); |
319 |
|
|
} |
320 |
|
|
|
321 |
|
|
public String getContactType() { |
322 |
|
|
return ct; |
323 |
|
|
} |
324 |
|
|
|
325 |
|
|
public double getCutoff(){ |
326 |
|
|
return cutoff; |
327 |
|
|
} |
328 |
duarte |
123 |
} |