ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 234
Committed: Tue Jul 10 14:14:58 2007 UTC (17 years, 2 months ago) by duarte
File size: 10826 byte(s)
Log Message:
Made ContactList a TreeSet instead of an ArrayList, should improve performance (log(n) rather than linear)
REFACTORING: 
ContactList -> EdgeSet
Contact -> Edge
Line User Rev File contents
1 duarte 123 package proteinstructure;
2 duarte 191
3 duarte 123 import java.io.FileOutputStream;
4     import java.io.PrintStream;
5     import java.io.IOException;
6 duarte 129 import java.util.TreeMap;
7 duarte 189 import java.util.HashMap;
8 duarte 123
9 duarte 207 /**
10     * A residue interaction graph derived from a single chain pdb protein structure
11     *
12     * @author Jose Duarte
13     * Class: Graph
14     * Package: proteinstructure
15     */
16 duarte 123 public class Graph {
17    
18 duarte 144 public final static String GRAPHFILEFORMATVERSION = "1.0";
19 duarte 206
20 duarte 234 public EdgeSet contacts; // we keep it public to be able to re-reference the object directly (getContacts() copies it)
21 duarte 123
22 duarte 207 protected TreeMap<Integer,String> nodes; // nodes is a TreeMap of residue serials to residue types (3 letter code)
23     protected String sequence; // the full sequence (with unobserved residues and non-standard aas ='X')
24     protected String pdbCode;
25     protected String chainCode;
26 duarte 208 protected String pdbChainCode;
27 duarte 207 protected double cutoff;
28     protected String ct; // the contact type
29 duarte 208 protected boolean directed;
30 duarte 207
31 duarte 159 // fullLength is length of full sequence or:
32     // -if sequence not provided (when reading from db): length of everything except possible unobserved residues at end of chain
33     // -if sequence and nodes not provided (when reading from file and sequence field missing): length except possible unobserved residues at end of chain and possible nodes without contacts at end of chain
34 duarte 207 protected int fullLength;
35     protected int obsLength; // length without unobserved, non standard aas
36 duarte 159
37 duarte 207 protected int numContacts;
38 duarte 159
39 duarte 207 protected boolean modified;
40 duarte 175
41 duarte 207 public Graph() {
42    
43     }
44    
45 duarte 134 /**
46     * Constructs Graph object by passing ArrayList with contacts and TreeMap with nodes (res serials and types)
47 duarte 206 * Must also pass contact type, cutoff, pdbCode and chainCode
48 duarte 134 * @param contacts
49     * @param nodes
50     * @param sequence
51     * @param cutoff
52     * @param ct
53 duarte 206 * @param pdbCode
54     * @param chainCode
55 duarte 134 */
56 duarte 234 protected Graph (EdgeSet contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String pdbCode, String chainCode, String pdbChainCode) {
57 duarte 123 this.contacts=contacts;
58     this.cutoff=cutoff;
59 duarte 129 this.nodes=nodes;
60     this.sequence=sequence;
61 duarte 206 this.pdbCode=pdbCode;
62     this.chainCode=chainCode;
63     this.pdbChainCode=pdbChainCode;
64 duarte 123 this.ct=ct;
65 duarte 159 this.fullLength=sequence.length();
66     this.obsLength=nodes.size();
67     this.numContacts=contacts.size();
68 duarte 175 this.modified=false;
69 duarte 208 this.directed=false;
70 duarte 129 if (ct.contains("/")){
71     directed=true;
72     }
73 stehr 217
74     assert(this.pdbCode.equals(this.pdbCode.toLowerCase())); // pdb codes should be always lower case
75     assert(this.pdbChainCode.equals(this.pdbChainCode.toUpperCase())); // pdb chain codes should be always upper case
76 duarte 123 }
77 duarte 135
78 duarte 129
79 duarte 135 //TODO implement (from python) write_graph_to_db, do we really need it here??
80    
81 duarte 144 public void write_graph_to_file (String outfile) throws IOException {
82     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
83 duarte 208 Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION);
84 duarte 144 Out.println("#SEQUENCE: "+sequence);
85 duarte 206 Out.println("#PDB: "+pdbCode);
86     Out.println("#PDB CHAIN CODE: "+pdbChainCode);
87     Out.println("#CHAIN: "+chainCode);
88 duarte 144 Out.println("#CT: "+ct);
89     Out.println("#CUTOFF: "+cutoff);
90 duarte 234 for (Edge pair:contacts){
91 duarte 144 int i_resser=pair.i;
92     int j_resser=pair.j;
93     Out.println(i_resser+"\t"+j_resser);
94     }
95     Out.close();
96     }
97 duarte 175
98 duarte 159 /**
99 duarte 234 * Gets list of contacts as a new EdgeSet (deep copied)
100 duarte 175 *
101     */
102 duarte 234 public EdgeSet getContacts(){
103     EdgeSet newContacts = new EdgeSet();
104     for (Edge cont:contacts){
105     newContacts.add(new Edge(cont.i,cont.j));
106 duarte 175 }
107     return newContacts;
108     }
109    
110     /**
111     * Gets TreeMap of nodes, deep copying
112     *
113     */
114     public TreeMap<Integer,String> getNodes(){
115     TreeMap<Integer,String> newNodes = new TreeMap<Integer,String>();
116     for (int resser:nodes.keySet()){
117     newNodes.put(resser, nodes.get(resser));
118     }
119     return newNodes;
120     }
121    
122     /**
123     * Deep copies this Graph object returning new one
124     * @return
125     */
126     public Graph copy(){
127 duarte 206 return new Graph(getContacts(),getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
128 duarte 175 }
129    
130     /**
131 duarte 232 * Gets a reference to this Graph deep copying contacts but re-referencing nodes
132     * @return
133     */
134     public Graph copyKeepingNodes(){
135     return new Graph(getContacts(),nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
136     }
137    
138     /**
139 duarte 159 * Returns an int matrix with 1s for contacts and 0s for non contacts, i.e. the contact map
140     * In non-crossed cases this should give us the upper half matrix (contacts are only j>i)
141     * In crossed cases this gives us a full matrix (contacts are both j>i and i>j since they are directed)
142     * @return
143     */
144     public int[][] getIntMatrix(){
145     // this initialises the matrix to 0 (i.e. no contact)
146     int[][] cm = new int[fullLength][fullLength];
147     // we put a 1 for all given contacts
148 duarte 234 for (Edge cont:contacts){
149 duarte 159 int i_resser = cont.i;
150     int j_resser = cont.j;
151     cm[i_resser-1][j_resser-1]=1;
152 duarte 129 }
153     return cm;
154     }
155 duarte 159
156 duarte 165 /**
157 duarte 179 * Gets a node's residue type given the residue serial
158     * @param resser
159     * @return
160     */
161     public String getResType(int resser){
162     return nodes.get(resser);
163     }
164    
165     /**
166 duarte 165 * Gets node neighbourhood given a residue serial
167     * @param resser
168     * @return
169     */
170 duarte 179 public NodeNbh getNodeNbh(int resser){
171     NodeNbh nbh = new NodeNbh(resser, getResType(resser));
172 duarte 165 //this could be implemented using the contact map matrix and scanning through 1 column/row
173     //it would be just slightly faster, here we do 2*numContacts iterations, using matrix would be only fullLength iterations
174 duarte 179 //however we would then have the overhead of creating the matrix
175 duarte 234 for (Edge cont:contacts){
176 duarte 165 if (cont.i==resser) nbh.put(cont.j, nodes.get(cont.j));
177     if (cont.j==resser) nbh.put(cont.i, nodes.get(cont.i));
178     }
179     return nbh;
180     }
181    
182     /**
183     * Gets edge neighbourhood (common neighbourhood) given a residue serial pair
184     * @param i_resser
185     * @param j_resser
186     * @return
187     */
188 duarte 179 public EdgeNbh getEdgeNbh(int i_resser, int j_resser){
189     EdgeNbh nbh = new EdgeNbh(i_resser, getResType(i_resser), j_resser, getResType(j_resser));
190     NodeNbh i_nbhd = getNodeNbh(i_resser);
191     NodeNbh j_nbhd = getNodeNbh(j_resser);
192 duarte 175 if (j_nbhd.size()>=i_nbhd.size()) { //with this we will be slightly faster, always iterating through smallest TreeMap
193     for (int resser:i_nbhd.keySet()) {
194     if (j_nbhd.containsKey(resser)) nbh.put(resser, i_nbhd.get(resser));
195     }
196     } else {
197     for (int resser:j_nbhd.keySet()) {
198     if (i_nbhd.containsKey(resser)) nbh.put(resser, j_nbhd.get(resser));
199     }
200 duarte 165 }
201     return nbh;
202     }
203 duarte 232
204     /**
205     * Gets 2nd shell node neighbourhood
206     * @param resser
207     */
208     public NodeNbh get2ndshellNodeNbh(int resser){
209     // first we create a NodeNbh object for the second shell, central residue is given resser
210     NodeNbh nbh2ndshell = new NodeNbh(resser,getResType(resser));
211     // we get 1st neighbourhood
212     NodeNbh nbh = this.getNodeNbh(resser);
213     for (int nb:nbh.keySet()){
214     NodeNbh nbh2 = this.getNodeNbh(nb); // for each first neighbour we take its neighbourhood
215     for (int nb2:nbh2.keySet()){
216     if (nb2!=resser && !nbh.containsKey(nb2)){ // if the 2nd neighbour nb2 is not the given resser or is not a 1st neighbour
217     nbh2ndshell.put(nb2, getResType(nb2));
218     }
219     }
220     }
221     return nbh2ndshell;
222     }
223    
224 duarte 234 public void addEdge(Edge cont){
225     contacts.add(cont); // contacts is a TreeSet and thus takes care of duplicates
226     int oldNumContacts = numContacts;
227     numContacts=getNumContacts();
228     // if number of contacts changed that means we actually added a new contact and thus we modified the graph
229     if (numContacts!=oldNumContacts) modified=true;
230    
231 duarte 175 }
232    
233 duarte 234 public void delEdge(Edge cont){
234 duarte 175 contacts.remove(cont);
235     numContacts--;
236     modified=true;
237     }
238    
239     public void restrictContactsToMaxRange(int range){
240 duarte 234 EdgeSet edgesToDelete = new EdgeSet();
241     for (Edge cont:contacts){
242 duarte 179 if (cont.getRange()>range) edgesToDelete.add(cont);
243 duarte 175 }
244 duarte 234 for (Edge cont:edgesToDelete){
245 duarte 179 delEdge(cont);
246     }
247 duarte 175 }
248    
249     public void restrictContactsToMinRange(int range){
250 duarte 234 EdgeSet edgesToDelete = new EdgeSet();
251     for (Edge cont:contacts){
252 duarte 179 if (cont.getRange()<range) edgesToDelete.add(cont);
253 duarte 175 }
254 duarte 234 for (Edge cont:edgesToDelete){
255 duarte 179 delEdge(cont);
256     }
257 duarte 175 }
258 duarte 189
259 duarte 191 /**
260     * Returns a HashMap with all edge neighbourhood sizes (if they are >0) for each cell in the contact map
261     * @return
262     */
263 duarte 234 public HashMap<Edge,Integer> getAllEdgeNbhSizes() {
264     HashMap<Edge,Integer> sizes = new HashMap<Edge, Integer>();
265 duarte 191 if (!directed) {
266     for (int i=1; i<fullLength;i++){
267     for (int j=i+1; j<fullLength;j++){
268     int size = getEdgeNbh(i, j).size();
269 duarte 234 if (size>0) sizes.put(new Edge(i,j), size);
270 duarte 191 }
271     }
272     } else {
273     for (int i=1; i<fullLength;i++){
274     for (int j=1; j<fullLength;j++){
275     if (i!=j){
276     int size = getEdgeNbh(i, j).size();
277 duarte 234 if (size>0) sizes.put(new Edge(i,j), size);
278 duarte 191 }
279     }
280     }
281     }
282     return sizes;
283     }
284    
285 duarte 189 //TODO not sure what kind of return we want, for now is a HashMap with three graph objects
286     public HashMap<String,Graph> compare(Graph other) throws Exception{
287     //first check that other has same sequence than this, otherwise throw exception
288     if (!this.sequence.equals(other.sequence)){
289     //TODO throw specific exception
290     throw new Exception("Sequence of 2 graphs to compare differ, can't compare them.");
291     }
292 duarte 234 EdgeSet common = new EdgeSet();
293     EdgeSet onlythis = new EdgeSet();
294     EdgeSet onlyother = new EdgeSet();
295     for (Edge cont:this.contacts){
296 duarte 189 if (other.contacts.contains(cont)) {
297     common.add(cont);
298     } else{
299     onlythis.add(cont);
300     }
301     }
302 duarte 234 for (Edge cont:other.contacts){
303 duarte 189 if (!this.contacts.contains(cont)){
304     onlyother.add(cont);
305     }
306     }
307 duarte 206 Graph commongraph = new Graph (common,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
308     Graph onlythisgraph = new Graph (onlythis,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
309     Graph onlyothergraph = new Graph (onlyother,getNodes(),sequence,cutoff,ct,other.pdbCode,other.chainCode,other.pdbChainCode);
310 duarte 189 HashMap<String,Graph> result = new HashMap<String,Graph>();
311     result.put("common", commongraph);
312     result.put("onlythis", onlythisgraph);
313     result.put("onlyother",onlyothergraph);
314     return result;
315     }
316 duarte 206
317     public boolean isModified(){
318     return modified;
319     }
320    
321     public boolean isDirected(){
322     return directed;
323     }
324    
325     public String getPdbCode() {
326     return pdbCode;
327     }
328    
329     public String getPdbChainCode(){
330     return pdbChainCode;
331     }
332    
333     public String getChainCode(){
334     return chainCode;
335     }
336    
337     public String getSequence(){
338     return sequence;
339     }
340    
341     public int getFullLength(){
342     return fullLength;
343     }
344    
345     public int getObsLength(){
346     return obsLength;
347     }
348    
349     public int getNumContacts(){
350     // in theory we could return just numContacts, because we have taken care of updating it every time contacts changed
351     // however we call directly contacts.size() as I feel is safer
352     return contacts.size();
353     }
354    
355     public String getContactType() {
356     return ct;
357     }
358    
359     public double getCutoff(){
360     return cutoff;
361     }
362 duarte 123 }