ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 207
Committed: Wed Jun 27 11:06:34 2007 UTC (17 years, 3 months ago) by duarte
File size: 9771 byte(s)
Log Message:
Restructured construction of Pdb and Graph objects: now subclasses for each case
Cleaned up and made consistent database connections
Now can also pass a MySQLConnection in all cases (as well as having default values for a default connection)
PdbaseInfo and MsdsdInfo classes removed: now merged into PdbasePdb and MsdsdPdb respectively
Updated testPdb and compareCMs following these changes

Line User Rev File contents
1 duarte 123 package proteinstructure;
2 duarte 191
3 duarte 123 import java.io.FileOutputStream;
4     import java.io.PrintStream;
5     import java.io.IOException;
6 duarte 129 import java.util.TreeMap;
7 duarte 189 import java.util.HashMap;
8 duarte 123
9 duarte 207 /**
10     * A residue interaction graph derived from a single chain pdb protein structure
11     *
12     * @author Jose Duarte
13     * Class: Graph
14     * Package: proteinstructure
15     */
16 duarte 123 public class Graph {
17    
18 duarte 144 public final static String GRAPHFILEFORMATVERSION = "1.0";
19 duarte 206
20 duarte 207 public ContactList contacts; // we keep it public to be able to re-reference the object directly (getContacts() copies it)
21 duarte 123
22 duarte 207 protected TreeMap<Integer,String> nodes; // nodes is a TreeMap of residue serials to residue types (3 letter code)
23     protected String sequence; // the full sequence (with unobserved residues and non-standard aas ='X')
24     protected String pdbCode;
25     protected String chainCode;
26     protected String pdbChainCode="";
27     protected double cutoff;
28     protected String ct; // the contact type
29     protected boolean directed=false;
30    
31 duarte 159 // fullLength is length of full sequence or:
32     // -if sequence not provided (when reading from db): length of everything except possible unobserved residues at end of chain
33     // -if sequence and nodes not provided (when reading from file and sequence field missing): length except possible unobserved residues at end of chain and possible nodes without contacts at end of chain
34 duarte 207 protected int fullLength;
35     protected int obsLength; // length without unobserved, non standard aas
36 duarte 159
37 duarte 207 protected int numContacts;
38 duarte 159
39 duarte 207 protected boolean modified;
40 duarte 175
41 duarte 207 public Graph() {
42    
43     }
44    
45 duarte 134 /**
46     * Constructs Graph object by passing ArrayList with contacts and TreeMap with nodes (res serials and types)
47 duarte 206 * Must also pass contact type, cutoff, pdbCode and chainCode
48 duarte 134 * @param contacts
49     * @param nodes
50     * @param sequence
51     * @param cutoff
52     * @param ct
53 duarte 206 * @param pdbCode
54     * @param chainCode
55 duarte 134 */
56 duarte 206 public Graph (ContactList contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String pdbCode, String chainCode, String pdbChainCode) {
57 duarte 123 this.contacts=contacts;
58     this.cutoff=cutoff;
59 duarte 129 this.nodes=nodes;
60     this.sequence=sequence;
61 duarte 206 this.pdbCode=pdbCode;
62     this.chainCode=chainCode;
63     this.pdbChainCode=pdbChainCode;
64 duarte 123 this.ct=ct;
65 duarte 159 this.fullLength=sequence.length();
66     this.obsLength=nodes.size();
67     this.numContacts=contacts.size();
68 duarte 175 this.modified=false;
69 duarte 129 if (ct.contains("/")){
70     directed=true;
71     }
72 duarte 123 }
73 duarte 135
74 duarte 129
75 duarte 135 //TODO implement (from python) write_graph_to_db, do we really need it here??
76    
77 duarte 123 public void write_contacts_to_file (String outfile) throws IOException {
78     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
79     for (Contact pair:contacts){
80     int i_resser=pair.i;
81     int j_resser=pair.j;
82     Out.println(i_resser+"\t"+j_resser);
83     }
84     Out.close();
85     }
86 duarte 144
87     public void write_graph_to_file (String outfile) throws IOException {
88     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
89     Out.println("#VER: "+GRAPHFILEFORMATVERSION);
90     Out.println("#SEQUENCE: "+sequence);
91 duarte 206 Out.println("#PDB: "+pdbCode);
92     Out.println("#PDB CHAIN CODE: "+pdbChainCode);
93     Out.println("#CHAIN: "+chainCode);
94 duarte 144 Out.println("#CT: "+ct);
95     Out.println("#CUTOFF: "+cutoff);
96     for (Contact pair:contacts){
97     int i_resser=pair.i;
98     int j_resser=pair.j;
99     Out.println(i_resser+"\t"+j_resser);
100     }
101     Out.close();
102     }
103 duarte 175
104 duarte 159 /**
105 duarte 175 * Gets list of contacts as a new ContactList (deep copied)
106     *
107     */
108     public ContactList getContacts(){
109     ContactList newContacts = new ContactList();
110     for (Contact cont:contacts){
111     newContacts.add(new Contact(cont.i,cont.j));
112     }
113     return newContacts;
114     }
115    
116     /**
117     * Gets TreeMap of nodes, deep copying
118     *
119     */
120     public TreeMap<Integer,String> getNodes(){
121     TreeMap<Integer,String> newNodes = new TreeMap<Integer,String>();
122     for (int resser:nodes.keySet()){
123     newNodes.put(resser, nodes.get(resser));
124     }
125     return newNodes;
126     }
127    
128     /**
129     * Deep copies this Graph object returning new one
130     * @return
131     */
132     public Graph copy(){
133 duarte 206 return new Graph(getContacts(),getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
134 duarte 175 }
135    
136     /**
137 duarte 159 * Returns an int matrix with 1s for contacts and 0s for non contacts, i.e. the contact map
138     * In non-crossed cases this should give us the upper half matrix (contacts are only j>i)
139     * In crossed cases this gives us a full matrix (contacts are both j>i and i>j since they are directed)
140     * @return
141     */
142     public int[][] getIntMatrix(){
143     // this initialises the matrix to 0 (i.e. no contact)
144     int[][] cm = new int[fullLength][fullLength];
145     // we put a 1 for all given contacts
146     for (Contact cont:contacts){
147     int i_resser = cont.i;
148     int j_resser = cont.j;
149     cm[i_resser-1][j_resser-1]=1;
150 duarte 129 }
151     return cm;
152     }
153 duarte 159
154 duarte 165 /**
155 duarte 179 * Gets a node's residue type given the residue serial
156     * @param resser
157     * @return
158     */
159     public String getResType(int resser){
160     return nodes.get(resser);
161     }
162    
163     /**
164 duarte 165 * Gets node neighbourhood given a residue serial
165     * @param resser
166     * @return
167     */
168 duarte 179 public NodeNbh getNodeNbh(int resser){
169     NodeNbh nbh = new NodeNbh(resser, getResType(resser));
170 duarte 165 //this could be implemented using the contact map matrix and scanning through 1 column/row
171     //it would be just slightly faster, here we do 2*numContacts iterations, using matrix would be only fullLength iterations
172 duarte 179 //however we would then have the overhead of creating the matrix
173 duarte 165 for (Contact cont:contacts){
174     if (cont.i==resser) nbh.put(cont.j, nodes.get(cont.j));
175     if (cont.j==resser) nbh.put(cont.i, nodes.get(cont.i));
176     }
177     return nbh;
178     }
179    
180     /**
181     * Gets edge neighbourhood (common neighbourhood) given a residue serial pair
182     * @param i_resser
183     * @param j_resser
184     * @return
185     */
186 duarte 179 public EdgeNbh getEdgeNbh(int i_resser, int j_resser){
187     EdgeNbh nbh = new EdgeNbh(i_resser, getResType(i_resser), j_resser, getResType(j_resser));
188     NodeNbh i_nbhd = getNodeNbh(i_resser);
189     NodeNbh j_nbhd = getNodeNbh(j_resser);
190 duarte 175 if (j_nbhd.size()>=i_nbhd.size()) { //with this we will be slightly faster, always iterating through smallest TreeMap
191     for (int resser:i_nbhd.keySet()) {
192     if (j_nbhd.containsKey(resser)) nbh.put(resser, i_nbhd.get(resser));
193     }
194     } else {
195     for (int resser:j_nbhd.keySet()) {
196     if (i_nbhd.containsKey(resser)) nbh.put(resser, j_nbhd.get(resser));
197     }
198 duarte 165 }
199     return nbh;
200     }
201    
202 duarte 175 public void addEdge(Contact cont){
203     contacts.add(cont);
204     numContacts++;
205     modified=true;
206     }
207    
208     public void delEdge(Contact cont){
209     contacts.remove(cont);
210     numContacts--;
211     modified=true;
212     }
213    
214     public void restrictContactsToMaxRange(int range){
215 duarte 179 ContactList edgesToDelete = new ContactList();
216 duarte 175 for (Contact cont:contacts){
217 duarte 179 if (cont.getRange()>range) edgesToDelete.add(cont);
218 duarte 175 }
219 duarte 179 for (Contact cont:edgesToDelete){
220     delEdge(cont);
221     }
222 duarte 175 }
223    
224     public void restrictContactsToMinRange(int range){
225 duarte 179 ContactList edgesToDelete = new ContactList();
226 duarte 175 for (Contact cont:contacts){
227 duarte 179 if (cont.getRange()<range) edgesToDelete.add(cont);
228 duarte 175 }
229 duarte 179 for (Contact cont:edgesToDelete){
230     delEdge(cont);
231     }
232 duarte 175 }
233 duarte 189
234 duarte 191 /**
235     * Returns a HashMap with all edge neighbourhood sizes (if they are >0) for each cell in the contact map
236     * @return
237     */
238     public HashMap<Contact,Integer> getAllEdgeNbhSizes() {
239     HashMap<Contact,Integer> sizes = new HashMap<Contact, Integer>();
240     if (!directed) {
241     for (int i=1; i<fullLength;i++){
242     for (int j=i+1; j<fullLength;j++){
243     int size = getEdgeNbh(i, j).size();
244     if (size>0) sizes.put(new Contact(i,j), size);
245     }
246     }
247     } else {
248     for (int i=1; i<fullLength;i++){
249     for (int j=1; j<fullLength;j++){
250     if (i!=j){
251     int size = getEdgeNbh(i, j).size();
252     if (size>0) sizes.put(new Contact(i,j), size);
253     }
254     }
255     }
256     }
257     return sizes;
258     }
259    
260 duarte 189 //TODO not sure what kind of return we want, for now is a HashMap with three graph objects
261     public HashMap<String,Graph> compare(Graph other) throws Exception{
262     //first check that other has same sequence than this, otherwise throw exception
263     if (!this.sequence.equals(other.sequence)){
264     //TODO throw specific exception
265     throw new Exception("Sequence of 2 graphs to compare differ, can't compare them.");
266     }
267     ContactList common = new ContactList();
268     ContactList onlythis = new ContactList();
269     ContactList onlyother = new ContactList();
270     for (Contact cont:this.contacts){
271     if (other.contacts.contains(cont)) {
272     common.add(cont);
273     } else{
274     onlythis.add(cont);
275     }
276     }
277     for (Contact cont:other.contacts){
278     if (!this.contacts.contains(cont)){
279     onlyother.add(cont);
280     }
281     }
282 duarte 206 Graph commongraph = new Graph (common,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
283     Graph onlythisgraph = new Graph (onlythis,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
284     Graph onlyothergraph = new Graph (onlyother,getNodes(),sequence,cutoff,ct,other.pdbCode,other.chainCode,other.pdbChainCode);
285 duarte 189 HashMap<String,Graph> result = new HashMap<String,Graph>();
286     result.put("common", commongraph);
287     result.put("onlythis", onlythisgraph);
288     result.put("onlyother",onlyothergraph);
289     return result;
290     }
291 duarte 206
292     public boolean isModified(){
293     return modified;
294     }
295    
296     public boolean isDirected(){
297     return directed;
298     }
299    
300     public String getPdbCode() {
301     return pdbCode;
302     }
303    
304     public String getPdbChainCode(){
305     return pdbChainCode;
306     }
307    
308     public String getChainCode(){
309     return chainCode;
310     }
311    
312     public String getSequence(){
313     return sequence;
314     }
315    
316     public int getFullLength(){
317     return fullLength;
318     }
319    
320     public int getObsLength(){
321     return obsLength;
322     }
323    
324     public int getNumContacts(){
325     // in theory we could return just numContacts, because we have taken care of updating it every time contacts changed
326     // however we call directly contacts.size() as I feel is safer
327     return contacts.size();
328     }
329    
330     public String getContactType() {
331     return ct;
332     }
333    
334     public double getCutoff(){
335     return cutoff;
336     }
337 duarte 123 }