ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 175
Committed: Fri May 25 17:31:58 2007 UTC (17 years, 9 months ago) by duarte
File size: 18502 byte(s)
Log Message:
FIXED BUG in initialisation of fullLength when reading from file. In case of no sequence and nodes provided, was not getting correctly the maximum value for contacts. Now using the method getMaxContact from new class ContactList
NEW FUNCTIONALITY in Graph:
-New member variable modified
-New methods addEdge, delEdge, restrictContactsToMaxRange, restrictContactsToMinRange, getContacts, getNodes, copy
-Improved slightly the implementation of getEdgeNbh 
FIXED BUG in initialisation of fullLength when reading from file. In case of no sequence and nodes provided, was not getting correctly the maximum value for contacts. Now using the method getMaxContact from ContactList
New class ContactList
NEW FUNCTIONALITY in Graph:
-New member variable modified
-New methods addEdge, delEdge, restrictContactsToMaxRange, restrictContactsToMinRange, getContacts, getNodes, copy
-Improved slightly the implementation of getEdgeNbh 
New method getRange in Contact
Line User Rev File contents
1 duarte 123 package proteinstructure;
2 duarte 134 import java.io.BufferedReader;
3     import java.io.File;
4     import java.io.FileNotFoundException;
5 duarte 123 import java.io.FileOutputStream;
6 duarte 134 import java.io.FileReader;
7 duarte 123 import java.io.PrintStream;
8     import java.io.IOException;
9 duarte 135 import java.sql.ResultSet;
10     import java.sql.SQLException;
11     import java.sql.Statement;
12 duarte 123 import java.util.ArrayList;
13 duarte 159 import java.util.Collections;
14 duarte 129 import java.util.TreeMap;
15 duarte 144 import java.util.regex.Matcher;
16     import java.util.regex.Pattern;
17 duarte 135 import tools.MySQLConnection;
18 duarte 123
19    
20     public class Graph {
21    
22 duarte 135 public final static String MYSQLSERVER="white";
23     public final static String MYSQLUSER=getUserName();
24     public final static String MYSQLPWD="nieve";
25 duarte 144
26     public final static String GRAPHFILEFORMATVERSION = "1.0";
27 duarte 135
28 duarte 175 ContactList contacts;
29 duarte 135 // nodes is a TreeMap of residue serials to residue types (3 letter code)
30 duarte 129 TreeMap<Integer,String> nodes;
31 duarte 159 public String sequence; // the full sequence (with unobserved residues and non-standard aas ='X')
32 duarte 143 public String accode;
33     public String chain;
34 duarte 159 public String chaincode="";
35 duarte 146 public double cutoff;
36     public String ct;
37 stehr 161 public boolean directed=false;
38 duarte 123
39 duarte 159 // fullLength is length of full sequence or:
40     // -if sequence not provided (when reading from db): length of everything except possible unobserved residues at end of chain
41     // -if sequence and nodes not provided (when reading from file and sequence field missing): length except possible unobserved residues at end of chain and possible nodes without contacts at end of chain
42     public int fullLength;
43     public int obsLength; // length without unobserved, non standard aas
44    
45     public int numContacts;
46    
47 duarte 175 public boolean modified;
48    
49 duarte 135 // these 2 fields only used when reading from db
50     int graphid=0;
51     int sm_id=0;
52    
/**
 * Builds a Graph from contacts and nodes that are already in memory.
 * fullLength is taken from the length of the sequence, obsLength from the number
 * of nodes and numContacts from the size of the contact list. The graph is
 * flagged as directed when the contact type contains a "/".
 * @param contacts the list of contacts (edges)
 * @param nodes map of residue serials to residue types
 * @param sequence the full sequence
 * @param cutoff the distance cutoff
 * @param ct the contact type
 * @param accode the accession code
 * @param chain the chain identifier
 * @param chaincode the pdb chain code
 */
public Graph (ContactList contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String accode, String chain, String chaincode) {
    // graph data
    this.contacts = contacts;
    this.cutoff = cutoff;
    this.nodes = nodes;
    this.sequence = sequence;

    // identification of the structure the graph was derived from
    this.accode = accode;
    this.chain = chain;
    this.chaincode = chaincode;
    this.ct = ct;

    // values derived from the data passed in
    this.fullLength = sequence.length();
    this.obsLength = nodes.size();
    this.numContacts = contacts.size();
    this.modified = false;

    // a "/" in the contact type marks a directed graph
    if (ct.contains("/")) {
        this.directed = true;
    }
}
81 duarte 135
/**
 * Constructs Graph object from graph db, given the dbname, accode, chaincode (classic pdb chain code), ct and cutoff.
 * The sequence is left blank because the graph db does not hold the full sequence;
 * fullLength is then the highest residue serial found among the nodes (0 if no nodes were read).
 * @param dbname name of the graph database
 * @param accode the accession code
 * @param chaincode the classic pdb chain code ("NULL" for a database null)
 * @param cutoff the distance cutoff
 * @param ct the contact type
 * @throws GraphIdNotFoundError if getgraphid cannot find a unique graph id
 */
public Graph(String dbname, String accode, String chaincode, double cutoff, String ct) throws GraphIdNotFoundError{
    this.cutoff=cutoff;
    this.accode=accode;
    this.ct=ct;
    // keep the pdb chain code we were asked for, otherwise the field stays at its "" default
    this.chaincode=chaincode;
    // we set the sequence to empty when we read from graph db. We don't have the full sequence in graph db
    // when we pass the sequence in getCM to the ContactMap constructor we want to have either a full sequence (with unobserveds) or a blank in case we don't have the info
    this.sequence="";
    //TODO graphs in db are never directed, so this doesn't really apply here. Must solve all this!
    if (ct.contains("/")){
        directed=true;
    }
    MySQLConnection conn = new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD,dbname);
    try {
        getgraphid(conn, chaincode); // initialises graphid, sm_id and chain
        read_graph_from_db(conn); // gets contacts, nodes and sequence
    } finally {
        // release the connection also when the graph id lookup fails
        conn.close();
    }
    this.obsLength=nodes.size();
    if (!sequence.equals("")){
        this.fullLength=sequence.length();
    } else if (nodes.isEmpty()){
        // nothing was read from db: Collections.max would throw on an empty key set
        this.fullLength=0;
    } else {
        // if nodes TreeMap has correct residue numbering then this should get the right full length,
        // we will only miss: gaps (unobserved residues) at the end of the sequence. Those we can't know unless full sequence is given
        this.fullLength=Collections.max(nodes.keySet());
    }
    this.numContacts=contacts.size();
    this.modified=false;
}
116 duarte 152
117     /**
118     * Constructs Graph object from graph db, given the graphid
119     * @param dbname
120     * @param graphid
121     */
122     public Graph(String dbname,int graphid) throws GraphIdNotFoundError{
123     this.graphid=graphid;
124     // we set the sequence to empty when we read from graph db. We don't have the full sequence in graph db
125     // when we pass the sequence in getCM to the ContactMap constructor we want to have either a full sequence (with unobserveds) or a blank in case we don't have the info
126     this.sequence="";
127     MySQLConnection conn = new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD,dbname);
128     read_graph_from_db(conn); // gets contacts, nodes and sequence
129     get_db_graph_info(conn); // gets accode, chaincode, chain, ct and cutoff from db (from graph_id)
130     conn.close();
131     //TODO graphs in db are never directed, so this doesn't really apply here. Must solve all this!
132     if (ct.contains("/")){
133     directed=true;
134     }
135 duarte 159 this.obsLength=nodes.size();
136     if (!sequence.equals("")){
137     this.fullLength=sequence.length();
138     } else {
139     // if nodes TreeMap has correct residue numbering then this should get the right full length,
140     // we will only miss: gaps (unobserved residues) at the end of the sequence. Those we can't know unless full sequence is given
141     this.fullLength=Collections.max(nodes.keySet());
142     }
143     this.numContacts=contacts.size();
144 duarte 175 this.modified=false;
145 duarte 152 }
146 duarte 129
/**
 * Constructs Graph object by reading a file with contacts.
 * If the contacts file doesn't have the sequence then the graph object won't have sequence or nodes.
 * That means it won't be possible to get a ContactMap from it using getCM because CM needs both sequence and nodes.
 * Header fields missing from the file keep their defaults: blank sequence, ct, accode, chain and
 * chaincode, and a cutoff of 0.0.
 * @param contactsfile path of the contacts file
 * @throws IOException if the file cannot be read
 * @throws FileNotFoundException if the file does not exist
 */
public Graph (String contactsfile) throws IOException, FileNotFoundException{
    // we set the sequence to blank when we read from file as we don't have the full sequence
    // if sequence is present in contactsfile then is read from there
    this.sequence="";
    this.ct="";
    this.cutoff=0.0;
    // we initialise accode, chain and chaincode to empty strings in case the file doesn't specify them
    this.accode="";
    this.chain="";
    this.chaincode="";
    read_graph_from_file(contactsfile); // initialises contacts, and nodes (only if sequence is given)
    // the contact type is only known once the file header has been parsed,
    // so the directed flag has to be derived after reading the file
    if (ct.contains("/")){
        directed=true;
    }
    if (!sequence.equals("")){
        this.fullLength=sequence.length();
        this.obsLength=nodes.size();
    } else {
        // if contacts have correct residue numbering then this should get the right full length up to the maximum node that makes a contact,
        // we will miss: nodes without contacts at the end of sequence and gaps (unobserved residues) at the end of the sequence.
        // We don't know more without nodes and sequence
        this.fullLength=contacts.getMaxNode();
        // in this case nodes holds no entries so we set obsLength=fullLength as we don't have the information
        this.obsLength=fullLength;
    }
    this.numContacts=contacts.size();
    this.modified=false;
}
183    
184 duarte 135 //TODO implement (from python) write_graph_to_db, do we really need it here??
185    
186     /** get user name from operating system (for use as database username) */
187     private static String getUserName() {
188     String user = null;
189     user = System.getProperty("user.name");
190     if(user == null) {
191     System.err.println("Could not get user name from operating system. Exiting");
192     System.exit(1);
193     }
194     return user;
195     }
196 duarte 144
197     public void read_graph_from_file (String contactsfile) throws FileNotFoundException, IOException {
198 duarte 175 contacts = new ContactList();
199 duarte 134 System.out.println("Reading contacts from file "+contactsfile);
200     BufferedReader fcont = new BufferedReader(new FileReader(new File(contactsfile)));
201     String line;
202     while ((line = fcont.readLine() ) != null ) {
203 duarte 144 Pattern p = Pattern.compile("^#");
204     Matcher m = p.matcher(line);
205     if (m.find()){
206     // Pattern ps = Pattern.compile("^#VER: (\\d\\.\\d)");
207     // Matcher ms = ps.matcher(line);
208     // if (ms.find()){
209     // if (!ms.group(1).equals(GRAPHFILEFORMATVERSION)){
210     // throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format version");
211     // }
212     // }
213     Pattern ps = Pattern.compile("^#SEQUENCE:\\s*(\\w+)$");
214     Matcher ms = ps.matcher(line);
215     if (ms.find()){
216     sequence=ms.group(1);
217     }
218     ps = Pattern.compile("^#PDB:\\s*(\\w+)");
219     ms = ps.matcher(line);
220     if (ms.find()){
221     accode=ms.group(1);
222     }
223     ps = Pattern.compile("^#PDB CHAIN CODE:\\s*(\\w)");
224     ms = ps.matcher(line);
225     if (ms.find()){
226     chaincode=ms.group(1);
227     }
228     ps = Pattern.compile("^#CHAIN:\\s*(\\w)");
229     ms = ps.matcher(line);
230     if (ms.find()){
231     chain=ms.group(1);
232     }
233     ps = Pattern.compile("^#CT:\\s*([a-zA-Z/]+)");
234     ms = ps.matcher(line);
235     if (ms.find()){
236     ct=ms.group(1);
237     }
238     ps = Pattern.compile("^#CUTOFF:\\s*(\\d+\\.\\d+)");
239     ms = ps.matcher(line);
240     if (ms.find()){
241     cutoff=Double.parseDouble(ms.group(1));
242     }
243     }
244     else{
245     int i = Integer.parseInt(line.split("\\s+")[0]);
246     int j = Integer.parseInt(line.split("\\s+")[1]);
247     contacts.add(new Contact(i,j));
248     }
249 duarte 134 }
250     fcont.close();
251 duarte 152 // if sequence was given we take nodes from it
252 duarte 144 nodes = new TreeMap<Integer, String>();
253     for (int i=0;i<sequence.length();i++){
254     String letter = String.valueOf(sequence.charAt(i));
255 duarte 152 nodes.put(i+1, AA.oneletter2threeletter(letter));
256 duarte 144 }
257    
258 duarte 134 }
259    
260 duarte 135 /**
261     * Reads contacts and nodes from db.
262     * The db must be a graph db following our standard format, i.e. must have tables:
263     * chain_graph, single_model_graph, single_model_node, single_model_edge
264     * We don't care here about the origin of the data (msdsd, pdbase, predicted) for the generation of the graph as long as it follows our data format
265     * We read both edges and nodes from single_model_edge and single_model_node.
266 duarte 152 * The sequence is set to blank, as we can't get the full sequence from graph db
267 duarte 135 * @param conn
268     */
269     public void read_graph_from_db(MySQLConnection conn){
270 duarte 175 contacts = new ContactList();
271 duarte 135 nodes = new TreeMap<Integer, String>();
272     try {
273 duarte 142 // we read only half of the matrix (contacts in one direction only) so that we have the same type of contacts as when creating Graph from Pdb object
274 duarte 135 String sql="SELECT i_num,j_num FROM single_model_edge WHERE graph_id="+graphid+" AND j_num>i_num ORDER BY i_num,j_num ";
275     Statement stmt = conn.createStatement();
276     ResultSet rsst = stmt.executeQuery(sql);
277     while (rsst.next()) {
278     int i=rsst.getInt(1);
279     int j=rsst.getInt(2);
280     contacts.add(new Contact(i,j));
281     }
282     rsst.close();
283     stmt.close();
284     sql="SELECT num,res FROM single_model_node WHERE graph_id="+graphid+" ORDER BY num ";
285     stmt = conn.createStatement();
286     rsst = stmt.executeQuery(sql);
287     while (rsst.next()){
288     int num=rsst.getInt(1);
289     String res=rsst.getString(2);
290     nodes.put(num, AA.oneletter2threeletter(res));
291     }
292     rsst.close();
293     stmt.close();
294     } catch (SQLException e) {
295     e.printStackTrace();
296     }
297    
298     }
299    
300 duarte 141 public void getgraphid (MySQLConnection conn, String chaincode) throws GraphIdNotFoundError{
301     // input is chaincode i.e. pdb chain code
302     // we take chain (internal chain identifier, pchain_code for msdsd and asym_id for pdbase) from pchain_code field in chain_graph
303     // (in the chain_graph table the internal chain identifier is called 'pchain_code')
304 duarte 135 int pgraphid=0;
305 duarte 141 String chainstr="='"+chaincode+"' ";
306     if (chaincode.equals("NULL")){
307     chainstr=" IS NULL ";
308     }
309 duarte 135 try {
310 duarte 141 String sql="SELECT graph_id, pchain_code FROM chain_graph WHERE accession_code='"+accode+"' AND chain_pdb_code"+chainstr+" AND dist="+cutoff;
311 duarte 135 Statement stmt = conn.createStatement();
312     ResultSet rsst = stmt.executeQuery(sql);
313     int check=0;
314     while (rsst.next()) {
315     check++;
316     pgraphid=rsst.getInt(1);
317 duarte 141 chain=rsst.getString(2);
318 duarte 135 }
319     if (check!=1){
320 duarte 141 System.err.println("No pgraph_id match or more than 1 match for accession_code="+accode+", chain_pdb_code="+chaincode+", dist="+cutoff);
321 duarte 135 }
322     rsst.close();
323     stmt.close();
324     // we set the ctstr to the same as ct except in ALL case, where it is BB+SC+BB/SC
325     String ctstr=ct;
326     if (ct.equals("ALL")){
327     ctstr="BB+SC+BB/SC";
328     }
329     sql="SELECT graph_id,single_model_id FROM single_model_graph WHERE pgraph_id="+pgraphid+" AND CT='"+ctstr+"' AND dist="+cutoff+" AND CR='(true)' AND CW=1";
330     stmt = conn.createStatement();
331     rsst = stmt.executeQuery(sql);
332     check=0;
333     while (rsst.next()){
334     check++;
335     graphid=rsst.getInt(1);
336     sm_id=rsst.getInt(2);
337     }
338     if (check!=1){
339     System.err.println("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
340     throw new GraphIdNotFoundError("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
341     }
342     } catch (SQLException e) {
343     e.printStackTrace();
344     }
345    
346     }
347    
348 duarte 152 public void get_db_graph_info(MySQLConnection conn) throws GraphIdNotFoundError {
349     try {
350     int pgraphid=0;
351     String sql="SELECT pgraph_id,CT,dist FROM single_model_graph WHERE graph_id="+graphid;
352     Statement stmt = conn.createStatement();
353     ResultSet rsst = stmt.executeQuery(sql);
354     int check=0;
355     while (rsst.next()) {
356     check++;
357     pgraphid=rsst.getInt(1);
358     ct=rsst.getString(2);
359     if (ct.equals("BB+SC+BB/SC")) ct="ALL";
360     cutoff=rsst.getDouble(3);
361     }
362     if (check!=1){
363     System.err.println("No pgraph_id match or more than 1 match for graph_id="+graphid);
364     throw new GraphIdNotFoundError("No pgraph_id match or more than 1 match for graph_id="+graphid+" in db"+conn.getDbname());
365     }
366     rsst.close();
367     stmt.close();
368     sql="SELECT accession_code, chain_pdb_code, pchain_code FROM chain_graph WHERE graph_id="+pgraphid;
369     stmt = conn.createStatement();
370     rsst = stmt.executeQuery(sql);
371     check=0;
372     while (rsst.next()){
373     check++;
374     accode=rsst.getString(1);
375     chaincode=rsst.getString(2);
376 duarte 158 // java returns a null if the field is a database null, we want actually the "NULL" string in that case
377     if (chaincode==null) chaincode="NULL";
378 duarte 152 chain=rsst.getString(3);
379     }
380     if (check!=1){
381     System.err.println("No accession_code+chain_pdb_code+pchain_code match or more than 1 match for graph_id="+pgraphid+" in chain_graph table");
382     }
383     rsst.close();
384     stmt.close();
385     } catch (SQLException e) {
386     e.printStackTrace();
387     }
388    
389     }
390    
391 duarte 123 public void write_contacts_to_file (String outfile) throws IOException {
392     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
393     for (Contact pair:contacts){
394     int i_resser=pair.i;
395     int j_resser=pair.j;
396     Out.println(i_resser+"\t"+j_resser);
397     }
398     Out.close();
399     }
400 duarte 144
401     public void write_graph_to_file (String outfile) throws IOException {
402     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
403     Out.println("#VER: "+GRAPHFILEFORMATVERSION);
404     Out.println("#SEQUENCE: "+sequence);
405     Out.println("#PDB: "+accode);
406     Out.println("#PDB CHAIN CODE: "+chaincode);
407     Out.println("#CHAIN: "+chain);
408     Out.println("#CT: "+ct);
409     Out.println("#CUTOFF: "+cutoff);
410     for (Contact pair:contacts){
411     int i_resser=pair.i;
412     int j_resser=pair.j;
413     Out.println(i_resser+"\t"+j_resser);
414     }
415     Out.close();
416     }
417 duarte 175
418 duarte 159 /**
419 duarte 175 * Gets list of contacts as a new ContactList (deep copied)
420     *
421     */
422     public ContactList getContacts(){
423     ContactList newContacts = new ContactList();
424     for (Contact cont:contacts){
425     newContacts.add(new Contact(cont.i,cont.j));
426     }
427     return newContacts;
428     }
429    
430     /**
431     * Gets TreeMap of nodes, deep copying
432     *
433     */
434     public TreeMap<Integer,String> getNodes(){
435     TreeMap<Integer,String> newNodes = new TreeMap<Integer,String>();
436     for (int resser:nodes.keySet()){
437     newNodes.put(resser, nodes.get(resser));
438     }
439     return newNodes;
440     }
441    
442     /**
443     * Deep copies this Graph object returning new one
444     * @return
445     */
446     public Graph copy(){
447     return new Graph(getContacts(),getNodes(),sequence,cutoff,ct,accode,chain,chaincode);
448     }
449    
450     /**
451 duarte 159 * Returns an int matrix with 1s for contacts and 0s for non contacts, i.e. the contact map
452     * In non-crossed cases this should give us the upper half matrix (contacts are only j>i)
453     * In crossed cases this gives us a full matrix (contacts are both j>i and i>j since they are directed)
454     * @return
455     */
456     public int[][] getIntMatrix(){
457     // this initialises the matrix to 0 (i.e. no contact)
458     int[][] cm = new int[fullLength][fullLength];
459     // we put a 1 for all given contacts
460     for (Contact cont:contacts){
461     int i_resser = cont.i;
462     int j_resser = cont.j;
463     cm[i_resser-1][j_resser-1]=1;
464 duarte 129 }
465     return cm;
466     }
467 duarte 159
468 duarte 165 /**
469     * Gets node neighbourhood given a residue serial
470     * @param resser
471     * @return
472     */
473     public TreeMap<Integer,String> getNodeNbh(int resser){
474     TreeMap<Integer,String> nbh = new TreeMap<Integer, String>();
475     //this could be implemented using the contact map matrix and scanning through 1 column/row
476     //it would be just slightly faster, here we do 2*numContacts iterations, using matrix would be only fullLength iterations
477     for (Contact cont:contacts){
478     if (cont.i==resser) nbh.put(cont.j, nodes.get(cont.j));
479     if (cont.j==resser) nbh.put(cont.i, nodes.get(cont.i));
480     }
481     return nbh;
482     }
483    
484     /**
485     * Gets edge neighbourhood (common neighbourhood) given a residue serial pair
486     * @param i_resser
487     * @param j_resser
488     * @return
489     */
490     public TreeMap<Integer,String> getEdgeNbh(int i_resser, int j_resser){
491     TreeMap<Integer,String> nbh = new TreeMap<Integer, String>();
492     TreeMap<Integer,String> i_nbhd = getNodeNbh(i_resser);
493     TreeMap<Integer,String> j_nbhd = getNodeNbh(j_resser);
494 duarte 175 if (j_nbhd.size()>=i_nbhd.size()) { //with this we will be slightly faster, always iterating through smallest TreeMap
495     for (int resser:i_nbhd.keySet()) {
496     if (j_nbhd.containsKey(resser)) nbh.put(resser, i_nbhd.get(resser));
497     }
498     } else {
499     for (int resser:j_nbhd.keySet()) {
500     if (i_nbhd.containsKey(resser)) nbh.put(resser, j_nbhd.get(resser));
501     }
502 duarte 165 }
503     return nbh;
504     }
505    
506 duarte 175 public void addEdge(Contact cont){
507     contacts.add(cont);
508     numContacts++;
509     modified=true;
510     }
511    
512     public void delEdge(Contact cont){
513     contacts.remove(cont);
514     numContacts--;
515     modified=true;
516     }
517    
518     public void restrictContactsToMaxRange(int range){
519     for (Contact cont:contacts){
520     if (cont.getRange()>range) delEdge(cont);
521     }
522     }
523    
524     public void restrictContactsToMinRange(int range){
525     for (Contact cont:contacts){
526     if (cont.getRange()<range) delEdge(cont);
527     }
528     }
529    
530 duarte 123 }