ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/DbGraph.java
Revision: 234
Committed: Tue Jul 10 14:14:58 2007 UTC (17 years, 2 months ago) by duarte
File size: 10140 byte(s)
Log Message:
Made ContactList a TreeSet instead of an ArrayList, should improve performance (log(n) rather than linear)
REFACTORING: 
ContactList -> EdgeSet
Contact -> Edge
Line User Rev File contents
1 duarte 207 package proteinstructure;
2    
3     import java.sql.ResultSet;
4     import java.sql.SQLException;
5     import java.sql.Statement;
6     import java.util.Collections;
7     import java.util.TreeMap;
8    
9     import tools.MySQLConnection;
10    
11     /**
12     * A residue interaction graph derived from a single chain pdb protein structure loaded from a graph database in aglappe's format
13     *
14     * @author Jose Duarte
15     * Class: DbGraph
16     * Package: proteinstructure
17     */
18     public class DbGraph extends Graph {
19    
20     private final static String MYSQLSERVER="white";
21     private final static String MYSQLUSER=MySQLConnection.getUserName();
22     private final static String MYSQLPWD="nieve";
23    
24 duarte 208 private final static String DEFAULT_CR ="(true)"; // default contact range (CR field in graph db)
25     private final static String DEFAULT_CW ="1"; // default contact weight (CW field in graph db)
26    
27 duarte 207 private int graphid=0;
28     //private int sm_id=0; // for future use
29    
30     private String dbname;
31     private MySQLConnection conn;
32    
33     /**
34     * Constructs Graph object from graph db, given the dbname, pdbCode, pdbChainCode (classic pdb chain code), ct and cutoff
35     * and passing a MySQLConnection
36     * @param dbname
37     * @param conn
38     * @param pdbCode
39     * @param pdbChainCode
40     * @param cutoff
41     * @param ct
42     * @throws GraphIdNotFoundError
43 duarte 210 * @throws SQLException
44 duarte 207 */
45 duarte 210 public DbGraph(String dbname, MySQLConnection conn, String pdbCode, String pdbChainCode, double cutoff, String ct) throws GraphIdNotFoundError, SQLException {
46 duarte 207 this.dbname=dbname;
47     this.conn=conn;
48     this.cutoff=cutoff;
49 stehr 217 this.pdbCode=pdbCode.toLowerCase(); // our convention: pdb codes are lower case
50     this.pdbChainCode=pdbChainCode.toUpperCase(); // our convention: chain codes are upper case
51 duarte 207 this.ct=ct;
52 duarte 208 this.directed=false;
53 duarte 207 // we set the sequence to empty when we read from graph db. We don't have the full sequence in graph db
54     // when we pass the sequence in getCM to the ContactMap constructor we want to have either a full sequence (with unobserveds) or a blank in case we don't have the info
55     this.sequence="";
56     //TODO graphs in db are never directed, so this doesn't really apply here. Must solve all this!
57     if (ct.contains("/")){
58     directed=true;
59     }
60    
61     getgraphid(); // initialises graphid, sm_id and chainCode
62     read_graph_from_db(); // gets contacts, nodes and sequence
63    
64     this.obsLength=nodes.size();
65     if (!sequence.equals("")){
66     this.fullLength=sequence.length();
67     } else {
68     // if nodes TreeMap has correct residue numbering then this should get the right full length,
69     // we will only miss: gaps (unobserved residues) at the end of the sequence. Those we can't know unless full sequence is given
70     this.fullLength=Collections.max(nodes.keySet());
71     }
72     this.numContacts=contacts.size();
73     this.modified=false;
74     }
75    
76     /**
77     * Constructs Graph object from graph db, given the dbname, pdbCode, pdbChainCode (classic pdb chain code), ct and cutoff
78     * MySQLConnection is taken from defaults in DbGraph class: MYSQLSERVER, MYSQLUSER, MYSQLPWD
79     * @param dbname
80     * @param pdbCode
81     * @param pdbChainCode
82     * @param cutoff
83     * @param ct
84     * @throws GraphIdNotFoundError
85     * @throws SQLException
86     */
87     public DbGraph(String dbname, String pdbCode, String pdbChainCode, double cutoff, String ct) throws GraphIdNotFoundError, SQLException{
88     this(dbname,new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD),pdbCode,pdbChainCode,cutoff,ct);
89     }
90    
91    
92     /**
93     * Constructs Graph object from graph db, given the graphid and passing a MySQLConnection
94     * @param dbname
95     * @param conn
96     * @param graphid
97     * @throws GraphIdNotFoundError
98 duarte 210 * @throws SQLException
99 duarte 207 */
100 duarte 210 public DbGraph(String dbname, MySQLConnection conn, int graphid) throws GraphIdNotFoundError, SQLException {
101 duarte 207 this.dbname=dbname;
102     this.conn=conn;
103     this.graphid=graphid;
104 duarte 210 this.directed=false;
105 duarte 207 // we set the sequence to empty when we read from graph db. We don't have the full sequence in graph db
106     // when we pass the sequence in getCM to the ContactMap constructor we want to have either a full sequence (with unobserveds) or a blank in case we don't have the info
107     this.sequence="";
108    
109     read_graph_from_db(); // gets contacts, nodes and sequence
110     get_db_graph_info(); // gets pdbCode, pdbChainCode, chainCode, ct and cutoff from db (from graph_id)
111    
112     //TODO graphs in db are never directed, so this doesn't really apply here. Must solve all this!
113     if (ct.contains("/")){
114     directed=true;
115     }
116     this.obsLength=nodes.size();
117     if (!sequence.equals("")){
118     this.fullLength=sequence.length();
119     } else {
120     // if nodes TreeMap has correct residue numbering then this should get the right full length,
121     // we will only miss: gaps (unobserved residues) at the end of the sequence. Those we can't know unless full sequence is given
122     this.fullLength=Collections.max(nodes.keySet());
123     }
124     this.numContacts=contacts.size();
125     this.modified=false;
126     }
127    
128     /**
129     * Constructs Graph object from graph db, given the graphid
130     * MySQLConnection is taken from defaults in DbGraph class: MYSQLSERVER, MYSQLUSER, MYSQLPWD
131     * @param dbname
132     * @param graphid
133     * @throws GraphIdNotFoundError
134     * @throws SQLException
135     */
136     public DbGraph(String dbname, int graphid) throws GraphIdNotFoundError, SQLException{
137     this(dbname,new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD), graphid);
138     }
139    
140     /**
141     * Reads contacts and nodes from db.
142     * The db must be a graph db following our standard format, i.e. must have tables:
143     * chain_graph, single_model_graph, single_model_node, single_model_edge
144     * We don't care here about the origin of the data (msdsd, pdbase, predicted) for the generation of the graph as long as it follows our data format
145     * We read both edges and nodes from single_model_edge and single_model_node.
146     * The sequence is set to blank, as we can't get the full sequence from graph db
147     * @param conn
148 duarte 210 * @throws SQLException
149 duarte 207 */
150 duarte 210 private void read_graph_from_db() throws SQLException{
151 duarte 234 contacts = new EdgeSet();
152 duarte 207 nodes = new TreeMap<Integer, String>();
153    
154 duarte 210 // we read only half of the matrix (contacts in one direction only) so that we have the same type of contacts as when creating Graph from Pdb object
155     String sql="SELECT i_num,j_num FROM "+dbname+".single_model_edge WHERE graph_id="+graphid+" AND j_num>i_num ORDER BY i_num,j_num ";
156     Statement stmt = conn.createStatement();
157     ResultSet rsst = stmt.executeQuery(sql);
158     while (rsst.next()) {
159     int i=rsst.getInt(1);
160     int j=rsst.getInt(2);
161 duarte 234 contacts.add(new Edge(i,j));
162 duarte 210 }
163     rsst.close();
164     stmt.close();
165     sql="SELECT num,res FROM "+dbname+".single_model_node WHERE graph_id="+graphid+" ORDER BY num ";
166     stmt = conn.createStatement();
167     rsst = stmt.executeQuery(sql);
168     while (rsst.next()){
169     int num=rsst.getInt(1);
170     String res=rsst.getString(2);
171     nodes.put(num, AA.oneletter2threeletter(res));
172     }
173     rsst.close();
174     stmt.close();
175 duarte 207 }
176    
177 duarte 210 private void getgraphid () throws GraphIdNotFoundError, SQLException{
178 duarte 207 // input is pdbChainCode i.e. pdb chain code
179     // we take chainCode (internal chain identifier, pchain_code for msdsd and asym_id for pdbase) from pchain_code field in chain_graph
180     // (in the chain_graph table the internal chain identifier is called 'pchain_code')
181     int pgraphid=0;
182     String chainstr="='"+pdbChainCode+"' ";
183     if (pdbChainCode.equals("NULL")){
184     chainstr=" IS NULL ";
185     }
186 duarte 210
187     String sql="SELECT graph_id, pchain_code FROM "+dbname+".chain_graph WHERE accession_code='"+pdbCode+"' AND chain_pdb_code"+chainstr+" AND dist="+cutoff;
188     Statement stmt = conn.createStatement();
189     ResultSet rsst = stmt.executeQuery(sql);
190     int check=0;
191     while (rsst.next()) {
192     check++;
193     pgraphid=rsst.getInt(1);
194     chainCode=rsst.getString(2);
195 duarte 207 }
196 duarte 210 if (check!=1){
197     System.err.println("No pgraph_id match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode+", dist="+cutoff);
198     }
199     rsst.close();
200     stmt.close();
201     // we set the ctstr to the same as ct except in ALL case, where it is BB+SC+BB/SC
202     String ctstr=ct;
203     if (ct.equals("ALL")){
204     ctstr="BB+SC+BB/SC";
205     }
206     sql="SELECT graph_id,single_model_id FROM "+dbname+".single_model_graph WHERE pgraph_id="+pgraphid+" AND CT='"+ctstr+"' AND dist="+cutoff+" AND CR='"+DEFAULT_CR+"' AND CW="+DEFAULT_CW;
207     stmt = conn.createStatement();
208     rsst = stmt.executeQuery(sql);
209     check=0;
210     while (rsst.next()){
211     check++;
212     graphid=rsst.getInt(1);
213     //sm_id=rsst.getInt(2); // we might want to use it in the future
214     }
215     if (check!=1){
216 stehr 215 //System.err.println("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
217 duarte 210 throw new GraphIdNotFoundError("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
218     }
219 duarte 207 }
220    
221 duarte 210 private void get_db_graph_info() throws GraphIdNotFoundError, SQLException {
222 duarte 207 int pgraphid=0;
223     String sql="SELECT pgraph_id,CT,dist FROM "+dbname+".single_model_graph WHERE graph_id="+graphid;
224     Statement stmt = conn.createStatement();
225     ResultSet rsst = stmt.executeQuery(sql);
226     int check=0;
227     while (rsst.next()) {
228     check++;
229     pgraphid=rsst.getInt(1);
230     ct=rsst.getString(2);
231     if (ct.equals("BB+SC+BB/SC")) ct="ALL";
232     cutoff=rsst.getDouble(3);
233     }
234     if (check!=1){
235 stehr 215 //System.err.println("No pgraph_id match or more than 1 match for graph_id="+graphid);
236 duarte 207 throw new GraphIdNotFoundError("No pgraph_id match or more than 1 match for graph_id="+graphid+" in db"+conn.getDbname());
237     }
238     rsst.close();
239     stmt.close();
240     sql="SELECT accession_code, chain_pdb_code, pchain_code FROM "+dbname+".chain_graph WHERE graph_id="+pgraphid;
241     stmt = conn.createStatement();
242     rsst = stmt.executeQuery(sql);
243     check=0;
244     while (rsst.next()){
245     check++;
246     pdbCode=rsst.getString(1);
247     pdbChainCode=rsst.getString(2);
248     // java returns a null if the field is a database null, we want actually the "NULL" string in that case
249     if (pdbChainCode==null) pdbChainCode="NULL";
250     chainCode=rsst.getString(3);
251     }
252     if (check!=1){
253     System.err.println("No accession_code+chain_pdb_code+pchain_code match or more than 1 match for graph_id="+pgraphid+" in chain_graph table");
254     }
255     rsst.close();
256     stmt.close();
257     }
258    
259     }