ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 152
Committed: Wed May 16 14:12:50 2007 UTC (17 years, 9 months ago) by duarte
File size: 13751 byte(s)
Log Message:
FIXED 2 BUGS. NEW FEATURE reading graph from db given graph_id
Bugs:
- nodes TreeMap in read_graph_from_file contained one letter res codes, now three letter codes
- in getCM was passing residues as a TreeMap of nums to three letter residue types, now passing one letter residue types
New constructor for getting graph from db given graph_id, new method to go with that get_db_graph_info

Line User Rev File contents
1 duarte 123 package proteinstructure;
2 duarte 134 import java.io.BufferedReader;
3     import java.io.File;
4     import java.io.FileNotFoundException;
5 duarte 123 import java.io.FileOutputStream;
6 duarte 134 import java.io.FileReader;
7 duarte 123 import java.io.PrintStream;
8     import java.io.IOException;
9 duarte 135 import java.sql.ResultSet;
10     import java.sql.SQLException;
11     import java.sql.Statement;
12 duarte 123 import java.util.ArrayList;
13 duarte 129 import java.util.TreeMap;
14 duarte 144 import java.util.regex.Matcher;
15     import java.util.regex.Pattern;
16 duarte 135 import tools.MySQLConnection;
17 duarte 123
18    
19     public class Graph {
20    
21 duarte 135 public final static String MYSQLSERVER="white";
22     public final static String MYSQLUSER=getUserName();
23     public final static String MYSQLPWD="nieve";
24 duarte 144
25     public final static String GRAPHFILEFORMATVERSION = "1.0";
26 duarte 135
27 duarte 123 ArrayList<Contact> contacts;
28 duarte 135 // nodes is a TreeMap of residue serials to residue types (3 letter code)
29 duarte 129 TreeMap<Integer,String> nodes;
30 duarte 145 public String sequence;
31 duarte 143 public String accode;
32     public String chain;
33 duarte 144 public String chaincode=""; // when reading graph from file the field will be filled, otherwise no
34 duarte 146 public double cutoff;
35     public String ct;
36 duarte 135 boolean directed=false;
37 duarte 123
38 duarte 135 // these 2 fields only used when reading from db
39     int graphid=0;
40     int sm_id=0;
41    
42 duarte 134 /**
43     * Constructs Graph object by passing ArrayList with contacts and TreeMap with nodes (res serials and types)
44     * Must also pass contact type, cutoff, accession code and chain
45     * @param contacts
46     * @param nodes
47     * @param sequence
48     * @param cutoff
49     * @param ct
50     * @param accode
51     * @param chain
52     */
53 duarte 129 public Graph (ArrayList<Contact> contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String accode, String chain) {
54 duarte 123 this.contacts=contacts;
55     this.cutoff=cutoff;
56 duarte 129 this.nodes=nodes;
57     this.sequence=sequence;
58     this.accode=accode;
59     this.chain=chain;
60 duarte 123 this.ct=ct;
61 duarte 129 if (ct.contains("/")){
62     directed=true;
63     }
64 duarte 123 }
65 duarte 135
66     /**
67 duarte 141 * Constructs Graph object from graph db, given the dbname, accode, chaincode (classic pdb chain code), ct and cutoff
68 duarte 135 * @param dbname
69     * @param accode
70 duarte 141 * @param chaincode
71 duarte 135 * @param cutoff
72     * @param ct
73     */
74 duarte 141 public Graph(String dbname, String accode, String chaincode, double cutoff, String ct) throws GraphIdNotFoundError{
75 duarte 135 this.cutoff=cutoff;
76     this.accode=accode;
77     this.ct=ct;
78 duarte 142 // we set the sequence to empty when we read from graph db. We don't have the full sequence in graph db
79     // when we pass the sequence in getCM to the ContactMap constructor we want to have either a full sequence (with unobserveds) or a blank in case we don't have the info
80     this.sequence="";
81 duarte 135 //TODO graphs in db are never directed, so this doesn't really apply here. Must solve all this!
82     if (ct.contains("/")){
83     directed=true;
84     }
85     MySQLConnection conn = new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD,dbname);
86 duarte 141 getgraphid(conn, chaincode); // initialises graphid, sm_id and chain
87     read_graph_from_db(conn); // gets contacts, nodes and sequence
88 duarte 135 conn.close();
89     }
90 duarte 152
91     /**
92     * Constructs Graph object from graph db, given the graphid
93     * @param dbname
94     * @param graphid
95     */
96     public Graph(String dbname,int graphid) throws GraphIdNotFoundError{
97     this.graphid=graphid;
98     // we set the sequence to empty when we read from graph db. We don't have the full sequence in graph db
99     // when we pass the sequence in getCM to the ContactMap constructor we want to have either a full sequence (with unobserveds) or a blank in case we don't have the info
100     this.sequence="";
101     MySQLConnection conn = new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD,dbname);
102     read_graph_from_db(conn); // gets contacts, nodes and sequence
103     get_db_graph_info(conn); // gets accode, chaincode, chain, ct and cutoff from db (from graph_id)
104     conn.close();
105     //TODO graphs in db are never directed, so this doesn't really apply here. Must solve all this!
106     if (ct.contains("/")){
107     directed=true;
108     }
109     }
110 duarte 129
111 duarte 134 /**
112     * Constructs Graph object by reading a file with contacts
113 duarte 144 * If the contacts file doesn't have the sequence then the graph object won't have sequence or nodes
114     * That means it won't be possible to get a ContactMap from it using getCM because CM needs both sequence and nodes
115 duarte 134 * @param contactsfile
116     * @throws IOException
117     * @throws FileNotFoundException
118     */
119 duarte 144 public Graph (String contactsfile) throws IOException, FileNotFoundException{
120 duarte 142 // we set the sequence to blank when we read from file as we don't have the full sequence
121 duarte 144 // if sequence is present in contactsfile then is read from there
122 duarte 142 this.sequence="";
123 duarte 144 this.ct="";
124     this.cutoff=0.0;
125 duarte 134 if (ct.contains("/")){
126     directed=true;
127     }
128 duarte 144 read_graph_from_file(contactsfile);
129 duarte 134 }
130    
131 duarte 135 //TODO implement (from python) write_graph_to_db, do we really need it here??
132    
133     /** get user name from operating system (for use as database username) */
134     private static String getUserName() {
135     String user = null;
136     user = System.getProperty("user.name");
137     if(user == null) {
138     System.err.println("Could not get user name from operating system. Exiting");
139     System.exit(1);
140     }
141     return user;
142     }
143 duarte 144
144     public void read_graph_from_file (String contactsfile) throws FileNotFoundException, IOException {
145 duarte 134 contacts = new ArrayList<Contact>();
146     System.out.println("Reading contacts from file "+contactsfile);
147     BufferedReader fcont = new BufferedReader(new FileReader(new File(contactsfile)));
148     String line;
149     while ((line = fcont.readLine() ) != null ) {
150 duarte 144 Pattern p = Pattern.compile("^#");
151     Matcher m = p.matcher(line);
152     if (m.find()){
153     // Pattern ps = Pattern.compile("^#VER: (\\d\\.\\d)");
154     // Matcher ms = ps.matcher(line);
155     // if (ms.find()){
156     // if (!ms.group(1).equals(GRAPHFILEFORMATVERSION)){
157     // throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format version");
158     // }
159     // }
160     Pattern ps = Pattern.compile("^#SEQUENCE:\\s*(\\w+)$");
161     Matcher ms = ps.matcher(line);
162     if (ms.find()){
163     sequence=ms.group(1);
164     }
165     ps = Pattern.compile("^#PDB:\\s*(\\w+)");
166     ms = ps.matcher(line);
167     if (ms.find()){
168     accode=ms.group(1);
169     }
170     ps = Pattern.compile("^#PDB CHAIN CODE:\\s*(\\w)");
171     ms = ps.matcher(line);
172     if (ms.find()){
173     chaincode=ms.group(1);
174     }
175     ps = Pattern.compile("^#CHAIN:\\s*(\\w)");
176     ms = ps.matcher(line);
177     if (ms.find()){
178     chain=ms.group(1);
179     }
180     ps = Pattern.compile("^#CT:\\s*([a-zA-Z/]+)");
181     ms = ps.matcher(line);
182     if (ms.find()){
183     ct=ms.group(1);
184     }
185     ps = Pattern.compile("^#CUTOFF:\\s*(\\d+\\.\\d+)");
186     ms = ps.matcher(line);
187     if (ms.find()){
188     cutoff=Double.parseDouble(ms.group(1));
189     }
190     }
191     else{
192     int i = Integer.parseInt(line.split("\\s+")[0]);
193     int j = Integer.parseInt(line.split("\\s+")[1]);
194     contacts.add(new Contact(i,j));
195     }
196 duarte 134 }
197     fcont.close();
198 duarte 152 // if sequence was given we take nodes from it
199 duarte 144 nodes = new TreeMap<Integer, String>();
200     for (int i=0;i<sequence.length();i++){
201     String letter = String.valueOf(sequence.charAt(i));
202 duarte 152 nodes.put(i+1, AA.oneletter2threeletter(letter));
203 duarte 144 }
204    
205 duarte 134 }
206    
207 duarte 135 /**
208     * Reads contacts and nodes from db.
209     * The db must be a graph db following our standard format, i.e. must have tables:
210     * chain_graph, single_model_graph, single_model_node, single_model_edge
211     * We don't care here about the origin of the data (msdsd, pdbase, predicted) for the generation of the graph as long as it follows our data format
212     * We read both edges and nodes from single_model_edge and single_model_node.
213 duarte 152 * The sequence is set to blank, as we can't get the full sequence from graph db
214 duarte 135 * @param conn
215     */
216     public void read_graph_from_db(MySQLConnection conn){
217     contacts = new ArrayList<Contact>();
218     nodes = new TreeMap<Integer, String>();
219     try {
220 duarte 142 // we read only half of the matrix (contacts in one direction only) so that we have the same type of contacts as when creating Graph from Pdb object
221 duarte 135 String sql="SELECT i_num,j_num FROM single_model_edge WHERE graph_id="+graphid+" AND j_num>i_num ORDER BY i_num,j_num ";
222     Statement stmt = conn.createStatement();
223     ResultSet rsst = stmt.executeQuery(sql);
224     while (rsst.next()) {
225     int i=rsst.getInt(1);
226     int j=rsst.getInt(2);
227     contacts.add(new Contact(i,j));
228     }
229     rsst.close();
230     stmt.close();
231     sql="SELECT num,res FROM single_model_node WHERE graph_id="+graphid+" ORDER BY num ";
232     stmt = conn.createStatement();
233     rsst = stmt.executeQuery(sql);
234     while (rsst.next()){
235     int num=rsst.getInt(1);
236     String res=rsst.getString(2);
237     nodes.put(num, AA.oneletter2threeletter(res));
238     }
239     rsst.close();
240     stmt.close();
241     } catch (SQLException e) {
242     e.printStackTrace();
243     }
244    
245     }
246    
247 duarte 141 public void getgraphid (MySQLConnection conn, String chaincode) throws GraphIdNotFoundError{
248     // input is chaincode i.e. pdb chain code
249     // we take chain (internal chain identifier, pchain_code for msdsd and asym_id for pdbase) from pchain_code field in chain_graph
250     // (in the chain_graph table the internal chain identifier is called 'pchain_code')
251 duarte 135 int pgraphid=0;
252 duarte 141 String chainstr="='"+chaincode+"' ";
253     if (chaincode.equals("NULL")){
254     chainstr=" IS NULL ";
255     }
256 duarte 135 try {
257 duarte 141 String sql="SELECT graph_id, pchain_code FROM chain_graph WHERE accession_code='"+accode+"' AND chain_pdb_code"+chainstr+" AND dist="+cutoff;
258 duarte 135 Statement stmt = conn.createStatement();
259     ResultSet rsst = stmt.executeQuery(sql);
260     int check=0;
261     while (rsst.next()) {
262     check++;
263     pgraphid=rsst.getInt(1);
264 duarte 141 chain=rsst.getString(2);
265 duarte 135 }
266     if (check!=1){
267 duarte 141 System.err.println("No pgraph_id match or more than 1 match for accession_code="+accode+", chain_pdb_code="+chaincode+", dist="+cutoff);
268 duarte 135 }
269     rsst.close();
270     stmt.close();
271     // we set the ctstr to the same as ct except in ALL case, where it is BB+SC+BB/SC
272     String ctstr=ct;
273     if (ct.equals("ALL")){
274     ctstr="BB+SC+BB/SC";
275     }
276     sql="SELECT graph_id,single_model_id FROM single_model_graph WHERE pgraph_id="+pgraphid+" AND CT='"+ctstr+"' AND dist="+cutoff+" AND CR='(true)' AND CW=1";
277     stmt = conn.createStatement();
278     rsst = stmt.executeQuery(sql);
279     check=0;
280     while (rsst.next()){
281     check++;
282     graphid=rsst.getInt(1);
283     sm_id=rsst.getInt(2);
284     }
285     if (check!=1){
286     System.err.println("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
287     throw new GraphIdNotFoundError("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
288     }
289     } catch (SQLException e) {
290     e.printStackTrace();
291     }
292    
293     }
294    
295 duarte 152 public void get_db_graph_info(MySQLConnection conn) throws GraphIdNotFoundError {
296     try {
297     int pgraphid=0;
298     String sql="SELECT pgraph_id,CT,dist FROM single_model_graph WHERE graph_id="+graphid;
299     Statement stmt = conn.createStatement();
300     ResultSet rsst = stmt.executeQuery(sql);
301     int check=0;
302     while (rsst.next()) {
303     check++;
304     pgraphid=rsst.getInt(1);
305     ct=rsst.getString(2);
306     if (ct.equals("BB+SC+BB/SC")) ct="ALL";
307     cutoff=rsst.getDouble(3);
308     }
309     if (check!=1){
310     System.err.println("No pgraph_id match or more than 1 match for graph_id="+graphid);
311     throw new GraphIdNotFoundError("No pgraph_id match or more than 1 match for graph_id="+graphid+" in db"+conn.getDbname());
312     }
313     rsst.close();
314     stmt.close();
315     sql="SELECT accession_code, chain_pdb_code, pchain_code FROM chain_graph WHERE graph_id="+pgraphid;
316     stmt = conn.createStatement();
317     rsst = stmt.executeQuery(sql);
318     check=0;
319     while (rsst.next()){
320     check++;
321     accode=rsst.getString(1);
322     chaincode=rsst.getString(2);
323     chain=rsst.getString(3);
324     }
325     if (check!=1){
326     System.err.println("No accession_code+chain_pdb_code+pchain_code match or more than 1 match for graph_id="+pgraphid+" in chain_graph table");
327     }
328     rsst.close();
329     stmt.close();
330     } catch (SQLException e) {
331     e.printStackTrace();
332     }
333    
334     }
335    
336 duarte 123 public void write_contacts_to_file (String outfile) throws IOException {
337     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
338     for (Contact pair:contacts){
339     int i_resser=pair.i;
340     int j_resser=pair.j;
341     Out.println(i_resser+"\t"+j_resser);
342     }
343     Out.close();
344     }
345 duarte 144
346     public void write_graph_to_file (String outfile) throws IOException {
347     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
348     Out.println("#VER: "+GRAPHFILEFORMATVERSION);
349     Out.println("#SEQUENCE: "+sequence);
350     Out.println("#PDB: "+accode);
351     Out.println("#PDB CHAIN CODE: "+chaincode);
352     Out.println("#CHAIN: "+chain);
353     Out.println("#CT: "+ct);
354     Out.println("#CUTOFF: "+cutoff);
355     for (Contact pair:contacts){
356     int i_resser=pair.i;
357     int j_resser=pair.j;
358     Out.println(i_resser+"\t"+j_resser);
359     }
360     Out.close();
361     }
362    
363 duarte 129 public ContactMap getCM() {
364 duarte 152 // residues is the map from residue nums to residue types used in ContactMap class,
365     // i.e. it is the same as Pdb.resser2restype or Graph.nodes, BUT!!! residues has one letter residue codes as opposed to Pdb.resser2restype or Graph.nodes!!
366 duarte 129 TreeMap<Integer,String> residues = new TreeMap<Integer,String>();
367     // we copy residues from nodes (deep copy)
368     for (int node:nodes.keySet()){
369 duarte 152 residues.put(node, AA.threeletter2oneletter(nodes.get(node)));
370 duarte 129 }
371     // check if we are in directed or undirected case. If undirected we fill the opposite contacts to pass a full list of contacts to ContactMap (which contains full matrix)
372     ArrayList<Contact> contacts2pass = new ArrayList<Contact>();
373     if (directed){
374     contacts2pass=contacts;
375     } else {
376     for (Contact cont:contacts){
377     int i_resser = cont.i;
378     int j_resser = cont.j;
379     contacts2pass.add(new Contact(i_resser,j_resser));
380     contacts2pass.add(new Contact(j_resser,i_resser));
381     }
382     }
383     // construct the ContactMap object and return it
384     ContactMap cm = new ContactMap(contacts2pass,residues,sequence);
385     return cm;
386    
387     }
388    
389 duarte 123 }