ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 156
Committed: Tue May 22 10:40:04 2007 UTC (17 years, 4 months ago) by duarte
File size: 13955 byte(s)
Log Message:
Now passing also chaincode from Pdb object to Graph object in get_graph
Line File contents
1 package proteinstructure;
2 import java.io.BufferedReader;
3 import java.io.File;
4 import java.io.FileNotFoundException;
5 import java.io.FileOutputStream;
6 import java.io.FileReader;
7 import java.io.PrintStream;
8 import java.io.IOException;
9 import java.sql.ResultSet;
10 import java.sql.SQLException;
11 import java.sql.Statement;
12 import java.util.ArrayList;
13 import java.util.TreeMap;
14 import java.util.regex.Matcher;
15 import java.util.regex.Pattern;
16 import tools.MySQLConnection;
17
18
19 public class Graph {
20
21 public final static String MYSQLSERVER="white";
22 public final static String MYSQLUSER=getUserName();
23 public final static String MYSQLPWD="nieve";
24
25 public final static String GRAPHFILEFORMATVERSION = "1.0";
26
27 ArrayList<Contact> contacts;
28 // nodes is a TreeMap of residue serials to residue types (3 letter code)
29 TreeMap<Integer,String> nodes;
30 public String sequence;
31 public String accode;
32 public String chain;
33 public String chaincode=""; // when reading graph from file the field will be filled, otherwise no
34 public double cutoff;
35 public String ct;
36 boolean directed=false;
37
38 // these 2 fields only used when reading from db
39 int graphid=0;
40 int sm_id=0;
41
42 /**
43 * Constructs Graph object by passing ArrayList with contacts and TreeMap with nodes (res serials and types)
44 * Must also pass contact type, cutoff, accession code and chain
45 * @param contacts
46 * @param nodes
47 * @param sequence
48 * @param cutoff
49 * @param ct
50 * @param accode
51 * @param chain
52 */
53 public Graph (ArrayList<Contact> contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String accode, String chain, String chaincode) {
54 this.contacts=contacts;
55 this.cutoff=cutoff;
56 this.nodes=nodes;
57 this.sequence=sequence;
58 this.accode=accode;
59 this.chain=chain;
60 this.chaincode=chaincode;
61 this.ct=ct;
62 if (ct.contains("/")){
63 directed=true;
64 }
65 }
66
67 /**
68 * Constructs Graph object from graph db, given the dbname, accode, chaincode (classic pdb chain code), ct and cutoff
69 * @param dbname
70 * @param accode
71 * @param chaincode
72 * @param cutoff
73 * @param ct
74 */
75 public Graph(String dbname, String accode, String chaincode, double cutoff, String ct) throws GraphIdNotFoundError{
76 this.cutoff=cutoff;
77 this.accode=accode;
78 this.ct=ct;
79 // we set the sequence to empty when we read from graph db. We don't have the full sequence in graph db
80 // when we pass the sequence in getCM to the ContactMap constructor we want to have either a full sequence (with unobserveds) or a blank in case we don't have the info
81 this.sequence="";
82 //TODO graphs in db are never directed, so this doesn't really apply here. Must solve all this!
83 if (ct.contains("/")){
84 directed=true;
85 }
86 MySQLConnection conn = new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD,dbname);
87 getgraphid(conn, chaincode); // initialises graphid, sm_id and chain
88 read_graph_from_db(conn); // gets contacts, nodes and sequence
89 conn.close();
90 }
91
92 /**
93 * Constructs Graph object from graph db, given the graphid
94 * @param dbname
95 * @param graphid
96 */
97 public Graph(String dbname,int graphid) throws GraphIdNotFoundError{
98 this.graphid=graphid;
99 // we set the sequence to empty when we read from graph db. We don't have the full sequence in graph db
100 // when we pass the sequence in getCM to the ContactMap constructor we want to have either a full sequence (with unobserveds) or a blank in case we don't have the info
101 this.sequence="";
102 MySQLConnection conn = new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD,dbname);
103 read_graph_from_db(conn); // gets contacts, nodes and sequence
104 get_db_graph_info(conn); // gets accode, chaincode, chain, ct and cutoff from db (from graph_id)
105 conn.close();
106 //TODO graphs in db are never directed, so this doesn't really apply here. Must solve all this!
107 if (ct.contains("/")){
108 directed=true;
109 }
110 }
111
112 /**
113 * Constructs Graph object by reading a file with contacts
114 * If the contacts file doesn't have the sequence then the graph object won't have sequence or nodes
115 * That means it won't be possible to get a ContactMap from it using getCM because CM needs both sequence and nodes
116 * @param contactsfile
117 * @throws IOException
118 * @throws FileNotFoundException
119 */
120 public Graph (String contactsfile) throws IOException, FileNotFoundException{
121 // we set the sequence to blank when we read from file as we don't have the full sequence
122 // if sequence is present in contactsfile then is read from there
123 this.sequence="";
124 this.ct="";
125 this.cutoff=0.0;
126 // we initialise accode, chain and chaincode to empty strings in case the file doesn't specify then
127 this.accode="";
128 this.chain="";
129 this.chaincode="";
130 if (ct.contains("/")){
131 directed=true;
132 }
133 read_graph_from_file(contactsfile);
134 }
135
136 //TODO implement (from python) write_graph_to_db, do we really need it here??
137
138 /** get user name from operating system (for use as database username) */
139 private static String getUserName() {
140 String user = null;
141 user = System.getProperty("user.name");
142 if(user == null) {
143 System.err.println("Could not get user name from operating system. Exiting");
144 System.exit(1);
145 }
146 return user;
147 }
148
149 public void read_graph_from_file (String contactsfile) throws FileNotFoundException, IOException {
150 contacts = new ArrayList<Contact>();
151 System.out.println("Reading contacts from file "+contactsfile);
152 BufferedReader fcont = new BufferedReader(new FileReader(new File(contactsfile)));
153 String line;
154 while ((line = fcont.readLine() ) != null ) {
155 Pattern p = Pattern.compile("^#");
156 Matcher m = p.matcher(line);
157 if (m.find()){
158 // Pattern ps = Pattern.compile("^#VER: (\\d\\.\\d)");
159 // Matcher ms = ps.matcher(line);
160 // if (ms.find()){
161 // if (!ms.group(1).equals(GRAPHFILEFORMATVERSION)){
162 // throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format version");
163 // }
164 // }
165 Pattern ps = Pattern.compile("^#SEQUENCE:\\s*(\\w+)$");
166 Matcher ms = ps.matcher(line);
167 if (ms.find()){
168 sequence=ms.group(1);
169 }
170 ps = Pattern.compile("^#PDB:\\s*(\\w+)");
171 ms = ps.matcher(line);
172 if (ms.find()){
173 accode=ms.group(1);
174 }
175 ps = Pattern.compile("^#PDB CHAIN CODE:\\s*(\\w)");
176 ms = ps.matcher(line);
177 if (ms.find()){
178 chaincode=ms.group(1);
179 }
180 ps = Pattern.compile("^#CHAIN:\\s*(\\w)");
181 ms = ps.matcher(line);
182 if (ms.find()){
183 chain=ms.group(1);
184 }
185 ps = Pattern.compile("^#CT:\\s*([a-zA-Z/]+)");
186 ms = ps.matcher(line);
187 if (ms.find()){
188 ct=ms.group(1);
189 }
190 ps = Pattern.compile("^#CUTOFF:\\s*(\\d+\\.\\d+)");
191 ms = ps.matcher(line);
192 if (ms.find()){
193 cutoff=Double.parseDouble(ms.group(1));
194 }
195 }
196 else{
197 int i = Integer.parseInt(line.split("\\s+")[0]);
198 int j = Integer.parseInt(line.split("\\s+")[1]);
199 contacts.add(new Contact(i,j));
200 }
201 }
202 fcont.close();
203 // if sequence was given we take nodes from it
204 nodes = new TreeMap<Integer, String>();
205 for (int i=0;i<sequence.length();i++){
206 String letter = String.valueOf(sequence.charAt(i));
207 nodes.put(i+1, AA.oneletter2threeletter(letter));
208 }
209
210 }
211
212 /**
213 * Reads contacts and nodes from db.
214 * The db must be a graph db following our standard format, i.e. must have tables:
215 * chain_graph, single_model_graph, single_model_node, single_model_edge
216 * We don't care here about the origin of the data (msdsd, pdbase, predicted) for the generation of the graph as long as it follows our data format
217 * We read both edges and nodes from single_model_edge and single_model_node.
218 * The sequence is set to blank, as we can't get the full sequence from graph db
219 * @param conn
220 */
221 public void read_graph_from_db(MySQLConnection conn){
222 contacts = new ArrayList<Contact>();
223 nodes = new TreeMap<Integer, String>();
224 try {
225 // we read only half of the matrix (contacts in one direction only) so that we have the same type of contacts as when creating Graph from Pdb object
226 String sql="SELECT i_num,j_num FROM single_model_edge WHERE graph_id="+graphid+" AND j_num>i_num ORDER BY i_num,j_num ";
227 Statement stmt = conn.createStatement();
228 ResultSet rsst = stmt.executeQuery(sql);
229 while (rsst.next()) {
230 int i=rsst.getInt(1);
231 int j=rsst.getInt(2);
232 contacts.add(new Contact(i,j));
233 }
234 rsst.close();
235 stmt.close();
236 sql="SELECT num,res FROM single_model_node WHERE graph_id="+graphid+" ORDER BY num ";
237 stmt = conn.createStatement();
238 rsst = stmt.executeQuery(sql);
239 while (rsst.next()){
240 int num=rsst.getInt(1);
241 String res=rsst.getString(2);
242 nodes.put(num, AA.oneletter2threeletter(res));
243 }
244 rsst.close();
245 stmt.close();
246 } catch (SQLException e) {
247 e.printStackTrace();
248 }
249
250 }
251
252 public void getgraphid (MySQLConnection conn, String chaincode) throws GraphIdNotFoundError{
253 // input is chaincode i.e. pdb chain code
254 // we take chain (internal chain identifier, pchain_code for msdsd and asym_id for pdbase) from pchain_code field in chain_graph
255 // (in the chain_graph table the internal chain identifier is called 'pchain_code')
256 int pgraphid=0;
257 String chainstr="='"+chaincode+"' ";
258 if (chaincode.equals("NULL")){
259 chainstr=" IS NULL ";
260 }
261 try {
262 String sql="SELECT graph_id, pchain_code FROM chain_graph WHERE accession_code='"+accode+"' AND chain_pdb_code"+chainstr+" AND dist="+cutoff;
263 Statement stmt = conn.createStatement();
264 ResultSet rsst = stmt.executeQuery(sql);
265 int check=0;
266 while (rsst.next()) {
267 check++;
268 pgraphid=rsst.getInt(1);
269 chain=rsst.getString(2);
270 }
271 if (check!=1){
272 System.err.println("No pgraph_id match or more than 1 match for accession_code="+accode+", chain_pdb_code="+chaincode+", dist="+cutoff);
273 }
274 rsst.close();
275 stmt.close();
276 // we set the ctstr to the same as ct except in ALL case, where it is BB+SC+BB/SC
277 String ctstr=ct;
278 if (ct.equals("ALL")){
279 ctstr="BB+SC+BB/SC";
280 }
281 sql="SELECT graph_id,single_model_id FROM single_model_graph WHERE pgraph_id="+pgraphid+" AND CT='"+ctstr+"' AND dist="+cutoff+" AND CR='(true)' AND CW=1";
282 stmt = conn.createStatement();
283 rsst = stmt.executeQuery(sql);
284 check=0;
285 while (rsst.next()){
286 check++;
287 graphid=rsst.getInt(1);
288 sm_id=rsst.getInt(2);
289 }
290 if (check!=1){
291 System.err.println("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
292 throw new GraphIdNotFoundError("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
293 }
294 } catch (SQLException e) {
295 e.printStackTrace();
296 }
297
298 }
299
300 public void get_db_graph_info(MySQLConnection conn) throws GraphIdNotFoundError {
301 try {
302 int pgraphid=0;
303 String sql="SELECT pgraph_id,CT,dist FROM single_model_graph WHERE graph_id="+graphid;
304 Statement stmt = conn.createStatement();
305 ResultSet rsst = stmt.executeQuery(sql);
306 int check=0;
307 while (rsst.next()) {
308 check++;
309 pgraphid=rsst.getInt(1);
310 ct=rsst.getString(2);
311 if (ct.equals("BB+SC+BB/SC")) ct="ALL";
312 cutoff=rsst.getDouble(3);
313 }
314 if (check!=1){
315 System.err.println("No pgraph_id match or more than 1 match for graph_id="+graphid);
316 throw new GraphIdNotFoundError("No pgraph_id match or more than 1 match for graph_id="+graphid+" in db"+conn.getDbname());
317 }
318 rsst.close();
319 stmt.close();
320 sql="SELECT accession_code, chain_pdb_code, pchain_code FROM chain_graph WHERE graph_id="+pgraphid;
321 stmt = conn.createStatement();
322 rsst = stmt.executeQuery(sql);
323 check=0;
324 while (rsst.next()){
325 check++;
326 accode=rsst.getString(1);
327 chaincode=rsst.getString(2);
328 chain=rsst.getString(3);
329 }
330 if (check!=1){
331 System.err.println("No accession_code+chain_pdb_code+pchain_code match or more than 1 match for graph_id="+pgraphid+" in chain_graph table");
332 }
333 rsst.close();
334 stmt.close();
335 } catch (SQLException e) {
336 e.printStackTrace();
337 }
338
339 }
340
341 public void write_contacts_to_file (String outfile) throws IOException {
342 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
343 for (Contact pair:contacts){
344 int i_resser=pair.i;
345 int j_resser=pair.j;
346 Out.println(i_resser+"\t"+j_resser);
347 }
348 Out.close();
349 }
350
351 public void write_graph_to_file (String outfile) throws IOException {
352 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
353 Out.println("#VER: "+GRAPHFILEFORMATVERSION);
354 Out.println("#SEQUENCE: "+sequence);
355 Out.println("#PDB: "+accode);
356 Out.println("#PDB CHAIN CODE: "+chaincode);
357 Out.println("#CHAIN: "+chain);
358 Out.println("#CT: "+ct);
359 Out.println("#CUTOFF: "+cutoff);
360 for (Contact pair:contacts){
361 int i_resser=pair.i;
362 int j_resser=pair.j;
363 Out.println(i_resser+"\t"+j_resser);
364 }
365 Out.close();
366 }
367
368 public ContactMap getCM() {
369 // residues is the map from residue nums to residue types used in ContactMap class,
370 // i.e. it is the same as Pdb.resser2restype or Graph.nodes, BUT!!! residues has one letter residue codes as opposed to Pdb.resser2restype or Graph.nodes!!
371 TreeMap<Integer,String> residues = new TreeMap<Integer,String>();
372 // we copy residues from nodes (deep copy)
373 for (int node:nodes.keySet()){
374 residues.put(node, AA.threeletter2oneletter(nodes.get(node)));
375 }
376 // check if we are in directed or undirected case. If undirected we fill the opposite contacts to pass a full list of contacts to ContactMap (which contains full matrix)
377 ArrayList<Contact> contacts2pass = new ArrayList<Contact>();
378 if (directed){
379 contacts2pass=contacts;
380 } else {
381 for (Contact cont:contacts){
382 int i_resser = cont.i;
383 int j_resser = cont.j;
384 contacts2pass.add(new Contact(i_resser,j_resser));
385 contacts2pass.add(new Contact(j_resser,i_resser));
386 }
387 }
388 // construct the ContactMap object and return it
389 ContactMap cm = new ContactMap(contacts2pass,residues,sequence);
390 return cm;
391
392 }
393
394 }