ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 146
Committed: Tue May 15 13:51:17 2007 UTC (17 years, 9 months ago) by duarte
File size: 11310 byte(s)
Log Message:
Made cutoff and ct public
Line User Rev File contents
1 duarte 123 package proteinstructure;
2 duarte 134 import java.io.BufferedReader;
3     import java.io.File;
4     import java.io.FileNotFoundException;
5 duarte 123 import java.io.FileOutputStream;
6 duarte 134 import java.io.FileReader;
7 duarte 123 import java.io.PrintStream;
8     import java.io.IOException;
9 duarte 135 import java.sql.ResultSet;
10     import java.sql.SQLException;
11     import java.sql.Statement;
12 duarte 123 import java.util.ArrayList;
13 duarte 129 import java.util.TreeMap;
14 duarte 144 import java.util.regex.Matcher;
15     import java.util.regex.Pattern;
16 duarte 135 import tools.MySQLConnection;
17 duarte 123
18    
19     public class Graph {
20    
21 duarte 135 public final static String MYSQLSERVER="white";
22     public final static String MYSQLUSER=getUserName();
23     public final static String MYSQLPWD="nieve";
24 duarte 144
25     public final static String GRAPHFILEFORMATVERSION = "1.0";
26 duarte 135
27 duarte 123 ArrayList<Contact> contacts;
28 duarte 135 // nodes is a TreeMap of residue serials to residue types (3 letter code)
29 duarte 129 TreeMap<Integer,String> nodes;
30 duarte 145 public String sequence;
31 duarte 143 public String accode;
32     public String chain;
33 duarte 144 public String chaincode=""; // when reading graph from file the field will be filled, otherwise no
34 duarte 146 public double cutoff;
35     public String ct;
36 duarte 135 boolean directed=false;
37 duarte 123
38 duarte 135 // these 2 fields only used when reading from db
39     int graphid=0;
40     int sm_id=0;
41    
42 duarte 134 /**
43     * Constructs Graph object by passing ArrayList with contacts and TreeMap with nodes (res serials and types)
44     * Must also pass contact type, cutoff, accession code and chain
45     * @param contacts
46     * @param nodes
47     * @param sequence
48     * @param cutoff
49     * @param ct
50     * @param accode
51     * @param chain
52     */
53 duarte 129 public Graph (ArrayList<Contact> contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String accode, String chain) {
54 duarte 123 this.contacts=contacts;
55     this.cutoff=cutoff;
56 duarte 129 this.nodes=nodes;
57     this.sequence=sequence;
58     this.accode=accode;
59     this.chain=chain;
60 duarte 123 this.ct=ct;
61 duarte 129 if (ct.contains("/")){
62     directed=true;
63     }
64 duarte 123 }
65 duarte 135
66     /**
67 duarte 141 * Constructs Graph object from graph db, given the dbname, accode, chaincode (classic pdb chain code), ct and cutoff
68 duarte 135 * @param dbname
69     * @param accode
70 duarte 141 * @param chaincode
71 duarte 135 * @param cutoff
72     * @param ct
73     */
74 duarte 141 public Graph(String dbname, String accode, String chaincode, double cutoff, String ct) throws GraphIdNotFoundError{
75 duarte 135 this.cutoff=cutoff;
76     this.accode=accode;
77     this.ct=ct;
78 duarte 142 // we set the sequence to empty when we read from graph db. We don't have the full sequence in graph db
79     // when we pass the sequence in getCM to the ContactMap constructor we want to have either a full sequence (with unobserveds) or a blank in case we don't have the info
80     this.sequence="";
81 duarte 135 //TODO graphs in db are never directed, so this doesn't really apply here. Must solve all this!
82     if (ct.contains("/")){
83     directed=true;
84     }
85     MySQLConnection conn = new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD,dbname);
86 duarte 141 getgraphid(conn, chaincode); // initialises graphid, sm_id and chain
87     read_graph_from_db(conn); // gets contacts, nodes and sequence
88 duarte 135 conn.close();
89     }
90 duarte 129
91 duarte 134 /**
92     * Constructs Graph object by reading a file with contacts
93 duarte 144 * If the contacts file doesn't have the sequence then the graph object won't have sequence or nodes
94     * That means it won't be possible to get a ContactMap from it using getCM because CM needs both sequence and nodes
95 duarte 134 * @param contactsfile
96     * @throws IOException
97     * @throws FileNotFoundException
98     */
99 duarte 144 public Graph (String contactsfile) throws IOException, FileNotFoundException{
100 duarte 142 // we set the sequence to blank when we read from file as we don't have the full sequence
101 duarte 144 // if sequence is present in contactsfile then is read from there
102 duarte 142 this.sequence="";
103 duarte 144 this.ct="";
104     this.cutoff=0.0;
105 duarte 134 if (ct.contains("/")){
106     directed=true;
107     }
108 duarte 144 read_graph_from_file(contactsfile);
109 duarte 134 }
110    
111 duarte 135 //TODO implement (from python) write_graph_to_db, do we really need it here??
112    
113     /** get user name from operating system (for use as database username) */
114     private static String getUserName() {
115     String user = null;
116     user = System.getProperty("user.name");
117     if(user == null) {
118     System.err.println("Could not get user name from operating system. Exiting");
119     System.exit(1);
120     }
121     return user;
122     }
123 duarte 144
124     public void read_graph_from_file (String contactsfile) throws FileNotFoundException, IOException {
125 duarte 134 contacts = new ArrayList<Contact>();
126     System.out.println("Reading contacts from file "+contactsfile);
127     BufferedReader fcont = new BufferedReader(new FileReader(new File(contactsfile)));
128     String line;
129     while ((line = fcont.readLine() ) != null ) {
130 duarte 144 Pattern p = Pattern.compile("^#");
131     Matcher m = p.matcher(line);
132     if (m.find()){
133     // Pattern ps = Pattern.compile("^#VER: (\\d\\.\\d)");
134     // Matcher ms = ps.matcher(line);
135     // if (ms.find()){
136     // if (!ms.group(1).equals(GRAPHFILEFORMATVERSION)){
137     // throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format version");
138     // }
139     // }
140     Pattern ps = Pattern.compile("^#SEQUENCE:\\s*(\\w+)$");
141     Matcher ms = ps.matcher(line);
142     if (ms.find()){
143     sequence=ms.group(1);
144     }
145     ps = Pattern.compile("^#PDB:\\s*(\\w+)");
146     ms = ps.matcher(line);
147     if (ms.find()){
148     accode=ms.group(1);
149     }
150     ps = Pattern.compile("^#PDB CHAIN CODE:\\s*(\\w)");
151     ms = ps.matcher(line);
152     if (ms.find()){
153     chaincode=ms.group(1);
154     }
155     ps = Pattern.compile("^#CHAIN:\\s*(\\w)");
156     ms = ps.matcher(line);
157     if (ms.find()){
158     chain=ms.group(1);
159     }
160     ps = Pattern.compile("^#CT:\\s*([a-zA-Z/]+)");
161     ms = ps.matcher(line);
162     if (ms.find()){
163     ct=ms.group(1);
164     }
165     ps = Pattern.compile("^#CUTOFF:\\s*(\\d+\\.\\d+)");
166     ms = ps.matcher(line);
167     if (ms.find()){
168     cutoff=Double.parseDouble(ms.group(1));
169     }
170     }
171     else{
172     int i = Integer.parseInt(line.split("\\s+")[0]);
173     int j = Integer.parseInt(line.split("\\s+")[1]);
174     contacts.add(new Contact(i,j));
175     }
176 duarte 134 }
177     fcont.close();
178 duarte 144 nodes = new TreeMap<Integer, String>();
179     for (int i=0;i<sequence.length();i++){
180     String letter = String.valueOf(sequence.charAt(i));
181     nodes.put(i+1, letter);
182     }
183    
184 duarte 134 }
185    
186 duarte 135 /**
187     * Reads contacts and nodes from db.
188     * The db must be a graph db following our standard format, i.e. must have tables:
189     * chain_graph, single_model_graph, single_model_node, single_model_edge
190     * We don't care here about the origin of the data (msdsd, pdbase, predicted) for the generation of the graph as long as it follows our data format
191     * We read both edges and nodes from single_model_edge and single_model_node.
192     * The sequence is taken from nodes, thus it won't have unobserved or non standard aas.
193     * @param conn
194     */
195     public void read_graph_from_db(MySQLConnection conn){
196     contacts = new ArrayList<Contact>();
197     nodes = new TreeMap<Integer, String>();
198     try {
199 duarte 142 // we read only half of the matrix (contacts in one direction only) so that we have the same type of contacts as when creating Graph from Pdb object
200 duarte 135 String sql="SELECT i_num,j_num FROM single_model_edge WHERE graph_id="+graphid+" AND j_num>i_num ORDER BY i_num,j_num ";
201     Statement stmt = conn.createStatement();
202     ResultSet rsst = stmt.executeQuery(sql);
203     while (rsst.next()) {
204     int i=rsst.getInt(1);
205     int j=rsst.getInt(2);
206     contacts.add(new Contact(i,j));
207     }
208     rsst.close();
209     stmt.close();
210     sql="SELECT num,res FROM single_model_node WHERE graph_id="+graphid+" ORDER BY num ";
211     stmt = conn.createStatement();
212     rsst = stmt.executeQuery(sql);
213     while (rsst.next()){
214     int num=rsst.getInt(1);
215     String res=rsst.getString(2);
216     nodes.put(num, AA.oneletter2threeletter(res));
217     }
218     rsst.close();
219     stmt.close();
220     } catch (SQLException e) {
221     e.printStackTrace();
222     }
223    
224     }
225    
226 duarte 141 public void getgraphid (MySQLConnection conn, String chaincode) throws GraphIdNotFoundError{
227     // input is chaincode i.e. pdb chain code
228     // we take chain (internal chain identifier, pchain_code for msdsd and asym_id for pdbase) from pchain_code field in chain_graph
229     // (in the chain_graph table the internal chain identifier is called 'pchain_code')
230 duarte 135 int pgraphid=0;
231 duarte 141 String chainstr="='"+chaincode+"' ";
232     if (chaincode.equals("NULL")){
233     chainstr=" IS NULL ";
234     }
235 duarte 135 try {
236 duarte 141 String sql="SELECT graph_id, pchain_code FROM chain_graph WHERE accession_code='"+accode+"' AND chain_pdb_code"+chainstr+" AND dist="+cutoff;
237 duarte 135 Statement stmt = conn.createStatement();
238     ResultSet rsst = stmt.executeQuery(sql);
239     int check=0;
240     while (rsst.next()) {
241     check++;
242     pgraphid=rsst.getInt(1);
243 duarte 141 chain=rsst.getString(2);
244 duarte 135 }
245     if (check!=1){
246 duarte 141 System.err.println("No pgraph_id match or more than 1 match for accession_code="+accode+", chain_pdb_code="+chaincode+", dist="+cutoff);
247 duarte 135 }
248     rsst.close();
249     stmt.close();
250     // we set the ctstr to the same as ct except in ALL case, where it is BB+SC+BB/SC
251     String ctstr=ct;
252     if (ct.equals("ALL")){
253     ctstr="BB+SC+BB/SC";
254     }
255     sql="SELECT graph_id,single_model_id FROM single_model_graph WHERE pgraph_id="+pgraphid+" AND CT='"+ctstr+"' AND dist="+cutoff+" AND CR='(true)' AND CW=1";
256     stmt = conn.createStatement();
257     rsst = stmt.executeQuery(sql);
258     check=0;
259     while (rsst.next()){
260     check++;
261     graphid=rsst.getInt(1);
262     sm_id=rsst.getInt(2);
263     }
264     if (check!=1){
265     System.err.println("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
266     throw new GraphIdNotFoundError("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
267     }
268     } catch (SQLException e) {
269     e.printStackTrace();
270     }
271    
272     }
273    
274 duarte 123 public void write_contacts_to_file (String outfile) throws IOException {
275     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
276     for (Contact pair:contacts){
277     int i_resser=pair.i;
278     int j_resser=pair.j;
279     Out.println(i_resser+"\t"+j_resser);
280     }
281     Out.close();
282     }
283 duarte 144
284     public void write_graph_to_file (String outfile) throws IOException {
285     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
286     Out.println("#VER: "+GRAPHFILEFORMATVERSION);
287     Out.println("#SEQUENCE: "+sequence);
288     Out.println("#PDB: "+accode);
289     Out.println("#PDB CHAIN CODE: "+chaincode);
290     Out.println("#CHAIN: "+chain);
291     Out.println("#CT: "+ct);
292     Out.println("#CUTOFF: "+cutoff);
293     for (Contact pair:contacts){
294     int i_resser=pair.i;
295     int j_resser=pair.j;
296     Out.println(i_resser+"\t"+j_resser);
297     }
298     Out.close();
299     }
300    
301 duarte 129 public ContactMap getCM() {
302     // residues is the map from residue nums to residue types used in ContactMap class, i.e. it is the same as Pdb.resser2restype or Graph.nodes
303     TreeMap<Integer,String> residues = new TreeMap<Integer,String>();
304     // we copy residues from nodes (deep copy)
305     for (int node:nodes.keySet()){
306     residues.put(node, nodes.get(node));
307     }
308     // check if we are in directed or undirected case. If undirected we fill the opposite contacts to pass a full list of contacts to ContactMap (which contains full matrix)
309     ArrayList<Contact> contacts2pass = new ArrayList<Contact>();
310     if (directed){
311     contacts2pass=contacts;
312     } else {
313     for (Contact cont:contacts){
314     int i_resser = cont.i;
315     int j_resser = cont.j;
316     contacts2pass.add(new Contact(i_resser,j_resser));
317     contacts2pass.add(new Contact(j_resser,i_resser));
318     }
319     }
320     // construct the ContactMap object and return it
321     ContactMap cm = new ContactMap(contacts2pass,residues,sequence);
322     return cm;
323    
324     }
325    
326 duarte 123 }