ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 143
Committed: Tue May 15 10:06:47 2007 UTC (17 years, 4 months ago) by duarte
File size: 9215 byte(s)
Log Message:
FIXED BUG: last commit didn't fix bug correctly:
Changed the ContactMap constructor to put into residueNums and residueTypes nums from 1 to maximum of residues.keySet()

Made public accode and chain fields in Graph
Line User Rev File contents
1 duarte 123 package proteinstructure;
2 duarte 134 import java.io.BufferedReader;
3     import java.io.File;
4     import java.io.FileNotFoundException;
5 duarte 123 import java.io.FileOutputStream;
6 duarte 134 import java.io.FileReader;
7 duarte 123 import java.io.PrintStream;
8     import java.io.IOException;
9 duarte 135 import java.sql.ResultSet;
10     import java.sql.SQLException;
11     import java.sql.Statement;
12 duarte 123 import java.util.ArrayList;
13 duarte 129 import java.util.TreeMap;
14 duarte 135 import tools.MySQLConnection;
15 duarte 123
16    
17     public class Graph {
18    
19 duarte 135 public final static String MYSQLSERVER="white";
20     public final static String MYSQLUSER=getUserName();
21     public final static String MYSQLPWD="nieve";
22    
23 duarte 123 ArrayList<Contact> contacts;
24 duarte 135 // nodes is a TreeMap of residue serials to residue types (3 letter code)
25 duarte 129 TreeMap<Integer,String> nodes;
26     String sequence;
27 duarte 143 public String accode;
28     public String chain;
29 duarte 123 double cutoff;
30     String ct;
31 duarte 135 boolean directed=false;
32 duarte 123
33 duarte 135 // these 2 fields only used when reading from db
34     int graphid=0;
35     int sm_id=0;
36    
/**
 * Builds a Graph from data that is already in memory: a list of contacts and a
 * map of nodes (residue serials to residue types), together with the sequence,
 * cutoff, contact type, accession code and chain.
 * The graph is marked as directed when the contact type contains a "/".
 * @param contacts the contacts of the graph
 * @param nodes map of residue serials to residue types
 * @param sequence the sequence
 * @param cutoff the distance cutoff
 * @param ct the contact type
 * @param accode the accession code
 * @param chain the chain identifier
 */
public Graph(ArrayList<Contact> contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff, String ct, String accode, String chain) {
    this.contacts = contacts;
    this.nodes = nodes;
    this.sequence = sequence;
    this.cutoff = cutoff;
    this.ct = ct;
    this.accode = accode;
    this.chain = chain;
    // directed defaults to false, so assigning the test result is enough
    this.directed = ct.contains("/");
}
60 duarte 135
/**
 * Constructs Graph object from graph db, given the dbname, accode, chaincode (classic pdb chain code), ct and cutoff.
 * The sequence is set to the empty string because the graph db does not hold the full sequence.
 * The db connection opened here is always closed, also when the graph id lookup fails.
 * @param dbname name of the graph database to connect to
 * @param accode the accession code
 * @param chaincode the pdb chain code ("NULL" for a null chain code)
 * @param cutoff the distance cutoff
 * @param ct the contact type
 * @throws GraphIdNotFoundError if thrown by getgraphid while looking up the graph id
 */
public Graph(String dbname, String accode, String chaincode, double cutoff, String ct) throws GraphIdNotFoundError {
    this.cutoff = cutoff;
    this.accode = accode;
    this.ct = ct;
    // we set the sequence to empty when we read from graph db. We don't have the full sequence in graph db
    // when we pass the sequence in getCM to the ContactMap constructor we want to have either a full sequence (with unobserveds) or a blank in case we don't have the info
    this.sequence = "";
    //TODO graphs in db are never directed, so this doesn't really apply here. Must solve all this!
    if (ct.contains("/")) {
        directed = true;
    }
    MySQLConnection conn = new MySQLConnection(MYSQLSERVER, MYSQLUSER, MYSQLPWD, dbname);
    try {
        getgraphid(conn, chaincode); // initialises graphid, sm_id and chain
        read_graph_from_db(conn); // gets contacts and nodes
    } finally {
        // close the connection even when getgraphid throws, otherwise it leaks
        conn.close();
    }
}
85 duarte 129
/**
 * Builds a Graph from a file of contacts.
 * Only the contacts are read: nodes stays unset and the sequence is blank, so
 * getCM cannot produce a ContactMap from an object created this way (it needs
 * both sequence and nodes).
 * @param contactsfile path of the file with the contacts
 * @param cutoff the distance cutoff
 * @param ct the contact type
 * @throws IOException if reading the file fails
 * @throws FileNotFoundException if the file can't be opened
 */
public Graph(String contactsfile, double cutoff, String ct) throws IOException, FileNotFoundException {
    this.ct = ct;
    this.cutoff = cutoff;
    // the full sequence is not available when reading from file, so it is left blank
    this.sequence = "";
    // directed defaults to false, so assigning the test result is enough
    this.directed = ct.contains("/");
    read_contacts_from_file(contactsfile);
}
106    
107 duarte 135 //TODO implement (from python) write_graph_to_db, do we really need it here??
108    
109     /** get user name from operating system (for use as database username) */
110     private static String getUserName() {
111     String user = null;
112     user = System.getProperty("user.name");
113     if(user == null) {
114     System.err.println("Could not get user name from operating system. Exiting");
115     System.exit(1);
116     }
117     return user;
118     }
119    
120 duarte 134 public void read_contacts_from_file (String contactsfile) throws FileNotFoundException, IOException {
121 duarte 142 //TODO eventually should read also nodes: either from file or obtain them from contacts (that would give only anyway nodes with contacts)
122 duarte 134 contacts = new ArrayList<Contact>();
123     System.out.println("Reading contacts from file "+contactsfile);
124     BufferedReader fcont = new BufferedReader(new FileReader(new File(contactsfile)));
125     String line;
126     while ((line = fcont.readLine() ) != null ) {
127     int i = Integer.parseInt(line.split("\\s+")[0]);
128     int j = Integer.parseInt(line.split("\\s+")[1]);
129     contacts.add(new Contact(i,j));
130     }
131     fcont.close();
132     }
133    
134 duarte 135 /**
135     * Reads contacts and nodes from db.
136     * The db must be a graph db following our standard format, i.e. must have tables:
137     * chain_graph, single_model_graph, single_model_node, single_model_edge
138     * We don't care here about the origin of the data (msdsd, pdbase, predicted) for the generation of the graph as long as it follows our data format
139     * We read both edges and nodes from single_model_edge and single_model_node.
140     * The sequence is taken from nodes, thus it won't have unobserved or non standard aas.
141     * @param conn
142     */
143     public void read_graph_from_db(MySQLConnection conn){
144     contacts = new ArrayList<Contact>();
145     nodes = new TreeMap<Integer, String>();
146     try {
147 duarte 142 // we read only half of the matrix (contacts in one direction only) so that we have the same type of contacts as when creating Graph from Pdb object
148 duarte 135 String sql="SELECT i_num,j_num FROM single_model_edge WHERE graph_id="+graphid+" AND j_num>i_num ORDER BY i_num,j_num ";
149     Statement stmt = conn.createStatement();
150     ResultSet rsst = stmt.executeQuery(sql);
151     while (rsst.next()) {
152     int i=rsst.getInt(1);
153     int j=rsst.getInt(2);
154     contacts.add(new Contact(i,j));
155     }
156     rsst.close();
157     stmt.close();
158     sql="SELECT num,res FROM single_model_node WHERE graph_id="+graphid+" ORDER BY num ";
159     stmt = conn.createStatement();
160     rsst = stmt.executeQuery(sql);
161     while (rsst.next()){
162     int num=rsst.getInt(1);
163     String res=rsst.getString(2);
164     nodes.put(num, AA.oneletter2threeletter(res));
165     }
166     rsst.close();
167     stmt.close();
168     } catch (SQLException e) {
169     e.printStackTrace();
170     }
171    
172     }
173    
174 duarte 141 public void getgraphid (MySQLConnection conn, String chaincode) throws GraphIdNotFoundError{
175     // input is chaincode i.e. pdb chain code
176     // we take chain (internal chain identifier, pchain_code for msdsd and asym_id for pdbase) from pchain_code field in chain_graph
177     // (in the chain_graph table the internal chain identifier is called 'pchain_code')
178 duarte 135 int pgraphid=0;
179 duarte 141 String chainstr="='"+chaincode+"' ";
180     if (chaincode.equals("NULL")){
181     chainstr=" IS NULL ";
182     }
183 duarte 135 try {
184 duarte 141 String sql="SELECT graph_id, pchain_code FROM chain_graph WHERE accession_code='"+accode+"' AND chain_pdb_code"+chainstr+" AND dist="+cutoff;
185 duarte 135 Statement stmt = conn.createStatement();
186     ResultSet rsst = stmt.executeQuery(sql);
187     int check=0;
188     while (rsst.next()) {
189     check++;
190     pgraphid=rsst.getInt(1);
191 duarte 141 chain=rsst.getString(2);
192 duarte 135 }
193     if (check!=1){
194 duarte 141 System.err.println("No pgraph_id match or more than 1 match for accession_code="+accode+", chain_pdb_code="+chaincode+", dist="+cutoff);
195 duarte 135 }
196     rsst.close();
197     stmt.close();
198     // we set the ctstr to the same as ct except in ALL case, where it is BB+SC+BB/SC
199     String ctstr=ct;
200     if (ct.equals("ALL")){
201     ctstr="BB+SC+BB/SC";
202     }
203     sql="SELECT graph_id,single_model_id FROM single_model_graph WHERE pgraph_id="+pgraphid+" AND CT='"+ctstr+"' AND dist="+cutoff+" AND CR='(true)' AND CW=1";
204     stmt = conn.createStatement();
205     rsst = stmt.executeQuery(sql);
206     check=0;
207     while (rsst.next()){
208     check++;
209     graphid=rsst.getInt(1);
210     sm_id=rsst.getInt(2);
211     }
212     if (check!=1){
213     System.err.println("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
214     throw new GraphIdNotFoundError("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
215     }
216     } catch (SQLException e) {
217     e.printStackTrace();
218     }
219    
220     }
221    
222 duarte 123 public void write_contacts_to_file (String outfile) throws IOException {
223     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
224     for (Contact pair:contacts){
225     int i_resser=pair.i;
226     int j_resser=pair.j;
227     Out.println(i_resser+"\t"+j_resser);
228     }
229     Out.close();
230     }
231 duarte 129
232     public ContactMap getCM() {
233     // residues is the map from residue nums to residue types used in ContactMap class, i.e. it is the same as Pdb.resser2restype or Graph.nodes
234     TreeMap<Integer,String> residues = new TreeMap<Integer,String>();
235     // we copy residues from nodes (deep copy)
236     for (int node:nodes.keySet()){
237     residues.put(node, nodes.get(node));
238     }
239     // check if we are in directed or undirected case. If undirected we fill the opposite contacts to pass a full list of contacts to ContactMap (which contains full matrix)
240     ArrayList<Contact> contacts2pass = new ArrayList<Contact>();
241     if (directed){
242     contacts2pass=contacts;
243     } else {
244     for (Contact cont:contacts){
245     int i_resser = cont.i;
246     int j_resser = cont.j;
247     contacts2pass.add(new Contact(i_resser,j_resser));
248     contacts2pass.add(new Contact(j_resser,i_resser));
249     }
250     }
251     // construct the ContactMap object and return it
252     ContactMap cm = new ContactMap(contacts2pass,residues,sequence);
253     return cm;
254    
255     }
256    
257 duarte 123 }