ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 135
Committed: Mon May 14 09:49:04 2007 UTC (17 years, 9 months ago) by duarte
File size: 8357 byte(s)
Log Message:
NEW FUNCTIONALITY: reading of graph from db is fully implemented for all cases

New chain member variable in the Info classes, read in get_asym_id (Pdbase) and in get_chain_id (Msdsd)
Reading chain also in Pdb in read_pdb_data_from_file
Not reading chain anymore in read_atomData of PdbaseInfo
Added oneletter2threeletter and getoneletter2threeletter to AA class
Changes in Graph:
- added db static vars and getUserName method
- new member variables graphid and sm_id
- new method read_graph_from_db to read contacts, nodes (and sequence from nodes) from db
- new method getgraphid
New Exception class GraphIdNotFoundError
Line User Rev File contents
1 duarte 123 package proteinstructure;
2 duarte 134 import java.io.BufferedReader;
3     import java.io.File;
4     import java.io.FileNotFoundException;
5 duarte 123 import java.io.FileOutputStream;
6 duarte 134 import java.io.FileReader;
7 duarte 123 import java.io.PrintStream;
8     import java.io.IOException;
9 duarte 135 import java.sql.ResultSet;
10     import java.sql.SQLException;
11     import java.sql.Statement;
12 duarte 123 import java.util.ArrayList;
13 duarte 129 import java.util.TreeMap;
14 duarte 135 import tools.MySQLConnection;
15 duarte 123
16    
17     public class Graph {
18    
19 duarte 135 public final static String MYSQLSERVER="white";
20     public final static String MYSQLUSER=getUserName();
21     public final static String MYSQLPWD="nieve";
22    
23 duarte 123 ArrayList<Contact> contacts;
24 duarte 135 // nodes is a TreeMap of residue serials to residue types (3 letter code)
25 duarte 129 TreeMap<Integer,String> nodes;
26     String sequence;
27     String accode;
28     String chain;
29 duarte 123 double cutoff;
30     String ct;
31 duarte 135 boolean directed=false;
32 duarte 123
33 duarte 135 // these 2 fields only used when reading from db
34     int graphid=0;
35     int sm_id=0;
36    
37 duarte 134 /**
38     * Constructs Graph object by passing ArrayList with contacts and TreeMap with nodes (res serials and types)
39     * Must also pass contact type, cutoff, accession code and chain
40     * @param contacts
41     * @param nodes
42     * @param sequence
43     * @param cutoff
44     * @param ct
45     * @param accode
46     * @param chain
47     */
48 duarte 129 public Graph (ArrayList<Contact> contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String accode, String chain) {
49 duarte 123 this.contacts=contacts;
50     this.cutoff=cutoff;
51 duarte 129 this.nodes=nodes;
52     this.sequence=sequence;
53     this.accode=accode;
54     this.chain=chain;
55 duarte 123 this.ct=ct;
56 duarte 129 if (ct.contains("/")){
57     directed=true;
58     }
59 duarte 123 }
60 duarte 135
61     /**
62     * Constructs Graph object from graph db
63     * ATTENTION!! chain is the internal database identifier, NOT! the pdb chain code
64     * TODO: we should also have a method to construct Graph from db using a pdb chain code
65     * @param dbname
66     * @param accode
67     * @param chain
68     * @param cutoff
69     * @param ct
70     */
71     public Graph(String dbname, String accode, String chain, double cutoff, String ct) throws GraphIdNotFoundError{
72     this.cutoff=cutoff;
73     this.accode=accode;
74     this.chain=chain;
75     this.ct=ct;
76     //TODO graphs in db are never directed, so this doesn't really apply here. Must solve all this!
77     if (ct.contains("/")){
78     directed=true;
79     }
80     MySQLConnection conn = new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD,dbname);
81     getgraphid(conn); // initialises graphid and sm_id
82     read_graph_from_db(conn);
83     conn.close();
84     }
85 duarte 129
86 duarte 134 /**
87     * Constructs Graph object by reading a file with contacts
88     * An object created with this constructor will be missing the fields sequence and nodes
89     * That means it's not possible to get a ContactMap from it using getCM because CM needs both sequence and nodes
90     * @param contactsfile
91     * @param cutoff
92     * @param ct
93     * @throws IOException
94     * @throws FileNotFoundException
95     */
96     public Graph (String contactsfile, double cutoff,String ct) throws IOException, FileNotFoundException{
97     this.cutoff=cutoff;
98     this.ct=ct;
99     if (ct.contains("/")){
100     directed=true;
101     }
102     read_contacts_from_file(contactsfile);
103     }
104    
105 duarte 135 //TODO implement (from python) write_graph_to_db, do we really need it here??
106    
107     /** get user name from operating system (for use as database username) */
108     private static String getUserName() {
109     String user = null;
110     user = System.getProperty("user.name");
111     if(user == null) {
112     System.err.println("Could not get user name from operating system. Exiting");
113     System.exit(1);
114     }
115     return user;
116     }
117    
118 duarte 134 public void read_contacts_from_file (String contactsfile) throws FileNotFoundException, IOException {
119     contacts = new ArrayList<Contact>();
120     System.out.println("Reading contacts from file "+contactsfile);
121     BufferedReader fcont = new BufferedReader(new FileReader(new File(contactsfile)));
122     String line;
123     while ((line = fcont.readLine() ) != null ) {
124     int i = Integer.parseInt(line.split("\\s+")[0]);
125     int j = Integer.parseInt(line.split("\\s+")[1]);
126     contacts.add(new Contact(i,j));
127     }
128     fcont.close();
129     }
130    
131 duarte 135 /**
132     * Reads contacts and nodes from db.
133     * The db must be a graph db following our standard format, i.e. must have tables:
134     * chain_graph, single_model_graph, single_model_node, single_model_edge
135     * We don't care here about the origin of the data (msdsd, pdbase, predicted) for the generation of the graph as long as it follows our data format
136     * We read both edges and nodes from single_model_edge and single_model_node.
137     * The sequence is taken from nodes, thus it won't have unobserved or non standard aas.
138     * @param conn
139     */
140     public void read_graph_from_db(MySQLConnection conn){
141     contacts = new ArrayList<Contact>();
142     nodes = new TreeMap<Integer, String>();
143     sequence = "";
144     try {
145     String sql="SELECT i_num,j_num FROM single_model_edge WHERE graph_id="+graphid+" AND j_num>i_num ORDER BY i_num,j_num ";
146     Statement stmt = conn.createStatement();
147     ResultSet rsst = stmt.executeQuery(sql);
148     while (rsst.next()) {
149     int i=rsst.getInt(1);
150     int j=rsst.getInt(2);
151     contacts.add(new Contact(i,j));
152     }
153     rsst.close();
154     stmt.close();
155     sql="SELECT num,res FROM single_model_node WHERE graph_id="+graphid+" ORDER BY num ";
156     stmt = conn.createStatement();
157     rsst = stmt.executeQuery(sql);
158     while (rsst.next()){
159     int num=rsst.getInt(1);
160     String res=rsst.getString(2);
161     nodes.put(num, AA.oneletter2threeletter(res));
162     sequence+=res;
163     }
164     rsst.close();
165     stmt.close();
166     } catch (SQLException e) {
167     e.printStackTrace();
168     }
169    
170     }
171    
172     public void getgraphid (MySQLConnection conn) throws GraphIdNotFoundError{
173     // NOTE: as chain we are using our internal identifier, which is the pchain_code in msdsd or the asym_id in pdbase
174     // in the chain_graph table the internal chain identifier is called 'pchain_code'
175     int pgraphid=0;
176     try {
177     String sql="SELECT graph_id FROM chain_graph WHERE accession_code='"+accode+"' AND pchain_code='"+chain+"' AND dist="+cutoff;
178     Statement stmt = conn.createStatement();
179     ResultSet rsst = stmt.executeQuery(sql);
180     int check=0;
181     while (rsst.next()) {
182     check++;
183     pgraphid=rsst.getInt(1);
184     }
185     if (check!=1){
186     System.err.println("No pgraph_id match or more than 1 match for accession_code="+accode+", pchain_code="+chain+", dist="+cutoff);
187     }
188     rsst.close();
189     stmt.close();
190     // we set the ctstr to the same as ct except in ALL case, where it is BB+SC+BB/SC
191     String ctstr=ct;
192     if (ct.equals("ALL")){
193     ctstr="BB+SC+BB/SC";
194     }
195     sql="SELECT graph_id,single_model_id FROM single_model_graph WHERE pgraph_id="+pgraphid+" AND CT='"+ctstr+"' AND dist="+cutoff+" AND CR='(true)' AND CW=1";
196     stmt = conn.createStatement();
197     rsst = stmt.executeQuery(sql);
198     check=0;
199     while (rsst.next()){
200     check++;
201     graphid=rsst.getInt(1);
202     sm_id=rsst.getInt(2);
203     }
204     if (check!=1){
205     System.err.println("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
206     throw new GraphIdNotFoundError("No graph_id match or more than 1 match for pgraph_id="+pgraphid+", CT="+ctstr+" and cutoff="+cutoff);
207     }
208     } catch (SQLException e) {
209     e.printStackTrace();
210     }
211    
212     }
213    
214 duarte 123 public void write_contacts_to_file (String outfile) throws IOException {
215     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
216     for (Contact pair:contacts){
217     int i_resser=pair.i;
218     int j_resser=pair.j;
219     Out.println(i_resser+"\t"+j_resser);
220     }
221     Out.close();
222     }
223 duarte 129
224     public ContactMap getCM() {
225     // residues is the map from residue nums to residue types used in ContactMap class, i.e. it is the same as Pdb.resser2restype or Graph.nodes
226     TreeMap<Integer,String> residues = new TreeMap<Integer,String>();
227     // we copy residues from nodes (deep copy)
228     for (int node:nodes.keySet()){
229     residues.put(node, nodes.get(node));
230     }
231     // check if we are in directed or undirected case. If undirected we fill the opposite contacts to pass a full list of contacts to ContactMap (which contains full matrix)
232     ArrayList<Contact> contacts2pass = new ArrayList<Contact>();
233     if (directed){
234     contacts2pass=contacts;
235     } else {
236     for (Contact cont:contacts){
237     int i_resser = cont.i;
238     int j_resser = cont.j;
239     contacts2pass.add(new Contact(i_resser,j_resser));
240     contacts2pass.add(new Contact(j_resser,i_resser));
241     }
242     }
243     // construct the ContactMap object and return it
244     ContactMap cm = new ContactMap(contacts2pass,residues,sequence);
245     return cm;
246    
247     }
248    
249 duarte 123 }