ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/MsdsdPdb.java
Revision: 492
Committed: Wed Jan 2 13:18:57 2008 UTC (16 years, 9 months ago) by duarte
File size: 14582 byte(s)
Log Message:
Copied the aglappe-jung branch into trunk.

Line User Rev File contents
1 duarte 207 package proteinstructure;
2    
3     import java.sql.ResultSet;
4     import java.sql.SQLException;
5     import java.sql.Statement;
6     import java.util.ArrayList;
7 duarte 222 import java.util.Collections;
8 duarte 207 import java.util.HashMap;
9 duarte 219 import java.util.TreeMap;
10 duarte 441 import java.util.TreeSet;
11 duarte 207
12 duarte 226 import javax.vecmath.Point3d;
13    
14 duarte 207 import tools.MySQLConnection;
15    
16     /**
17     * A single chain pdb protein structure loaded from a MSDSD database
18     * See http://www.ebi.ac.uk/msd-srv/docs/dbdoc/refaindex.html to know what MSDSD is
19     *
20     * @author Jose Duarte
21     * Class: MsdsdPdb
22     * Package: proteinstructure
23     */
24     public class MsdsdPdb extends Pdb {
25    
26     private final static String MYSQLSERVER="white";
27     private final static String MYSQLUSER=MySQLConnection.getUserName();
28     private final static String MYSQLPWD="nieve";
29     //private final static String DEFAULT_MYMSDSD_DB="my_msdsd_00_07_a";
30     private final static String DEFAULT_MSDSD_DB="msdsd_00_07_a";
31    
32     private MySQLConnection conn;
33    
34     private int chainid;
35     private int modelid;
36    
37 duarte 441 // TODO for this to be used by other people we need to do things without a myMsdsdDb (or also distribute our fixes database)
38 duarte 207 private String myMsdsdDb; // our database with add-ons and fixes to msdsd
39    
/**
 * Creates an empty MsdsdPdb for the given pdb code using the defaults of this class.
 * The MySQLConnection is built from MYSQLSERVER, MYSQLUSER and MYSQLPWD and the
 * database used is DEFAULT_MSDSD_DB.
 * No data is read here: it is loaded from the database upon call of
 * load(pdbChainCode, modelSerial).
 * @param pdbCode the pdb code of the entry
 * @throws SQLException declared because a new MySQLConnection is created here
 */
public MsdsdPdb (String pdbCode) throws SQLException {
    this(
        pdbCode,
        DEFAULT_MSDSD_DB,
        new MySQLConnection(MYSQLSERVER, MYSQLUSER, MYSQLPWD)
    );
}
51    
/**
 * Constructs an empty Pdb object given pdb code, a source db and a MySQLConnection.
 * Data will be loaded from database upon call of load(pdbChainCode, modelSerial)
 * @param pdbCode the pdb code; stored in lower case
 * @param db a msdsd database; the database with our fixes is taken to be "my_"+db
 * @param conn the connection used for all subsequent queries
 */
public MsdsdPdb (String pdbCode, String db, MySQLConnection conn) {
    // our convention: pdb codes are lower case.
    // An explicit locale is used so that the result does not depend on the JVM default
    // locale (e.g. in a Turkish locale "I" would be lower-cased to a dotless i).
    this.pdbCode = pdbCode.toLowerCase(java.util.Locale.ENGLISH);
    this.db = db;
    this.myMsdsdDb = "my_" + db; // i.e. for db=msdsd_00_07_a then myMsdsdDb=my_msdsd_00_07_a
    this.dataLoaded = false;

    this.conn = conn;
}
68 duarte 207
69 duarte 441 public void load(String pdbChainCode, int modelSerial) throws PdbLoadError {
70     this.model = modelSerial;
71     this.pdbChainCode=pdbChainCode; // NOTE! pdb chain codes are case sensitive!
72     try {
73     this.getchainid();// initialises chainid, modelid and chainCode
74    
75     if (check_inconsistent_res_numbering()){
76     throw new MsdsdInconsistentResidueNumbersError("Inconsistent residue numbering in msdsd for accession_code "+this.pdbCode+", chain_pdb_code "+this.pdbChainCode);
77     }
78    
79     this.sequence = read_seq();
80     this.fullLength = sequence.length();
81    
82     this.pdbresser2resser = get_ressers_mapping();
83    
84     this.read_atomData();
85    
86     this.obsLength = resser2restype.size();
87    
88     // we initialise resser2pdbresser from the pdbresser2resser HashMap
89     this.resser2pdbresser = new HashMap<Integer, String>();
90     for (String pdbresser:pdbresser2resser.keySet()){
91     resser2pdbresser.put(pdbresser2resser.get(pdbresser), pdbresser);
92     }
93    
94     secondaryStructure = new SecondaryStructure(); // create empty secondary structure first to make sure object is not null
95     readSecStructure();
96     if(!secondaryStructure.isEmpty()) {
97     secondaryStructure.setComment("MSDSD");
98     }
99    
100     // initialising atomser2atom from resser_atom2atomserial
101     atomser2atom = new HashMap<Integer, String>();
102     for (String resser_atom:resser_atom2atomserial.keySet()){
103     int atomserial = resser_atom2atomserial.get(resser_atom);
104     String atom = resser_atom.split("_")[1];
105     atomser2atom.put(atomserial,atom);
106     }
107    
108     dataLoaded = true;
109    
110     } catch (PdbCodeNotFoundError e) {
111     throw new PdbLoadError(e);
112     } catch (SQLException e) {
113     throw new PdbLoadError(e);
114     } catch (MsdsdInconsistentResidueNumbersError e) {
115     throw new PdbLoadError(e);
116     }
117 duarte 207
118 duarte 441 }
119    
120 stehr 470 public String[] getChains() throws PdbLoadError {
121 duarte 441 TreeSet<String> chains = new TreeSet<String>();
122     try {
123     String sql = "SELECT DISTINCT chain_pdb_code " + // the DISTINCT is because there can be a multi model entry
124     " FROM "+myMsdsdDb+".mmol_chain_info " +
125     " WHERE accession_code='"+pdbCode+"' " +
126     " AND chain_type='C' " +
127     " AND asu_chain=1 ";
128     Statement stmt = conn.createStatement();
129     ResultSet rsst = stmt.executeQuery(sql);
130     while (rsst.next()) {
131     String chain = rsst.getString(1);
132     if (chain==null) chain="NULL";
133     chains.add(chain);
134     }
135     rsst.close();
136     stmt.close();
137     } catch (SQLException e) {
138 stehr 470 throw new PdbLoadError(e);
139 duarte 207 }
140 duarte 219
141 duarte 441 if (chains.isEmpty()) return null;
142    
143     String[] chainsArray = new String[chains.size()];
144     chains.toArray(chainsArray);
145     return chainsArray;
146     }
147    
148 stehr 470 public Integer[] getModels() throws PdbLoadError {
149 duarte 441 TreeSet<Integer> models = new TreeSet<Integer>();
150     try {
151     String sql = "SELECT DISTINCT model_serial " +
152     " FROM "+myMsdsdDb+".mmol_chain_info " +
153     " WHERE accession_code='"+pdbCode+"' " +
154     " AND chain_type='C' " +
155     " AND asu_chain=1 ";
156     Statement stmt = conn.createStatement();
157     ResultSet rsst = stmt.executeQuery(sql);
158     while (rsst.next()) {
159     models.add(rsst.getInt(1));
160     }
161     rsst.close();
162     stmt.close();
163    
164     } catch (SQLException e) {
165 stehr 470 throw new PdbLoadError(e);
166 stehr 259 }
167 duarte 237
168 duarte 441 if (models.isEmpty()) return null;
169     Integer[] modelsArray = new Integer[models.size()];
170     models.toArray(modelsArray);
171     return modelsArray;
172 duarte 207 }
173 duarte 441
174 stehr 215 private void getchainid() throws PdbCodeNotFoundError, SQLException {
175 duarte 207 chainid=0;
176     String chaincodestr="='"+pdbChainCode+"'";
177 stehr 336 if (pdbChainCode.equals(Pdb.NULL_CHAIN_CODE)){
178 duarte 207 chaincodestr="IS NULL";
179     }
180     String sql = "SELECT chain_id, model_id, pchain_code " +
181     " FROM "+myMsdsdDb+".mmol_chain_info " +
182     " WHERE accession_code='"+pdbCode+"' " +
183     " AND chain_pdb_code "+chaincodestr +
184     " AND chain_type='C' " +
185     " AND asu_chain=1 " +
186     " AND model_serial="+model;
187    
188 duarte 210 Statement stmt = conn.createStatement();
189     ResultSet rsst = stmt.executeQuery(sql);
190     if (rsst.next()) {
191     chainid = rsst.getInt(1);
192     modelid = rsst.getInt(2);
193     chainCode=rsst.getString(3);
194     if (! rsst.isLast()) {
195 stehr 215 //System.err.println("More than 1 chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
196     throw new PdbCodeNotFoundError("More than 1 chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
197 duarte 210 }
198     } else {
199 stehr 215 //System.err.println("No chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
200     throw new PdbCodeNotFoundError("No chain_id could be matched for accession_code "+pdbCode+", chain_pdb_code "+pdbChainCode);
201 duarte 210 }
202     rsst.close();
203     stmt.close();
204 duarte 207 }
205    
206 duarte 210 private boolean check_inconsistent_res_numbering() throws SQLException{
207 duarte 207 int count=0;
208     int numserial=0;
209 duarte 210
210     String sql="SELECT count(*) " +
211     " FROM "+myMsdsdDb+".problem_serial_chain " +
212     " WHERE chain_id="+chainid +
213     " AND (min_serial!=1 OR num_serial!=num_dist_serial OR num_serial!=max_serial-min_serial+1)";
214     Statement stmt = conn.createStatement();
215     ResultSet rsst = stmt.executeQuery(sql);
216     while (rsst.next()) {
217     count = rsst.getInt(1);
218     if (count>0){
219 duarte 207 return true;
220     }
221     }
222 duarte 210 sql="SELECT num_serial FROM "+myMsdsdDb+".problem_serial_chain WHERE chain_id="+chainid;
223     rsst = stmt.executeQuery(sql);
224     int check = 0;
225     while (rsst.next()){
226     check++;
227     numserial=rsst.getInt(1);
228     }
229     if (check!=1){
230     System.err.println("No num_serial match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
231     }
232     String allresseq = read_seq();
233     if (allresseq.length()!=numserial){
234     System.err.println("num_serial and length of all_res_seq don't match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
235     return true;
236     }
237     rsst.close();
238     stmt.close();
239 duarte 207 return false;
240     }
241    
242 duarte 210 private void read_atomData() throws SQLException{
243 duarte 207 resser_atom2atomserial = new HashMap<String,Integer>();
244     resser2restype = new HashMap<Integer,String>();
245 duarte 226 atomser2coord = new HashMap<Integer,Point3d>();
246 duarte 207 atomser2resser = new HashMap<Integer,Integer>();
247    
248     String sql = "SELECT serial,chem_atom_name,code_3_letter,residue_serial,x,y,z " +
249     " FROM "+db+".atom_data " +
250     " WHERE (model_id = "+modelid+") " +
251     " AND (chain_id = "+chainid+") " +
252     " AND (graph_alt_code_used = 1) " +
253     " AND (graph_standard_aa=1) " +
254     " AND (pdb_group = 'A')" +
255     " ORDER BY chain_code, residue_serial, serial";
256    
257 duarte 210 Statement stmt = conn.createStatement();
258     ResultSet rsst = stmt.executeQuery(sql);
259     int count=0;
260     while (rsst.next()){
261     count++;
262    
263     int atomserial = rsst.getInt(1); // atomserial
264     String atom = rsst.getString(2).trim(); // atom
265     String res_type = rsst.getString(3).trim(); // res_type
266     int res_serial = rsst.getInt(4); // res_serial
267     double x = rsst.getDouble(5); // x
268     double y = rsst.getDouble(6); // y
269     double z = rsst.getDouble(7); // z
270 duarte 226 Point3d coords = new Point3d(x, y, z);
271 duarte 326 if (AAinfo.isValidAA(res_type)) {
272 duarte 210 atomser2coord.put(atomserial, coords);
273     atomser2resser.put(atomserial, res_serial);
274     resser2restype.put(res_serial, res_type);
275 duarte 326 if (AAinfo.isValidAtomWithOXT(res_type,atom)){
276 duarte 210 resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
277     }
278 duarte 207 }
279 duarte 210
280    
281 duarte 207 }
282 duarte 210 if (count==0){
283     System.err.println("atom data query returned no data at all for model_id="+modelid+", model_id="+modelid);
284     }
285     rsst.close();
286     stmt.close();
287 duarte 207 }
288    
289 duarte 210 private String read_seq() throws SQLException{
290 duarte 207 String allresseq="";
291     String sql="SELECT all_res_seq FROM "+myMsdsdDb+".chain_seq WHERE chain_id="+chainid;
292    
293 duarte 210 Statement stmt = conn.createStatement();
294     ResultSet rsst = stmt.executeQuery(sql);
295     int check = 0;
296     if (rsst.next()) {
297     check++;
298     allresseq=rsst.getString(1);
299     }
300     if (check!=1) {
301     System.err.println("No all_res_seq match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode+", chain_id="+chainid);
302     }
303     rsst.close();
304     stmt.close();
305    
306 duarte 207 return allresseq;
307     }
308    
309 duarte 210 private HashMap<String,Integer> get_ressers_mapping() throws SQLException {
310 duarte 207 HashMap<String,Integer> map = new HashMap<String, Integer>();
311     String sql="SELECT serial, concat(pdb_seq,IF(pdb_insert_code IS NULL,'',pdb_insert_code)) " +
312     " FROM "+db+".residue " +
313     " WHERE chain_id="+chainid+
314     " AND pdb_seq IS NOT NULL";
315 duarte 210
316     Statement stmt = conn.createStatement();
317     ResultSet rsst = stmt.executeQuery(sql);
318     int count=0;
319     while (rsst.next()) {
320     count++;
321     int resser = rsst.getInt(1);
322     String pdbresser = rsst.getString(2);
323     map.put(pdbresser, resser);
324     }
325     if (count==0) {
326     System.err.println("No residue serials mapping data match for chain_id="+chainid);
327 duarte 207 }
328 duarte 210 rsst.close();
329     stmt.close();
330 duarte 207
331     return map;
332     }
333    
334 duarte 219 private void readSecStructure() throws SQLException{
335 stehr 274 this.secondaryStructure = new SecondaryStructure();
336 duarte 219
337     //HELIX -- helix table
338     String sql = "SELECT helix_serial, beg_residue_serial, end_residue_serial " +
339     " FROM "+db+".helix " +
340     " WHERE (model_id = "+modelid+") " +
341     " AND (chain_id = "+chainid+") ";
342     Statement stmt = conn.createStatement();
343     ResultSet rsst = stmt.executeQuery(sql);
344     int count=0;
345     while (rsst.next()) {
346     count++;
347     int serial = rsst.getInt(1);
348     int beg = rsst.getInt(2);
349     int end =rsst.getInt(3);
350 stehr 274 String ssId = "" + SecStrucElement.HELIX+serial;
351     SecStrucElement ssElem = new SecStrucElement(SecStrucElement.HELIX,beg,end,ssId);
352     secondaryStructure.add(ssElem);
353 duarte 219 }
354     rsst.close();
355     stmt.close();
356     //SHEET -- strand table
357     sql = "SELECT sheet_serial, strand_serial, strand_beg_residue_serial, strand_end_residue_serial " +
358     " FROM "+db+".strand " +
359     " WHERE (model_id = "+modelid+") " +
360     " AND (chain_id = "+chainid+") ";
361     stmt = conn.createStatement();
362     rsst = stmt.executeQuery(sql);
363     // we store everything in these 2 maps to assign later to resser2secstruct based on our own ids (ids are not very consistent in msdsd)
364 duarte 222 HashMap<Integer,Interval> strands2begEnd = new HashMap<Integer, Interval>();
365 duarte 219 TreeMap<Integer,ArrayList<Integer>> sheets2strands = new TreeMap<Integer, ArrayList<Integer>>();
366     count=0;
367     while (rsst.next()) {
368     count++;
369     int sheetSerial = rsst.getInt(1);
370     int strandSerial = rsst.getInt(2);
371     int beg = rsst.getInt(3);
372     int end =rsst.getInt(4);
373 duarte 222 strands2begEnd.put(strandSerial, new Interval(beg,end));
374 duarte 219 if (sheets2strands.containsKey(sheetSerial)){
375     sheets2strands.get(sheetSerial).add(strandSerial);
376     } else {
377     ArrayList<Integer> strands = new ArrayList<Integer>();
378     strands.add(strandSerial);
379     sheets2strands.put(sheetSerial, strands);
380     }
381     }
382     rsst.close();
383     stmt.close();
384     char sheet='A';
385     for (int sheetSerial:sheets2strands.keySet()){
386     int strand=1;
387     for (int strandSerial:sheets2strands.get(sheetSerial)){
388 duarte 222 Interval begEnd = strands2begEnd.get(strandSerial);
389 stehr 274 String ssId = ""+SecStrucElement.STRAND+sheet+strand;
390     SecStrucElement ssElem = new SecStrucElement(SecStrucElement.STRAND,begEnd.beg,begEnd.end,ssId);
391     secondaryStructure.add(ssElem);
392 duarte 219 strand++;
393     }
394     sheet++;
395     }
396    
397     //TURN -- turn table
398     // they forgot to fill up the turn_serial field so we have to use turn_id and get a serial from it that is unique within the chain only
399     sql = "SELECT turn_id, res_1_residue_serial, res_2_residue_serial, res_3_residue_serial, res_4_residue_serial " +
400     " FROM "+db+".turn " +
401     " WHERE (model_id = "+modelid+") " +
402     " AND (chain_id = "+chainid+") ";
403     stmt = conn.createStatement();
404     rsst = stmt.executeQuery(sql);
405 duarte 222 TreeMap<Integer,ArrayList<Integer>> turns = new TreeMap<Integer, ArrayList<Integer>>();
406 duarte 219 count=0;
407     while (rsst.next()) {
408     count++;
409     int dbId = rsst.getInt(1);
410     int res1 = rsst.getInt(2);
411     int res2 = rsst.getInt(3);
412     int res3 = rsst.getInt(4);
413     int res4 = rsst.getInt(5);
414 duarte 222 ArrayList<Integer> residues = new ArrayList<Integer>();
415     if (res1!=0) residues.add(res1); // res is 0 when the field is NULL in database
416     if (res2!=0) residues.add(res2);
417     if (res3!=0) residues.add(res3);
418     if (res4!=0) residues.add(res4);
419 duarte 219 turns.put(dbId, residues);
420     }
421     rsst.close();
422     stmt.close();
423     int serial=1;
424     for (int dbId:turns.keySet()){
425 stehr 274 String ssId = "" + SecStrucElement.TURN + serial;
426 duarte 222 int beg = Collections.min(turns.get(dbId));
427     int end = Collections.max(turns.get(dbId));
428 stehr 274 SecStrucElement ssElem = new SecStrucElement(SecStrucElement.TURN,beg,end,ssId);
429     secondaryStructure.add(ssElem);
430 duarte 219 serial++;
431     }
432    
433     }
434 duarte 207 }