ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/MsdsdPdb.java
Revision: 278
Committed: Tue Aug 14 12:14:04 2007 UTC (17 years, 2 months ago) by duarte
File size: 14509 byte(s)
Log Message:
Fixed bug with getFullLength when reading from pdb file
Now, if SEQRES is missing, fullLength is the maximum observed residue number instead of the total number of observed residues
New member variables obsLength and fullLength
Line User Rev File contents
1 duarte 207 package proteinstructure;
2    
3     import java.sql.ResultSet;
4     import java.sql.SQLException;
5     import java.sql.Statement;
6     import java.util.ArrayList;
7 duarte 222 import java.util.Collections;
8 duarte 207 import java.util.HashMap;
9 duarte 219 import java.util.TreeMap;
10 duarte 207
11 duarte 226 import javax.vecmath.Point3d;
12    
13 duarte 207 import tools.MySQLConnection;
14    
15     /**
16     * A single chain pdb protein structure loaded from a MSDSD database
17     * See http://www.ebi.ac.uk/msd-srv/docs/dbdoc/refaindex.html to know what MSDSD is
18     *
19     * @author Jose Duarte
20     * Class: MsdsdPdb
21     * Package: proteinstructure
22     */
23     public class MsdsdPdb extends Pdb {
24    
25     private final static String MYSQLSERVER="white";
26     private final static String MYSQLUSER=MySQLConnection.getUserName();
27     private final static String MYSQLPWD="nieve";
28     //private final static String DEFAULT_MYMSDSD_DB="my_msdsd_00_07_a";
29     private final static String DEFAULT_MSDSD_DB="msdsd_00_07_a";
30    
31     private MySQLConnection conn;
32    
33     private int chainid;
34     private int modelid;
35    
36     // TODO for this to be able to be used by other people we need to do things without a myMsdsdDb (or also distribute our fixes database)
37     private String myMsdsdDb; // our database with add-ons and fixes to msdsd
38    
/**
 * Creates a Pdb object for the given pdb code and pdb chain code.
 * The model is DEFAULT_MODEL, the database is DEFAULT_MSDSD_DB and a new
 * MySQLConnection is opened from the class defaults MYSQLSERVER, MYSQLUSER
 * and MYSQLPWD.
 * @param pdbCode the pdb code
 * @param pdbChainCode the pdb chain code
 * @throws PdbCodeNotFoundError
 * @throws MsdsdInconsistentResidueNumbersError
 * @throws SQLException
 */
public MsdsdPdb (String pdbCode, String pdbChainCode) throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
    this(pdbCode,
         pdbChainCode,
         DEFAULT_MODEL,
         DEFAULT_MSDSD_DB,
         new MySQLConnection(MYSQLSERVER, MYSQLUSER, MYSQLPWD));
}
53    
/**
 * Creates a Pdb object for the given pdb code and pdb chain code, reading
 * from the given database through the given connection.
 * The model is DEFAULT_MODEL.
 * @param pdbCode the pdb code
 * @param pdbChainCode the pdb chain code
 * @param db the source database; must be a msdsd database
 * @param conn the connection used for all queries
 * @throws PdbCodeNotFoundError
 * @throws MsdsdInconsistentResidueNumbersError
 * @throws SQLException
 */
public MsdsdPdb (String pdbCode, String pdbChainCode, String db, MySQLConnection conn) throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
    this(pdbCode,
         pdbChainCode,
         DEFAULT_MODEL,
         db,
         conn);
}
69    
/**
 * Creates a Pdb object for the given pdb code, pdb chain code and model serial.
 * The database is DEFAULT_MSDSD_DB and a new MySQLConnection is opened from
 * the class defaults MYSQLSERVER, MYSQLUSER and MYSQLPWD.
 * @param pdbCode the pdb code
 * @param pdbChainCode the pdb chain code
 * @param model_serial the model serial
 * @throws PdbCodeNotFoundError
 * @throws MsdsdInconsistentResidueNumbersError
 * @throws SQLException
 */
public MsdsdPdb (String pdbCode, String pdbChainCode, int model_serial) throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
    this(pdbCode,
         pdbChainCode,
         model_serial,
         DEFAULT_MSDSD_DB,
         new MySQLConnection(MYSQLSERVER, MYSQLUSER, MYSQLPWD));
}
84    
85     /**
86     * Constructs Pdb object given pdb code, pdb chain code, model serial, a source db and a MySQLConnection.
87     * db must be a msdsd database
88     * @param pdbCode
89     * @param pdbChainCode
90     * @param model_serial
91     * @param db
92     * @param conn
93 stehr 215 * @throws PdbCodeNotFoundError
94 duarte 207 * @throws MsdsdInconsistentResidueNumbersError
95 duarte 210 * @throws SQLException
96 duarte 207 */
97 stehr 215 public MsdsdPdb (String pdbCode, String pdbChainCode, int model_serial, String db, MySQLConnection conn) throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
98 stehr 217 this.pdbCode=pdbCode.toLowerCase(); // our convention: pdb codes are lower case
99     this.pdbChainCode=pdbChainCode.toUpperCase(); // our convention: chain codes are upper case
100 duarte 207 this.model=model_serial;
101     this.db=db;
102     this.myMsdsdDb="my_"+db; // i.e. for db=msdsd_00_07_a then myMsdsdDb=my_msdsd_00_07_a
103    
104     this.conn = conn;
105    
106     this.getchainid();// initialises chainid, modelid and chainCode
107    
108     if (check_inconsistent_res_numbering()){
109     throw new MsdsdInconsistentResidueNumbersError("Inconsistent residue numbering in msdsd for accession_code "+this.pdbCode+", chain_pdb_code "+this.pdbChainCode);
110     }
111    
112     this.sequence = read_seq();
113 duarte 278 this.fullLength = sequence.length();
114    
115 duarte 207 this.pdbresser2resser = get_ressers_mapping();
116    
117     this.read_atomData();
118    
119 duarte 278 this.obsLength = resser2restype.size();
120    
121 duarte 207 // we initialise resser2pdbresser from the pdbresser2resser HashMap
122     this.resser2pdbresser = new HashMap<Integer, String>();
123     for (String pdbresser:pdbresser2resser.keySet()){
124     resser2pdbresser.put(pdbresser2resser.get(pdbresser), pdbresser);
125     }
126 duarte 219
127 stehr 274 secondaryStructure = new SecondaryStructure(); // create empty secondary structure first to make sure object is not null
128     readSecStructure();
129     if(!secondaryStructure.isEmpty()) {
130     secondaryStructure.setComment("MSDSD");
131 stehr 259 }
132 duarte 237
133     // initialising atomser2atom from resser_atom2atomserial
134     atomser2atom = new HashMap<Integer, String>();
135     for (String resser_atom:resser_atom2atomserial.keySet()){
136     int atomserial = resser_atom2atomserial.get(resser_atom);
137     String atom = resser_atom.split("_")[1];
138     atomser2atom.put(atomserial,atom);
139     }
140 duarte 207 }
141    
142 stehr 215 private void getchainid() throws PdbCodeNotFoundError, SQLException {
143 duarte 207 chainid=0;
144     String chaincodestr="='"+pdbChainCode+"'";
145     if (pdbChainCode.equals("NULL")){
146     chaincodestr="IS NULL";
147     }
148     String sql = "SELECT chain_id, model_id, pchain_code " +
149     " FROM "+myMsdsdDb+".mmol_chain_info " +
150     " WHERE accession_code='"+pdbCode+"' " +
151     " AND chain_pdb_code "+chaincodestr +
152     " AND chain_type='C' " +
153     " AND asu_chain=1 " +
154     " AND model_serial="+model;
155    
156 duarte 210 Statement stmt = conn.createStatement();
157     ResultSet rsst = stmt.executeQuery(sql);
158     if (rsst.next()) {
159     chainid = rsst.getInt(1);
160     modelid = rsst.getInt(2);
161     chainCode=rsst.getString(3);
162     if (! rsst.isLast()) {
163 stehr 215 //System.err.println("More than 1 chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
164     throw new PdbCodeNotFoundError("More than 1 chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
165 duarte 210 }
166     } else {
167 stehr 215 //System.err.println("No chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
168     throw new PdbCodeNotFoundError("No chain_id could be matched for accession_code "+pdbCode+", chain_pdb_code "+pdbChainCode);
169 duarte 210 }
170     rsst.close();
171     stmt.close();
172 duarte 207 }
173    
174 duarte 210 private boolean check_inconsistent_res_numbering() throws SQLException{
175 duarte 207 int count=0;
176     int numserial=0;
177 duarte 210
178     String sql="SELECT count(*) " +
179     " FROM "+myMsdsdDb+".problem_serial_chain " +
180     " WHERE chain_id="+chainid +
181     " AND (min_serial!=1 OR num_serial!=num_dist_serial OR num_serial!=max_serial-min_serial+1)";
182     Statement stmt = conn.createStatement();
183     ResultSet rsst = stmt.executeQuery(sql);
184     while (rsst.next()) {
185     count = rsst.getInt(1);
186     if (count>0){
187 duarte 207 return true;
188     }
189     }
190 duarte 210 sql="SELECT num_serial FROM "+myMsdsdDb+".problem_serial_chain WHERE chain_id="+chainid;
191     rsst = stmt.executeQuery(sql);
192     int check = 0;
193     while (rsst.next()){
194     check++;
195     numserial=rsst.getInt(1);
196     }
197     if (check!=1){
198     System.err.println("No num_serial match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
199     }
200     String allresseq = read_seq();
201     if (allresseq.length()!=numserial){
202     System.err.println("num_serial and length of all_res_seq don't match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
203     return true;
204     }
205     rsst.close();
206     stmt.close();
207 duarte 207 return false;
208     }
209    
210 duarte 210 private void read_atomData() throws SQLException{
211 duarte 207 resser_atom2atomserial = new HashMap<String,Integer>();
212     resser2restype = new HashMap<Integer,String>();
213 duarte 226 atomser2coord = new HashMap<Integer,Point3d>();
214 duarte 207 atomser2resser = new HashMap<Integer,Integer>();
215    
216     String sql = "SELECT serial,chem_atom_name,code_3_letter,residue_serial,x,y,z " +
217     " FROM "+db+".atom_data " +
218     " WHERE (model_id = "+modelid+") " +
219     " AND (chain_id = "+chainid+") " +
220     " AND (graph_alt_code_used = 1) " +
221     " AND (graph_standard_aa=1) " +
222     " AND (pdb_group = 'A')" +
223     " ORDER BY chain_code, residue_serial, serial";
224    
225 duarte 210 Statement stmt = conn.createStatement();
226     ResultSet rsst = stmt.executeQuery(sql);
227     int count=0;
228     while (rsst.next()){
229     count++;
230    
231     int atomserial = rsst.getInt(1); // atomserial
232     String atom = rsst.getString(2).trim(); // atom
233     String res_type = rsst.getString(3).trim(); // res_type
234     int res_serial = rsst.getInt(4); // res_serial
235     double x = rsst.getDouble(5); // x
236     double y = rsst.getDouble(6); // y
237     double z = rsst.getDouble(7); // z
238 duarte 226 Point3d coords = new Point3d(x, y, z);
239 duarte 210 ArrayList<String> aalist=AA.aas();
240     if (aalist.contains(res_type)) {
241     atomser2coord.put(atomserial, coords);
242     atomser2resser.put(atomserial, res_serial);
243     resser2restype.put(res_serial, res_type);
244     ArrayList<String> atomlist = aas2atoms.get(res_type);
245 duarte 275 atomlist.add("OXT"); // the extra atom OXT is there in the last residue of the chain
246 duarte 210 if (atomlist.contains(atom)){
247     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
248     }
249 duarte 207 }
250 duarte 210
251    
252 duarte 207 }
253 duarte 210 if (count==0){
254     System.err.println("atom data query returned no data at all for model_id="+modelid+", model_id="+modelid);
255     }
256     rsst.close();
257     stmt.close();
258 duarte 207 }
259    
260 duarte 210 private String read_seq() throws SQLException{
261 duarte 207 String allresseq="";
262     String sql="SELECT all_res_seq FROM "+myMsdsdDb+".chain_seq WHERE chain_id="+chainid;
263    
264 duarte 210 Statement stmt = conn.createStatement();
265     ResultSet rsst = stmt.executeQuery(sql);
266     int check = 0;
267     if (rsst.next()) {
268     check++;
269     allresseq=rsst.getString(1);
270     }
271     if (check!=1) {
272     System.err.println("No all_res_seq match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode+", chain_id="+chainid);
273     }
274     rsst.close();
275     stmt.close();
276    
277 duarte 207 return allresseq;
278     }
279    
280 duarte 210 private HashMap<String,Integer> get_ressers_mapping() throws SQLException {
281 duarte 207 HashMap<String,Integer> map = new HashMap<String, Integer>();
282     String sql="SELECT serial, concat(pdb_seq,IF(pdb_insert_code IS NULL,'',pdb_insert_code)) " +
283     " FROM "+db+".residue " +
284     " WHERE chain_id="+chainid+
285     " AND pdb_seq IS NOT NULL";
286 duarte 210
287     Statement stmt = conn.createStatement();
288     ResultSet rsst = stmt.executeQuery(sql);
289     int count=0;
290     while (rsst.next()) {
291     count++;
292     int resser = rsst.getInt(1);
293     String pdbresser = rsst.getString(2);
294     map.put(pdbresser, resser);
295     }
296     if (count==0) {
297     System.err.println("No residue serials mapping data match for chain_id="+chainid);
298 duarte 207 }
299 duarte 210 rsst.close();
300     stmt.close();
301 duarte 207
302     return map;
303     }
304    
305 duarte 219 private void readSecStructure() throws SQLException{
306 stehr 274 this.secondaryStructure = new SecondaryStructure();
307 duarte 219
308     //HELIX -- helix table
309     String sql = "SELECT helix_serial, beg_residue_serial, end_residue_serial " +
310     " FROM "+db+".helix " +
311     " WHERE (model_id = "+modelid+") " +
312     " AND (chain_id = "+chainid+") ";
313     Statement stmt = conn.createStatement();
314     ResultSet rsst = stmt.executeQuery(sql);
315     int count=0;
316     while (rsst.next()) {
317     count++;
318     int serial = rsst.getInt(1);
319     int beg = rsst.getInt(2);
320     int end =rsst.getInt(3);
321 stehr 274 String ssId = "" + SecStrucElement.HELIX+serial;
322     SecStrucElement ssElem = new SecStrucElement(SecStrucElement.HELIX,beg,end,ssId);
323     secondaryStructure.add(ssElem);
324 duarte 219 }
325     rsst.close();
326     stmt.close();
327     //SHEET -- strand table
328     sql = "SELECT sheet_serial, strand_serial, strand_beg_residue_serial, strand_end_residue_serial " +
329     " FROM "+db+".strand " +
330     " WHERE (model_id = "+modelid+") " +
331     " AND (chain_id = "+chainid+") ";
332     stmt = conn.createStatement();
333     rsst = stmt.executeQuery(sql);
334     // we store everything in these 2 maps to assign later to resser2secstruct based on our own ids (ids are not very consistent in msdsd)
335 duarte 222 HashMap<Integer,Interval> strands2begEnd = new HashMap<Integer, Interval>();
336 duarte 219 TreeMap<Integer,ArrayList<Integer>> sheets2strands = new TreeMap<Integer, ArrayList<Integer>>();
337     count=0;
338     while (rsst.next()) {
339     count++;
340     int sheetSerial = rsst.getInt(1);
341     int strandSerial = rsst.getInt(2);
342     int beg = rsst.getInt(3);
343     int end =rsst.getInt(4);
344 duarte 222 strands2begEnd.put(strandSerial, new Interval(beg,end));
345 duarte 219 if (sheets2strands.containsKey(sheetSerial)){
346     sheets2strands.get(sheetSerial).add(strandSerial);
347     } else {
348     ArrayList<Integer> strands = new ArrayList<Integer>();
349     strands.add(strandSerial);
350     sheets2strands.put(sheetSerial, strands);
351     }
352     }
353     rsst.close();
354     stmt.close();
355     char sheet='A';
356     for (int sheetSerial:sheets2strands.keySet()){
357     int strand=1;
358     for (int strandSerial:sheets2strands.get(sheetSerial)){
359 duarte 222 Interval begEnd = strands2begEnd.get(strandSerial);
360 stehr 274 String ssId = ""+SecStrucElement.STRAND+sheet+strand;
361     SecStrucElement ssElem = new SecStrucElement(SecStrucElement.STRAND,begEnd.beg,begEnd.end,ssId);
362     secondaryStructure.add(ssElem);
363 duarte 219 strand++;
364     }
365     sheet++;
366     }
367    
368     //TURN -- turn table
369     // they forgot to fill up the turn_serial field so we have to use turn_id and get a serial from it that is unique within the chain only
370     sql = "SELECT turn_id, res_1_residue_serial, res_2_residue_serial, res_3_residue_serial, res_4_residue_serial " +
371     " FROM "+db+".turn " +
372     " WHERE (model_id = "+modelid+") " +
373     " AND (chain_id = "+chainid+") ";
374     stmt = conn.createStatement();
375     rsst = stmt.executeQuery(sql);
376 duarte 222 TreeMap<Integer,ArrayList<Integer>> turns = new TreeMap<Integer, ArrayList<Integer>>();
377 duarte 219 count=0;
378     while (rsst.next()) {
379     count++;
380     int dbId = rsst.getInt(1);
381     int res1 = rsst.getInt(2);
382     int res2 = rsst.getInt(3);
383     int res3 = rsst.getInt(4);
384     int res4 = rsst.getInt(5);
385 duarte 222 ArrayList<Integer> residues = new ArrayList<Integer>();
386     if (res1!=0) residues.add(res1); // res is 0 when the field is NULL in database
387     if (res2!=0) residues.add(res2);
388     if (res3!=0) residues.add(res3);
389     if (res4!=0) residues.add(res4);
390 duarte 219 turns.put(dbId, residues);
391     }
392     rsst.close();
393     stmt.close();
394     int serial=1;
395     for (int dbId:turns.keySet()){
396 stehr 274 String ssId = "" + SecStrucElement.TURN + serial;
397 duarte 222 int beg = Collections.min(turns.get(dbId));
398     int end = Collections.max(turns.get(dbId));
399 stehr 274 SecStrucElement ssElem = new SecStrucElement(SecStrucElement.TURN,beg,end,ssId);
400     secondaryStructure.add(ssElem);
401 duarte 219 serial++;
402     }
403    
404     }
405 duarte 207 }