ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/MsdsdPdb.java
Revision: 250
Committed: Mon Aug 6 14:48:37 2007 UTC (17 years, 2 months ago) by stehr
File size: 14984 byte(s)
Log Message:
added application to count grid density, suppressed warning for secondary structure reassignment
Line User Rev File contents
1 duarte 207 package proteinstructure;
2    
3     import java.sql.ResultSet;
4     import java.sql.SQLException;
5     import java.sql.Statement;
6     import java.util.ArrayList;
7 duarte 222 import java.util.Collections;
8 duarte 207 import java.util.HashMap;
9 duarte 219 import java.util.TreeMap;
10 duarte 207
11 duarte 226 import javax.vecmath.Point3d;
12    
13 duarte 207 import tools.MySQLConnection;
14    
15     /**
16     * A single chain pdb protein structure loaded from a MSDSD database
17     * See http://www.ebi.ac.uk/msd-srv/docs/dbdoc/refaindex.html to know what MSDSD is
18     *
19     * @author Jose Duarte
20     * Class: MsdsdPdb
21     * Package: proteinstructure
22     */
23     public class MsdsdPdb extends Pdb {
24    
25     private final static String MYSQLSERVER="white";
26     private final static String MYSQLUSER=MySQLConnection.getUserName();
27     private final static String MYSQLPWD="nieve";
28     //private final static String DEFAULT_MYMSDSD_DB="my_msdsd_00_07_a";
29     private final static String DEFAULT_MSDSD_DB="msdsd_00_07_a";
30    
31     private MySQLConnection conn;
32    
33     private int chainid;
34     private int modelid;
35    
36     // TODO for this to be able to be used by other people we need to do things without a myMsdsdDb (or also distribute our fixes database)
37     private String myMsdsdDb; // our database with add-ons and fixes to msdsd
38    
/**
 * Constructs a Pdb object given a pdb code and a pdb chain code.
 * <p>
 * The model is {@code DEFAULT_MODEL}, the database is {@code DEFAULT_MSDSD_DB} and a
 * new MySQLConnection is created from the class defaults {@code MYSQLSERVER},
 * {@code MYSQLUSER} and {@code MYSQLPWD}.
 *
 * @param pdbCode the pdb accession code
 * @param pdbChainCode the pdb chain code ("NULL" selects the chain whose chain_pdb_code is NULL)
 * @throws PdbCodeNotFoundError if no chain, or more than one chain, matches the given codes
 * @throws MsdsdInconsistentResidueNumbersError if msdsd has inconsistent residue numbering for the chain
 * @throws SQLException if a database access error occurs
 */
public MsdsdPdb(String pdbCode, String pdbChainCode)
        throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
    this(
            pdbCode,
            pdbChainCode,
            DEFAULT_MODEL,
            DEFAULT_MSDSD_DB,
            new MySQLConnection(MYSQLSERVER, MYSQLUSER, MYSQLPWD));
}
53    
/**
 * Constructs a Pdb object given a pdb code, a pdb chain code, a source database and a
 * MySQLConnection. The model is {@code DEFAULT_MODEL}.
 *
 * @param pdbCode the pdb accession code
 * @param pdbChainCode the pdb chain code ("NULL" selects the chain whose chain_pdb_code is NULL)
 * @param db name of the database to read from; must be a msdsd database
 * @param conn the connection on which all queries are run
 * @throws PdbCodeNotFoundError if no chain, or more than one chain, matches the given codes
 * @throws MsdsdInconsistentResidueNumbersError if msdsd has inconsistent residue numbering for the chain
 * @throws SQLException if a database access error occurs
 */
public MsdsdPdb(String pdbCode, String pdbChainCode, String db, MySQLConnection conn)
        throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
    this(
            pdbCode,
            pdbChainCode,
            DEFAULT_MODEL,
            db,
            conn);
}
69    
/**
 * Constructs a Pdb object given a pdb code, a pdb chain code and a model serial.
 * <p>
 * The database is {@code DEFAULT_MSDSD_DB} and a new MySQLConnection is created from
 * the class defaults {@code MYSQLSERVER}, {@code MYSQLUSER} and {@code MYSQLPWD}.
 *
 * @param pdbCode the pdb accession code
 * @param pdbChainCode the pdb chain code ("NULL" selects the chain whose chain_pdb_code is NULL)
 * @param model_serial the serial of the model to load
 * @throws PdbCodeNotFoundError if no chain, or more than one chain, matches the given codes
 * @throws MsdsdInconsistentResidueNumbersError if msdsd has inconsistent residue numbering for the chain
 * @throws SQLException if a database access error occurs
 */
public MsdsdPdb(String pdbCode, String pdbChainCode, int model_serial)
        throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
    this(
            pdbCode,
            pdbChainCode,
            model_serial,
            DEFAULT_MSDSD_DB,
            new MySQLConnection(MYSQLSERVER, MYSQLUSER, MYSQLPWD));
}
84    
85     /**
86     * Constructs Pdb object given pdb code, pdb chain code, model serial, a source db and a MySQLConnection.
87     * db must be a msdsd database
88     * @param pdbCode
89     * @param pdbChainCode
90     * @param model_serial
91     * @param db
92     * @param conn
93 stehr 215 * @throws PdbCodeNotFoundError
94 duarte 207 * @throws MsdsdInconsistentResidueNumbersError
95 duarte 210 * @throws SQLException
96 duarte 207 */
97 stehr 215 public MsdsdPdb (String pdbCode, String pdbChainCode, int model_serial, String db, MySQLConnection conn) throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
98 stehr 217 this.pdbCode=pdbCode.toLowerCase(); // our convention: pdb codes are lower case
99     this.pdbChainCode=pdbChainCode.toUpperCase(); // our convention: chain codes are upper case
100 duarte 207 this.model=model_serial;
101     this.db=db;
102     this.myMsdsdDb="my_"+db; // i.e. for db=msdsd_00_07_a then myMsdsdDb=my_msdsd_00_07_a
103    
104     this.conn = conn;
105    
106     this.getchainid();// initialises chainid, modelid and chainCode
107    
108     if (check_inconsistent_res_numbering()){
109     throw new MsdsdInconsistentResidueNumbersError("Inconsistent residue numbering in msdsd for accession_code "+this.pdbCode+", chain_pdb_code "+this.pdbChainCode);
110     }
111    
112     this.sequence = read_seq();
113     this.pdbresser2resser = get_ressers_mapping();
114    
115     this.read_atomData();
116    
117     // we initialise resser2pdbresser from the pdbresser2resser HashMap
118     this.resser2pdbresser = new HashMap<Integer, String>();
119     for (String pdbresser:pdbresser2resser.keySet()){
120     resser2pdbresser.put(pdbresser2resser.get(pdbresser), pdbresser);
121     }
122 duarte 219
123     this.readSecStructure();
124 duarte 237
125     // initialising atomser2atom from resser_atom2atomserial
126     atomser2atom = new HashMap<Integer, String>();
127     for (String resser_atom:resser_atom2atomserial.keySet()){
128     int atomserial = resser_atom2atomserial.get(resser_atom);
129     String atom = resser_atom.split("_")[1];
130     atomser2atom.put(atomserial,atom);
131     }
132 duarte 207 }
133    
134 stehr 215 private void getchainid() throws PdbCodeNotFoundError, SQLException {
135 duarte 207 chainid=0;
136     String chaincodestr="='"+pdbChainCode+"'";
137     if (pdbChainCode.equals("NULL")){
138     chaincodestr="IS NULL";
139     }
140     String sql = "SELECT chain_id, model_id, pchain_code " +
141     " FROM "+myMsdsdDb+".mmol_chain_info " +
142     " WHERE accession_code='"+pdbCode+"' " +
143     " AND chain_pdb_code "+chaincodestr +
144     " AND chain_type='C' " +
145     " AND asu_chain=1 " +
146     " AND model_serial="+model;
147    
148 duarte 210 Statement stmt = conn.createStatement();
149     ResultSet rsst = stmt.executeQuery(sql);
150     if (rsst.next()) {
151     chainid = rsst.getInt(1);
152     modelid = rsst.getInt(2);
153     chainCode=rsst.getString(3);
154     if (! rsst.isLast()) {
155 stehr 215 //System.err.println("More than 1 chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
156     throw new PdbCodeNotFoundError("More than 1 chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
157 duarte 210 }
158     } else {
159 stehr 215 //System.err.println("No chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
160     throw new PdbCodeNotFoundError("No chain_id could be matched for accession_code "+pdbCode+", chain_pdb_code "+pdbChainCode);
161 duarte 210 }
162     rsst.close();
163     stmt.close();
164 duarte 207 }
165    
166 duarte 210 private boolean check_inconsistent_res_numbering() throws SQLException{
167 duarte 207 int count=0;
168     int numserial=0;
169 duarte 210
170     String sql="SELECT count(*) " +
171     " FROM "+myMsdsdDb+".problem_serial_chain " +
172     " WHERE chain_id="+chainid +
173     " AND (min_serial!=1 OR num_serial!=num_dist_serial OR num_serial!=max_serial-min_serial+1)";
174     Statement stmt = conn.createStatement();
175     ResultSet rsst = stmt.executeQuery(sql);
176     while (rsst.next()) {
177     count = rsst.getInt(1);
178     if (count>0){
179 duarte 207 return true;
180     }
181     }
182 duarte 210 sql="SELECT num_serial FROM "+myMsdsdDb+".problem_serial_chain WHERE chain_id="+chainid;
183     rsst = stmt.executeQuery(sql);
184     int check = 0;
185     while (rsst.next()){
186     check++;
187     numserial=rsst.getInt(1);
188     }
189     if (check!=1){
190     System.err.println("No num_serial match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
191     }
192     String allresseq = read_seq();
193     if (allresseq.length()!=numserial){
194     System.err.println("num_serial and length of all_res_seq don't match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
195     return true;
196     }
197     rsst.close();
198     stmt.close();
199 duarte 207 return false;
200     }
201    
202 duarte 210 private void read_atomData() throws SQLException{
203 duarte 207 resser_atom2atomserial = new HashMap<String,Integer>();
204     resser2restype = new HashMap<Integer,String>();
205 duarte 226 atomser2coord = new HashMap<Integer,Point3d>();
206 duarte 207 atomser2resser = new HashMap<Integer,Integer>();
207    
208     String sql = "SELECT serial,chem_atom_name,code_3_letter,residue_serial,x,y,z " +
209     " FROM "+db+".atom_data " +
210     " WHERE (model_id = "+modelid+") " +
211     " AND (chain_id = "+chainid+") " +
212     " AND (graph_alt_code_used = 1) " +
213     " AND (graph_standard_aa=1) " +
214     " AND (pdb_group = 'A')" +
215     " ORDER BY chain_code, residue_serial, serial";
216    
217 duarte 210 Statement stmt = conn.createStatement();
218     ResultSet rsst = stmt.executeQuery(sql);
219     int count=0;
220     while (rsst.next()){
221     count++;
222    
223     int atomserial = rsst.getInt(1); // atomserial
224     String atom = rsst.getString(2).trim(); // atom
225     String res_type = rsst.getString(3).trim(); // res_type
226     int res_serial = rsst.getInt(4); // res_serial
227     double x = rsst.getDouble(5); // x
228     double y = rsst.getDouble(6); // y
229     double z = rsst.getDouble(7); // z
230 duarte 226 Point3d coords = new Point3d(x, y, z);
231 duarte 210 ArrayList<String> aalist=AA.aas();
232     if (aalist.contains(res_type)) {
233     atomser2coord.put(atomserial, coords);
234     atomser2resser.put(atomserial, res_serial);
235     resser2restype.put(res_serial, res_type);
236     ArrayList<String> atomlist = aas2atoms.get(res_type);
237     if (atomlist.contains(atom)){
238     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
239     }
240 duarte 207 }
241 duarte 210
242    
243 duarte 207 }
244 duarte 210 if (count==0){
245     System.err.println("atom data query returned no data at all for model_id="+modelid+", model_id="+modelid);
246     }
247     rsst.close();
248     stmt.close();
249 duarte 207 }
250    
251 duarte 210 private String read_seq() throws SQLException{
252 duarte 207 String allresseq="";
253     String sql="SELECT all_res_seq FROM "+myMsdsdDb+".chain_seq WHERE chain_id="+chainid;
254    
255 duarte 210 Statement stmt = conn.createStatement();
256     ResultSet rsst = stmt.executeQuery(sql);
257     int check = 0;
258     if (rsst.next()) {
259     check++;
260     allresseq=rsst.getString(1);
261     }
262     if (check!=1) {
263     System.err.println("No all_res_seq match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode+", chain_id="+chainid);
264     }
265     rsst.close();
266     stmt.close();
267    
268 duarte 207 return allresseq;
269     }
270    
271 duarte 210 private HashMap<String,Integer> get_ressers_mapping() throws SQLException {
272 duarte 207 HashMap<String,Integer> map = new HashMap<String, Integer>();
273     String sql="SELECT serial, concat(pdb_seq,IF(pdb_insert_code IS NULL,'',pdb_insert_code)) " +
274     " FROM "+db+".residue " +
275     " WHERE chain_id="+chainid+
276     " AND pdb_seq IS NOT NULL";
277 duarte 210
278     Statement stmt = conn.createStatement();
279     ResultSet rsst = stmt.executeQuery(sql);
280     int count=0;
281     while (rsst.next()) {
282     count++;
283     int resser = rsst.getInt(1);
284     String pdbresser = rsst.getString(2);
285     map.put(pdbresser, resser);
286     }
287     if (count==0) {
288     System.err.println("No residue serials mapping data match for chain_id="+chainid);
289 duarte 207 }
290 duarte 210 rsst.close();
291     stmt.close();
292 duarte 207
293     return map;
294     }
295    
296 duarte 219 private void readSecStructure() throws SQLException{
297 duarte 222 this.resser2secstruct = new HashMap<Integer, String>();
298     this.secstruct2resinterval = new TreeMap<String, Interval>();
299 duarte 219
300     //HELIX -- helix table
301     String sql = "SELECT helix_serial, beg_residue_serial, end_residue_serial " +
302     " FROM "+db+".helix " +
303     " WHERE (model_id = "+modelid+") " +
304     " AND (chain_id = "+chainid+") ";
305     Statement stmt = conn.createStatement();
306     ResultSet rsst = stmt.executeQuery(sql);
307     int count=0;
308     while (rsst.next()) {
309     count++;
310     int serial = rsst.getInt(1);
311     int beg = rsst.getInt(2);
312     int end =rsst.getInt(3);
313     String ssId = "H"+serial;
314 duarte 222 secstruct2resinterval.put(ssId, new Interval(beg,end));
315 duarte 219 for (int i=beg;i<=end;i++){
316     if (resser2secstruct.containsKey(i)){ // if already assigned we print a warning and then assign it
317 stehr 250 //System.err.println("Inconsistency in secondary structure assignment. " +
318     // "Residue "+i+" is getting reassigned from "+resser2secstruct.get(i)+" to "+ssId);
319 duarte 219 }
320     resser2secstruct.put(i,ssId);
321     }
322     }
323     rsst.close();
324     stmt.close();
325     //SHEET -- strand table
326     sql = "SELECT sheet_serial, strand_serial, strand_beg_residue_serial, strand_end_residue_serial " +
327     " FROM "+db+".strand " +
328     " WHERE (model_id = "+modelid+") " +
329     " AND (chain_id = "+chainid+") ";
330     stmt = conn.createStatement();
331     rsst = stmt.executeQuery(sql);
332     // we store everything in these 2 maps to assign later to resser2secstruct based on our own ids (ids are not very consistent in msdsd)
333 duarte 222 HashMap<Integer,Interval> strands2begEnd = new HashMap<Integer, Interval>();
334 duarte 219 TreeMap<Integer,ArrayList<Integer>> sheets2strands = new TreeMap<Integer, ArrayList<Integer>>();
335     count=0;
336     while (rsst.next()) {
337     count++;
338     int sheetSerial = rsst.getInt(1);
339     int strandSerial = rsst.getInt(2);
340     int beg = rsst.getInt(3);
341     int end =rsst.getInt(4);
342 duarte 222 strands2begEnd.put(strandSerial, new Interval(beg,end));
343 duarte 219 if (sheets2strands.containsKey(sheetSerial)){
344     sheets2strands.get(sheetSerial).add(strandSerial);
345     } else {
346     ArrayList<Integer> strands = new ArrayList<Integer>();
347     strands.add(strandSerial);
348     sheets2strands.put(sheetSerial, strands);
349     }
350     }
351     rsst.close();
352     stmt.close();
353     char sheet='A';
354     for (int sheetSerial:sheets2strands.keySet()){
355     int strand=1;
356     for (int strandSerial:sheets2strands.get(sheetSerial)){
357 duarte 222 Interval begEnd = strands2begEnd.get(strandSerial);
358     for (int i=begEnd.beg;i<=begEnd.end;i++){
359 duarte 219 String ssId = "S"+sheet+strand;
360 duarte 222 secstruct2resinterval.put(ssId, begEnd);
361 duarte 219 if (resser2secstruct.containsKey(i)){ // if already assigned we print a warning and then assign it
362 stehr 250 //System.err.println("Inconsistency in secondary structure assignment. " +
363     // "Residue "+i+" is getting reassigned from "+resser2secstruct.get(i)+" to "+ssId);
364 duarte 219 }
365     resser2secstruct.put(i,ssId);
366     }
367     strand++;
368     }
369     sheet++;
370     }
371    
372     //TURN -- turn table
373     // they forgot to fill up the turn_serial field so we have to use turn_id and get a serial from it that is unique within the chain only
374     sql = "SELECT turn_id, res_1_residue_serial, res_2_residue_serial, res_3_residue_serial, res_4_residue_serial " +
375     " FROM "+db+".turn " +
376     " WHERE (model_id = "+modelid+") " +
377     " AND (chain_id = "+chainid+") ";
378     stmt = conn.createStatement();
379     rsst = stmt.executeQuery(sql);
380 duarte 222 TreeMap<Integer,ArrayList<Integer>> turns = new TreeMap<Integer, ArrayList<Integer>>();
381 duarte 219 count=0;
382     while (rsst.next()) {
383     count++;
384     int dbId = rsst.getInt(1);
385     int res1 = rsst.getInt(2);
386     int res2 = rsst.getInt(3);
387     int res3 = rsst.getInt(4);
388     int res4 = rsst.getInt(5);
389 duarte 222 ArrayList<Integer> residues = new ArrayList<Integer>();
390     if (res1!=0) residues.add(res1); // res is 0 when the field is NULL in database
391     if (res2!=0) residues.add(res2);
392     if (res3!=0) residues.add(res3);
393     if (res4!=0) residues.add(res4);
394 duarte 219 turns.put(dbId, residues);
395     }
396     rsst.close();
397     stmt.close();
398     int serial=1;
399     for (int dbId:turns.keySet()){
400 duarte 222 String ssId="T"+serial;
401     int beg = Collections.min(turns.get(dbId));
402     int end = Collections.max(turns.get(dbId));
403     secstruct2resinterval.put(ssId, new Interval(beg,end));
404 duarte 219 for (int i:turns.get(dbId)){
405 duarte 222 if (resser2secstruct.containsKey(i)){ // if already assigned we print a warning and then assign it
406 stehr 250 //System.err.println("Inconsistency in secondary structure assignment. " +
407     // "Residue "+i+" is getting reassigned from "+resser2secstruct.get(i)+" to "+ssId);
408 duarte 219 }
409 duarte 222 resser2secstruct.put(i,ssId);
410 duarte 219 }
411     serial++;
412     }
413    
414     }
415 duarte 207 }