ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/PdbasePdb.java
Revision: 311
Committed: Thu Aug 30 17:31:38 2007 UTC (17 years, 1 month ago) by duarte
File size: 15958 byte(s)
Log Message:
Fixed bug: sometimes struct_conf entries can be non-loop elements; that particular case is now also handled
Line User Rev File contents
1 duarte 207 package proteinstructure;
2    
3     import java.sql.ResultSet;
4     import java.sql.SQLException;
5     import java.sql.Statement;
6     import java.util.ArrayList;
7     import java.util.Collections;
8     import java.util.HashMap;
9 duarte 219 import java.util.regex.Matcher;
10     import java.util.regex.Pattern;
11 duarte 207
12 duarte 226 import javax.vecmath.Point3d;
13    
14 duarte 207 import tools.MySQLConnection;
15    
16     /**
17     * A single chain pdb protein structure loaded from a PDBASE database
18     * See http://openmms.sdsc.edu/OpenMMS-1.5.1_Std/openmms/docs/guides/PDBase.html to know what PDBASE is
19     *
20     * @author Jose Duarte
21     * Class: PdbasePdb
22     * Package: proteinstructure
23     */
24     public class PdbasePdb extends Pdb {
25    
26     private final static String MYSQLSERVER="white";
27     private final static String MYSQLUSER=MySQLConnection.getUserName();
28     private final static String MYSQLPWD="nieve";
29     private final static String DEFAULT_PDBASE_DB="pdbase";
30    
31     private MySQLConnection conn;
32    
33     private int entrykey;
34     private String asymid;
35     private int entitykey;
36     private String alt_locs_sql_str;
37    
38     /**
39     * Constructs Pdb object given pdb code and pdb chain code.
40     * Model will be DEFAULT_MODEL
41     * MySQLConnection is taken from defaults in PdbasePdb class: MYSQLSERVER, MYSQLUSER, MYSQLPWD
42     * Database is taken from default pdbase database in PdbasePdb class: DEFAULT_PDBASE_DB
43     * @param pdbCode
44     * @param pdbChainCode
45     * @throws PdbaseInconsistencyError
46 stehr 215 * @throws PdbCodeNotFoundError
47 duarte 207 * @throws SQLException
48 stehr 215 * @throws PdbChainCodeNotFoundError
49 duarte 207 */
50 stehr 215 public PdbasePdb (String pdbCode, String pdbChainCode) throws PdbaseInconsistencyError, PdbCodeNotFoundError, SQLException, PdbChainCodeNotFoundError {
51 duarte 207 this(pdbCode, pdbChainCode, DEFAULT_MODEL, DEFAULT_PDBASE_DB, new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD));
52     }
53    
54     /**
55     * Constructs Pdb object given pdb code, pdb chain code, source db and a MySQLConnection
56     * Model will be DEFAULT_MODEL
57     * The db must be a pdbase database
58     * @param pdbCode
59     * @param pdbChainCode
60     * @param db
61     * @param conn
62     * @throws PdbaseInconsistencyError
63 stehr 215 * @throws PdbCodeNotFoundError
64 duarte 210 * @throws SQLException
65 stehr 215 * @throws PdbChainCodeNotFoundError
66 duarte 207 */
67 stehr 215 public PdbasePdb (String pdbCode, String pdbChainCode, String db, MySQLConnection conn) throws PdbaseInconsistencyError, PdbCodeNotFoundError, SQLException, PdbChainCodeNotFoundError {
68 duarte 207 this(pdbCode,pdbChainCode,DEFAULT_MODEL,db, conn);
69     }
70    
71     /**
72     * Constructs Pdb object given pdb code, pdb chain code and model serial.
73     * MySQLConnection is taken from defaults in PdbasePdb class: MYSQLSERVER, MYSQLUSER, MYSQLPWD
74     * Database is taken from default pdbase database in PdbasePdb class: DEFAULT_PDBASE_DB
75     * @param pdbCode
76     * @param pdbChainCode
77     * @param model_serial
78     * @throws PdbaseInconsistencyError
79 stehr 215 * @throws PdbCodeNotFoundError
80 duarte 207 * @throws SQLException
81 stehr 215 * @throws PdbChainCodeNotFoundError
82 duarte 207 */
83 stehr 215 public PdbasePdb (String pdbCode, String pdbChainCode, int model_serial) throws PdbaseInconsistencyError, PdbCodeNotFoundError, SQLException, PdbChainCodeNotFoundError {
84 duarte 207 this(pdbCode, pdbChainCode, model_serial, DEFAULT_PDBASE_DB, new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD));
85     }
86    
87     /**
88     * Constructs Pdb object given pdb code, pdb chain code, model serial, source db and a MySQLConnection.
89     * The db must be a pdbase database
90     * @param pdbCode
91     * @param pdbChainCode
92     * @param model_serial
93     * @param db
94     * @param conn
95     * @throws PdbaseInconsistencyError
96 stehr 215 * @throws PdbCodeNotFoundError
97 duarte 210 * @throws SQLException
98 stehr 215 * @throws PdbChainCodeNotFoundError
99 duarte 207 */
100 stehr 215 public PdbasePdb (String pdbCode, String pdbChainCode, int model_serial, String db, MySQLConnection conn) throws PdbaseInconsistencyError, PdbCodeNotFoundError, SQLException, PdbChainCodeNotFoundError {
101 stehr 217 this.pdbCode=pdbCode.toLowerCase(); // our convention: pdb codes are lower case
102     this.pdbChainCode=pdbChainCode.toUpperCase(); // our convention: chain codes are upper case
103 duarte 207 this.model=model_serial;
104     this.db=db;
105    
106     this.conn = conn;
107     this.entrykey=get_entry_key();
108     this.asymid=get_asym_id(); // sets asymid and chainCode
109     this.entitykey=get_entity_key();
110     this.alt_locs_sql_str=get_atom_alt_locs();
111    
112     this.sequence = read_seq();
113 duarte 278 this.fullLength = sequence.length();
114    
115 duarte 207 this.pdbresser2resser = get_ressers_mapping();
116    
117     this.read_atomData(); // populates resser_atom2atomserial, resser2restype, atomser2coord, atomser2resser
118    
119 duarte 278 this.obsLength = resser2restype.size();
120    
121 duarte 207 // we initialise resser2pdbresser from the pdbresser2resser HashMap
122     this.resser2pdbresser = new HashMap<Integer, String>();
123     for (String pdbresser:pdbresser2resser.keySet()){
124     resser2pdbresser.put(pdbresser2resser.get(pdbresser), pdbresser);
125     }
126 duarte 219
127 stehr 274 secondaryStructure = new SecondaryStructure(); // create empty secondary structure first to make sure object is not null
128 duarte 219 readSecStructure();
129 stehr 274 if(!secondaryStructure.isEmpty()) {
130     secondaryStructure.setComment("Pdbase");
131 stehr 259 }
132 duarte 237
133     // initialising atomser2atom from resser_atom2atomserial
134     atomser2atom = new HashMap<Integer, String>();
135     for (String resser_atom:resser_atom2atomserial.keySet()){
136     int atomserial = resser_atom2atomserial.get(resser_atom);
137     String atom = resser_atom.split("_")[1];
138     atomser2atom.put(atomserial,atom);
139     }
140 duarte 207 }
141    
142 stehr 215 private int get_entry_key() throws PdbCodeNotFoundError, SQLException {
143 duarte 207 String sql="SELECT entry_key FROM "+db+".struct WHERE entry_id='"+pdbCode.toUpperCase()+"'";
144 duarte 210 Statement stmt = conn.createStatement();
145     ResultSet rsst = stmt.executeQuery(sql);
146     if (rsst.next()) {
147     entrykey = rsst.getInt(1);
148     if (! rsst.isLast()) {
149 stehr 215 //System.err.println("More than 1 entry_key match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
150     throw new PdbCodeNotFoundError("More than 1 entry_key match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
151 duarte 207 }
152 duarte 210 } else {
153 stehr 215 //System.err.println("No entry_key match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
154     throw new PdbCodeNotFoundError("No entry_key match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
155 duarte 207 }
156 duarte 210 rsst.close();
157     stmt.close();
158 duarte 207 return entrykey;
159     }
160    
161 stehr 215 private String get_asym_id() throws PdbChainCodeNotFoundError, SQLException {
162 duarte 207 String pdbstrandid=pdbChainCode;
163     if (pdbChainCode.equals("NULL")){
164     pdbstrandid="A";
165     }
166     String sql="SELECT asym_id " +
167     " FROM "+db+".pdbx_poly_seq_scheme " +
168     " WHERE entry_key=" + entrykey +
169     " AND pdb_strand_id='"+pdbstrandid+"' " +
170     " LIMIT 1";
171 duarte 210
172     Statement stmt = conn.createStatement();
173     ResultSet rsst = stmt.executeQuery(sql);
174     if (rsst.next()) {
175     asymid = rsst.getString(1);
176     } else {
177 stehr 215 //System.err.println("No asym_id match for entry_key="+entrykey+", pdb_strand_id="+pdbChainCode);
178     throw new PdbChainCodeNotFoundError("No asym_id match for entry_key="+entrykey+", pdb_strand_id="+pdbChainCode);
179 duarte 207 }
180 duarte 210 rsst.close();
181     stmt.close();
182 duarte 207 // we set the internal chain identifier chainCode from asymid
183     chainCode = asymid;
184     return asymid;
185     }
186    
187 duarte 210 private int get_entity_key() throws PdbaseInconsistencyError, SQLException {
188 duarte 207 String sql="SELECT entity_key " +
189     " FROM "+db+".struct_asym " +
190     " WHERE entry_key="+ entrykey +
191     " AND id='"+asymid+"'";
192 duarte 210
193     Statement stmt = conn.createStatement();
194     ResultSet rsst = stmt.executeQuery(sql);
195     if (rsst.next()) {
196     entitykey = rsst.getInt(1);
197     if (! rsst.isLast()) {
198 stehr 215 //System.err.println("More than 1 entity_key match for entry_key="+entrykey+", asym_id="+asymid);
199 duarte 210 throw new PdbaseInconsistencyError("More than 1 entity_key match for entry_key="+entrykey+", asym_id="+asymid);
200 duarte 207 }
201 duarte 210 } else {
202 stehr 215 //System.err.println("No entity_key match for entry_key="+entrykey+", asym_id="+asymid);
203 duarte 210 throw new PdbaseInconsistencyError("No entity_key match for entry_key="+entrykey+", asym_id="+asymid);
204 duarte 207 }
205 duarte 210 rsst.close();
206     stmt.close();
207 duarte 207 return entitykey;
208     }
209    
210 duarte 210 private String get_atom_alt_locs() throws PdbaseInconsistencyError, SQLException{
211 duarte 207 ArrayList<String> alt_ids = new ArrayList<String>();
212     HashMap<String,Integer> alt_ids2keys = new HashMap<String,Integer>();
213     String alt_loc_field="label_alt_key";
214     String sql="SELECT id, atom_sites_alt_key FROM "+db+".atom_sites_alt WHERE entry_key="+entrykey;
215    
216 duarte 210 Statement stmt = conn.createStatement();
217     ResultSet rsst = stmt.executeQuery(sql);
218     int count=0;
219     while (rsst.next()) {
220     count++;
221     alt_ids.add(rsst.getString(1));
222     alt_ids2keys.put(rsst.getString(1), rsst.getInt(2));
223 duarte 207 }
224 duarte 210 if (count!=0){
225     if ((! alt_ids.contains(".")) || alt_ids.indexOf(".")!=alt_ids.lastIndexOf(".")){ // second term is a way of finding out if there is more than 1 ocurrence of "." in the ArrayList
226 stehr 215 //System.err.println("alt_codes exist for entry_key "+entrykey+" but there is either no default value '.' or more than 1 '.'. Something wrong with this entry_key or with "+DEFAULT_PDBASE_DB+" db!");
227 duarte 210 throw new PdbaseInconsistencyError("alt_codes exist for entry_key "+entrykey+" but there is either no default value '.' or more than 1 '.'. Something wrong with this entry_key or with "+DEFAULT_PDBASE_DB+" db!");
228     }
229     alt_ids.remove(".");
230     Collections.sort(alt_ids);
231     String lowest_alt_id = alt_ids.get(0);
232     alt_locs_sql_str = "("+alt_loc_field+"="+alt_ids2keys.get(".")+" OR "+alt_loc_field+"="+alt_ids2keys.get(lowest_alt_id)+")";
233     } else {
234     alt_locs_sql_str=alt_loc_field+" IS NULL";
235     }
236    
237     rsst.close();
238     stmt.close();
239    
240 duarte 207 return alt_locs_sql_str;
241     }
242    
243 duarte 210 private void read_atomData() throws PdbaseInconsistencyError, SQLException{
244 duarte 207 resser_atom2atomserial = new HashMap<String,Integer>();
245     resser2restype = new HashMap<Integer,String>();
246 duarte 226 atomser2coord = new HashMap<Integer,Point3d>();
247 duarte 207 atomser2resser = new HashMap<Integer,Integer>();
248    
249    
250     String sql = "SELECT id, label_atom_id, label_comp_id, label_seq_id, Cartn_x, Cartn_y, Cartn_z " +
251     " FROM "+db+".atom_site " +
252     " WHERE entry_key="+entrykey +
253     " AND label_asym_id='"+asymid+"' " +
254     " AND label_entity_key="+ entitykey +
255     " AND model_num="+ model +
256     " AND "+alt_locs_sql_str;
257    
258 duarte 210 Statement stmt = conn.createStatement();
259     ResultSet rsst = stmt.executeQuery(sql);
260     int count=0;
261     while (rsst.next()){
262     count++;
263    
264     int atomserial = rsst.getInt(1); // atomserial
265     String atom = rsst.getString(2).trim(); // atom
266     String res_type = rsst.getString(3).trim(); // res_type
267     int res_serial = rsst.getInt(4); // res_serial
268     double x = rsst.getDouble(5); // x
269     double y = rsst.getDouble(6); // y
270     double z = rsst.getDouble(7); // z
271 duarte 226 Point3d coords = new Point3d(x, y, z);
272 duarte 210 ArrayList<String> aalist=AA.aas();
273     if (aalist.contains(res_type)) {
274     atomser2coord.put(atomserial, coords);
275     atomser2resser.put(atomserial, res_serial);
276     resser2restype.put(res_serial, res_type);
277     ArrayList<String> atomlist = aas2atoms.get(res_type);
278 duarte 275 atomlist.add("OXT"); // the extra atom OXT is there in the last residue of the chain
279 duarte 210 if (atomlist.contains(atom)){
280     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
281 duarte 207 }
282     }
283 duarte 210
284 duarte 207 }
285 duarte 210 if (count==0){
286     throw new PdbaseInconsistencyError("atom data query returned no data at all for entry_key="+entrykey+", asym_id="+asymid+", entity_key="+entitykey+", model_num="+model+", alt_locs_sql_str='"+alt_locs_sql_str+"'");
287     }
288     rsst.close();
289     stmt.close();
290 duarte 207 }
291    
292 duarte 210 private String read_seq() throws PdbaseInconsistencyError, SQLException{
293 duarte 207 String sequence="";
294     String pdbstrandid=pdbChainCode;
295     if (pdbChainCode.equals("NULL")){
296     pdbstrandid="A";
297     }
298     // we use seq_id+0 (implicitly converts to int) in ORDER BY because seq_id is varchar!!
299     String sql="SELECT mon_id" +
300     " FROM "+db+".pdbx_poly_seq_scheme " +
301     " WHERE entry_key=" + entrykey +
302     " AND asym_id='"+asymid+"' " +
303     " AND pdb_strand_id='"+pdbstrandid+"' " +
304     " ORDER BY seq_id+0";
305    
306 duarte 210 Statement stmt = conn.createStatement();
307     ResultSet rsst = stmt.executeQuery(sql);
308     ArrayList<String> aalist=AA.aas();
309     int count=0;
310     while (rsst.next()) {
311     count++;
312     String res_type = rsst.getString(1);
313     if (aalist.contains(res_type)){
314     sequence+=AA.threeletter2oneletter(res_type);
315     } else {
316     sequence+=NONSTANDARD_AA_LETTER;
317     }
318     }
319     if (count==0) {
320 stehr 215 //System.err.println("No sequence data match for entry_key="+entrykey+", asym_id="+asymid+", pdb_strand_id="+pdbstrandid);
321 duarte 210 throw new PdbaseInconsistencyError("No sequence data match for entry_key="+entrykey+", asym_id="+asymid+", pdb_strand_id="+pdbstrandid);
322     }
323     rsst.close();
324     stmt.close();
325    
326 duarte 207 return sequence;
327     }
328    
329 duarte 210 private HashMap<String,Integer> get_ressers_mapping() throws PdbaseInconsistencyError, SQLException{
330 duarte 207 String pdbstrandid=pdbChainCode;
331     if (pdbChainCode.equals("NULL")){
332     pdbstrandid="A";
333     }
334    
335     HashMap<String,Integer> map = new HashMap<String, Integer>();
336 duarte 311 //TODO revise: do we want auth_seq_num or pdb_seq_num here??
337 duarte 207 String sql="SELECT seq_id, concat(auth_seq_num,IF(pdb_ins_code='.','',pdb_ins_code))" +
338     " FROM "+db+".pdbx_poly_seq_scheme " +
339     " WHERE entry_key=" + entrykey +
340     " AND asym_id='"+asymid+"' " +
341     " AND pdb_strand_id='"+pdbstrandid+"' " +
342     " AND auth_seq_num!='?'" +
343     " ORDER BY seq_id+0";
344 duarte 210
345     Statement stmt = conn.createStatement();
346     ResultSet rsst = stmt.executeQuery(sql);
347     int count=0;
348     while (rsst.next()) {
349     count++;
350     int resser = Integer.parseInt(rsst.getString(1));
351     String pdbresser = rsst.getString(2);
352     map.put(pdbresser, resser);
353     }
354     if (count==0) {
355 stehr 215 //System.err.println("No residue serials mapping data match for entry_key="+entrykey+", asym_id="+asymid+", pdb_strand_id="+pdbstrandid);
356 duarte 210 throw new PdbaseInconsistencyError("No residue serials mapping data match for entry_key="+entrykey+", asym_id="+asymid+", pdb_strand_id="+pdbstrandid);
357 duarte 207 }
358 duarte 210 rsst.close();
359     stmt.close();
360 duarte 207
361     return map;
362     }
363    
364 duarte 219 private void readSecStructure() throws SQLException {
365 stehr 274 this.secondaryStructure = new SecondaryStructure();
366 duarte 219
367     // HELIX AND TURN -- struct_conf table
368     String sql = "SELECT id,beg_label_seq_id,end_label_seq_id " +
369     " FROM "+db+".struct_conf " +
370     " WHERE entry_key="+entrykey+
371     " AND beg_label_asym_id='"+asymid+"'";
372     Statement stmt = conn.createStatement();
373     ResultSet rsst = stmt.executeQuery(sql);
374     int count=0;
375     while (rsst.next()) {
376     count++;
377     String id = rsst.getString(1).trim(); // id is either HELIX_Pnn or TURN_Pnn
378     Pattern p = Pattern.compile("^(\\w).+_P(\\d)+$");
379     Matcher m = p.matcher(id);
380     String ssId="Unknown";
381     if (m.find()){
382     ssId = m.group(1)+m.group(2); // e.g.: Hnn (helices) or Tnn (turns)
383     }
384     int beg = rsst.getInt(2);
385     int end =rsst.getInt(3);
386 stehr 274 char ssType = SecStrucElement.OTHER;
387     if(id.startsWith("H")) {
388     ssType = SecStrucElement.HELIX;
389     } else if(id.startsWith("T")) {
390     ssType = SecStrucElement.TURN;
391     } else {
392     System.err.println("Unknown secondary structure type " + id + " encountered when reading from Pdbase. Skipping.");
393 duarte 219 }
394 stehr 274 if(ssType != SecStrucElement.OTHER) {
395     SecStrucElement ssElem = new SecStrucElement(ssType, beg, end, ssId);
396     secondaryStructure.add(ssElem);
397     }
398 duarte 219 }
399     rsst.close();
400     stmt.close();
401    
402     // SHEET -- struct_sheet_range table
403     sql = "SELECT sheet_id, id, beg_label_seq_id, end_label_seq_id " +
404     " FROM "+db+".struct_sheet_range " +
405     " WHERE entry_key="+entrykey+
406     " AND beg_label_asym_id='"+asymid+"'";
407     stmt = conn.createStatement();
408     rsst = stmt.executeQuery(sql);
409     count=0;
410     while (rsst.next()) {
411     count++;
412     String sheetid = rsst.getString(1).trim();
413     int id = rsst.getInt(2);
414     int beg = rsst.getInt(3);
415     int end =rsst.getInt(4);
416 stehr 274 String ssId=SecStrucElement.STRAND+sheetid+id; // e.g.: SA1, SA2..., SB1, SB2,...
417     SecStrucElement ssElem = new SecStrucElement(SecStrucElement.STRAND, beg, end, ssId);
418     secondaryStructure.add(ssElem);
419 duarte 219 }
420     rsst.close();
421     stmt.close();
422    
423     }
424 duarte 207 }