ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/PdbasePdb.java
Revision: 492
Committed: Wed Jan 2 13:18:57 2008 UTC (16 years, 9 months ago) by duarte
File size: 15840 byte(s)
Log Message:
Copied the aglappe-jung branch into trunk.

Line File contents
1 package proteinstructure;
2
3 import java.sql.ResultSet;
4 import java.sql.SQLException;
5 import java.sql.Statement;
6 import java.util.ArrayList;
7 import java.util.Collections;
8 import java.util.HashMap;
9 import java.util.TreeSet;
10 import java.util.regex.Matcher;
11 import java.util.regex.Pattern;
12
13 import javax.vecmath.Point3d;
14
15 import tools.MySQLConnection;
16
17 /**
18 * A single chain pdb protein structure loaded from a PDBASE database
19 * See http://openmms.sdsc.edu/OpenMMS-1.5.1_Std/openmms/docs/guides/PDBase.html to know what PDBASE is
20 *
21 * @author Jose Duarte
22 * Class: PdbasePdb
23 * Package: proteinstructure
24 */
25 public class PdbasePdb extends Pdb {
26
27 private final static String MYSQLSERVER="white";
28 private final static String MYSQLUSER=MySQLConnection.getUserName();
29 private final static String MYSQLPWD="nieve";
30 private final static String DEFAULT_PDBASE_DB="pdbase";
31
32 private MySQLConnection conn;
33
34 private int entrykey;
35 private String asymid;
36 private int entitykey;
37 private String alt_locs_sql_str;
38
39 /**
40 * Constructs an empty Pdb object given pdb code
41 * Data will be loaded from database upon call of load(pdbChainCode, modelSerial)
42 * MySQLConnection is taken from defaults in PdbasePdb class: MYSQLSERVER, MYSQLUSER, MYSQLPWD
43 * Database is taken from default pdbase database in PdbasePdb class: DEFAULT_PDBASE_DB
44 * @param pdbCode
45 * @throws SQLException
46 * @throws PdbCodeNotFoundError
47 */
48 public PdbasePdb (String pdbCode) throws SQLException, PdbCodeNotFoundError {
49 this(pdbCode, DEFAULT_PDBASE_DB, new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD));
50 }
51
52 /**
53 * Constructs an empty Pdb object given pdb code, source db and a MySQLConnection.
54 * Data will be loaded from database upon call of load(pdbChainCode, modelSerial)
55 * @param pdbCode
56 * @param db a pdbase database
57 * @param conn
58 * @throws SQLException
59 * @throws PdbCodeNotFoundError
60 */
61 public PdbasePdb (String pdbCode, String db, MySQLConnection conn) throws PdbCodeNotFoundError, SQLException {
62 this.pdbCode=pdbCode.toLowerCase(); // our convention: pdb codes are lower case
63 this.db=db;
64 this.dataLoaded = false;
65
66 this.conn = conn;
67
68 // this makes sure that we find the pdb code in the database
69 this.entrykey = get_entry_key();
70 }
71
72 public void load(String pdbChainCode, int modelSerial) throws PdbLoadError {
73 try {
74 this.model = modelSerial;
75 this.pdbChainCode=pdbChainCode; // NOTE! pdb chain code are case sensitive!
76 this.asymid=get_asym_id(); // sets asymid and chainCode
77 this.entitykey=get_entity_key();
78 this.alt_locs_sql_str=get_atom_alt_locs();
79
80 this.sequence = read_seq();
81 this.fullLength = sequence.length();
82
83 this.pdbresser2resser = get_ressers_mapping();
84
85 this.read_atomData(); // populates resser_atom2atomserial, resser2restype, atomser2coord, atomser2resser
86
87 this.obsLength = resser2restype.size();
88
89 // we initialise resser2pdbresser from the pdbresser2resser HashMap
90 this.resser2pdbresser = new HashMap<Integer, String>();
91 for (String pdbresser:pdbresser2resser.keySet()){
92 resser2pdbresser.put(pdbresser2resser.get(pdbresser), pdbresser);
93 }
94
95 secondaryStructure = new SecondaryStructure(); // create empty secondary structure first to make sure object is not null
96 readSecStructure();
97 if(!secondaryStructure.isEmpty()) {
98 secondaryStructure.setComment("Pdbase");
99 }
100
101 // initialising atomser2atom from resser_atom2atomserial
102 atomser2atom = new HashMap<Integer, String>();
103 for (String resser_atom:resser_atom2atomserial.keySet()){
104 int atomserial = resser_atom2atomserial.get(resser_atom);
105 String atom = resser_atom.split("_")[1];
106 atomser2atom.put(atomserial,atom);
107 }
108
109 dataLoaded = true;
110
111 } catch (SQLException e) {
112 throw new PdbLoadError(e);
113 } catch (PdbChainCodeNotFoundError e) {
114 throw new PdbLoadError(e);
115 } catch (PdbaseInconsistencyError e) {
116 throw new PdbLoadError(e);
117 }
118
119 }
120
121 public String[] getChains() throws PdbLoadError {
122 TreeSet<String> chains = new TreeSet<String>();
123 try {
124 String sql = "SELECT DISTINCT pdb_strand_id FROM "+db+".pdbx_poly_seq_scheme WHERE entry_key="+entrykey;
125 Statement stmt = conn.createStatement();
126 ResultSet rsst = stmt.executeQuery(sql);
127 while (rsst.next()) {
128 chains.add(rsst.getString(1));
129 }
130 rsst.close();
131 stmt.close();
132 } catch (SQLException e) {
133 throw new PdbLoadError(e);
134 }
135
136 if (chains.isEmpty()) return null;
137
138 String[] chainsArray = new String[chains.size()];
139 chains.toArray(chainsArray);
140 return chainsArray;
141 }
142
143 public Integer[] getModels() throws PdbLoadError {
144 TreeSet<Integer> models = new TreeSet<Integer>();
145 try {
146 String sql = "SELECT DISTINCT model_num FROM "+db+".atom_site WHERE entry_key="+entrykey;
147 Statement stmt = conn.createStatement();
148 ResultSet rsst = stmt.executeQuery(sql);
149 while (rsst.next()) {
150 models.add(rsst.getInt(1));
151 }
152 rsst.close();
153 stmt.close();
154
155 } catch (SQLException e) {
156 throw new PdbLoadError(e);
157 }
158
159 if (models.isEmpty()) return null;
160 Integer[] modelsArray = new Integer[models.size()];
161 models.toArray(modelsArray);
162 return modelsArray;
163 }
164
165 private int get_entry_key() throws PdbCodeNotFoundError, SQLException {
166 String sql="SELECT entry_key FROM "+db+".struct WHERE entry_id='"+pdbCode.toUpperCase()+"'";
167 Statement stmt = conn.createStatement();
168 ResultSet rsst = stmt.executeQuery(sql);
169 if (rsst.next()) {
170 entrykey = rsst.getInt(1);
171 if (! rsst.isLast()) {
172 //System.err.println("More than 1 entry_key match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
173 throw new PdbCodeNotFoundError("More than 1 entry_key match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
174 }
175 } else {
176 //System.err.println("No entry_key match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
177 throw new PdbCodeNotFoundError("No entry_key match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
178 }
179 rsst.close();
180 stmt.close();
181 return entrykey;
182 }
183
184 private String get_asym_id() throws PdbChainCodeNotFoundError, SQLException {
185 String pdbstrandid=pdbChainCode;
186 if (pdbChainCode.equals(NULL_CHAIN_CODE)){
187 pdbstrandid="A";
188 }
189 // NOTE: 'limit 1' not really needed since there can be only one asym_id
190 // per entry_key,pdb_strand_id combination (pdb_strand_id case sensitive!)
191 String sql="SELECT asym_id " +
192 " FROM "+db+".pdbx_poly_seq_scheme " +
193 " WHERE entry_key=" + entrykey +
194 " AND pdb_strand_id='"+pdbstrandid+"' " +
195 " LIMIT 1";
196
197 Statement stmt = conn.createStatement();
198 ResultSet rsst = stmt.executeQuery(sql);
199 if (rsst.next()) {
200 asymid = rsst.getString(1);
201 } else {
202 //System.err.println("No asym_id match for entry_key="+entrykey+", pdb_strand_id="+pdbChainCode);
203 throw new PdbChainCodeNotFoundError("No asym_id match for entry_key="+entrykey+", pdb_strand_id="+pdbChainCode);
204 }
205 rsst.close();
206 stmt.close();
207 // we set the internal chain identifier chainCode from asymid
208 chainCode = asymid;
209 return asymid;
210 }
211
212 // NOTE: Entity key not really needed since there can be only one entity_key
213 // per entry_key,asym_id combination
214 private int get_entity_key() throws PdbaseInconsistencyError, SQLException {
215 String sql="SELECT entity_key " +
216 " FROM "+db+".struct_asym " +
217 " WHERE entry_key="+ entrykey +
218 " AND id='"+asymid+"'";
219
220 Statement stmt = conn.createStatement();
221 ResultSet rsst = stmt.executeQuery(sql);
222 if (rsst.next()) {
223 entitykey = rsst.getInt(1);
224 if (! rsst.isLast()) {
225 //System.err.println("More than 1 entity_key match for entry_key="+entrykey+", asym_id="+asymid);
226 throw new PdbaseInconsistencyError("More than 1 entity_key match for entry_key="+entrykey+", asym_id="+asymid);
227 }
228 } else {
229 //System.err.println("No entity_key match for entry_key="+entrykey+", asym_id="+asymid);
230 throw new PdbaseInconsistencyError("No entity_key match for entry_key="+entrykey+", asym_id="+asymid);
231 }
232 rsst.close();
233 stmt.close();
234 return entitykey;
235 }
236
237 private String get_atom_alt_locs() throws PdbaseInconsistencyError, SQLException{
238 ArrayList<String> alt_ids = new ArrayList<String>();
239 HashMap<String,Integer> alt_ids2keys = new HashMap<String,Integer>();
240 String alt_loc_field="label_alt_key";
241 String sql="SELECT id, atom_sites_alt_key FROM "+db+".atom_sites_alt WHERE entry_key="+entrykey;
242
243 Statement stmt = conn.createStatement();
244 ResultSet rsst = stmt.executeQuery(sql);
245 int count=0;
246 while (rsst.next()) {
247 count++;
248 alt_ids.add(rsst.getString(1));
249 alt_ids2keys.put(rsst.getString(1), rsst.getInt(2));
250 }
251 if (count!=0){
252 if ((! alt_ids.contains(".")) || alt_ids.indexOf(".")!=alt_ids.lastIndexOf(".")){ // second term is a way of finding out if there is more than 1 ocurrence of "." in the ArrayList
253 //System.err.println("alt_codes exist for entry_key "+entrykey+" but there is either no default value '.' or more than 1 '.'. Something wrong with this entry_key or with "+DEFAULT_PDBASE_DB+" db!");
254 throw new PdbaseInconsistencyError("alt_codes exist for entry_key "+entrykey+" but there is either no default value '.' or more than 1 '.'. Something wrong with this entry_key or with "+DEFAULT_PDBASE_DB+" db!");
255 }
256 alt_ids.remove(".");
257 Collections.sort(alt_ids);
258 String lowest_alt_id = alt_ids.get(0);
259 alt_locs_sql_str = "("+alt_loc_field+"="+alt_ids2keys.get(".")+" OR "+alt_loc_field+"="+alt_ids2keys.get(lowest_alt_id)+")";
260 } else {
261 alt_locs_sql_str=alt_loc_field+" IS NULL";
262 }
263
264 rsst.close();
265 stmt.close();
266
267 return alt_locs_sql_str;
268 }
269
270 private void read_atomData() throws PdbaseInconsistencyError, SQLException{
271 resser_atom2atomserial = new HashMap<String,Integer>();
272 resser2restype = new HashMap<Integer,String>();
273 atomser2coord = new HashMap<Integer,Point3d>();
274 atomser2resser = new HashMap<Integer,Integer>();
275
276 // NOTE: label_entity_key not really needed since there can be only one entity_key
277 // per entry_key,asym_id combination
278 String sql = "SELECT id, label_atom_id, label_comp_id, label_seq_id, Cartn_x, Cartn_y, Cartn_z " +
279 " FROM "+db+".atom_site " +
280 " WHERE entry_key="+entrykey +
281 " AND label_asym_id='"+asymid+"' " +
282 " AND label_entity_key="+ entitykey +
283 " AND model_num="+ model +
284 " AND "+alt_locs_sql_str;
285
286 Statement stmt = conn.createStatement();
287 ResultSet rsst = stmt.executeQuery(sql);
288 int count=0;
289 while (rsst.next()){
290 count++;
291
292 int atomserial = rsst.getInt(1); // atomserial
293 String atom = rsst.getString(2).trim(); // atom
294 String res_type = rsst.getString(3).trim(); // res_type
295 int res_serial = rsst.getInt(4); // res_serial
296 double x = rsst.getDouble(5); // x
297 double y = rsst.getDouble(6); // y
298 double z = rsst.getDouble(7); // z
299 Point3d coords = new Point3d(x, y, z);
300 if (AAinfo.isValidAA(res_type)) {
301 atomser2coord.put(atomserial, coords);
302 atomser2resser.put(atomserial, res_serial);
303 resser2restype.put(res_serial, res_type);
304 if (AAinfo.isValidAtomWithOXT(res_type,atom)){
305 resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
306 }
307 }
308
309 }
310 if (count==0){
311 throw new PdbaseInconsistencyError("atom data query returned no data at all for entry_key="+entrykey+", asym_id="+asymid+", entity_key="+entitykey+", model_num="+model+", alt_locs_sql_str='"+alt_locs_sql_str+"'");
312 }
313 rsst.close();
314 stmt.close();
315 }
316
317 private String read_seq() throws PdbaseInconsistencyError, SQLException{
318 String sequence="";
319
320 // we use seq_id+0 (implicitly converts to int) in ORDER BY because seq_id is varchar!!
321 String sql="SELECT mon_id" +
322 " FROM "+db+".pdbx_poly_seq_scheme " +
323 " WHERE entry_key=" + entrykey +
324 " AND asym_id='"+asymid+"' " +
325 " ORDER BY seq_id+0";
326
327 Statement stmt = conn.createStatement();
328 ResultSet rsst = stmt.executeQuery(sql);
329 int count=0;
330 while (rsst.next()) {
331 count++;
332 String res_type = rsst.getString(1);
333 if (AAinfo.isValidAA(res_type)){
334 sequence+=AAinfo.threeletter2oneletter(res_type);
335 } else {
336 sequence+=NONSTANDARD_AA_LETTER;
337 }
338 }
339 if (count==0) {
340 //System.err.println("No sequence data match for entry_key="+entrykey+", asym_id="+asymid+", pdb_strand_id="+pdbstrandid);
341 throw new PdbaseInconsistencyError("No sequence data match for entry_key="+entrykey+", asym_id="+asymid);
342 }
343 rsst.close();
344 stmt.close();
345
346 return sequence;
347 }
348
349 private HashMap<String,Integer> get_ressers_mapping() throws PdbaseInconsistencyError, SQLException{
350 String pdbstrandid=pdbChainCode;
351 if (pdbChainCode.equals(NULL_CHAIN_CODE)){
352 pdbstrandid="A";
353 }
354
355 HashMap<String,Integer> map = new HashMap<String, Integer>();
356 String sql="SELECT seq_id, concat(pdb_seq_num,IF(pdb_ins_code='.','',pdb_ins_code))" +
357 " FROM "+db+".pdbx_poly_seq_scheme " +
358 " WHERE entry_key=" + entrykey +
359 " AND asym_id='"+asymid+"' " +
360 " AND pdb_strand_id='"+pdbstrandid+"' " +
361 " ORDER BY seq_id+0";
362
363 Statement stmt = conn.createStatement();
364 ResultSet rsst = stmt.executeQuery(sql);
365 int count=0;
366 while (rsst.next()) {
367 count++;
368 int resser = Integer.parseInt(rsst.getString(1));
369 String pdbresser = rsst.getString(2);
370 map.put(pdbresser, resser);
371 }
372 if (count==0) {
373 //System.err.println("No residue serials mapping data match for entry_key="+entrykey+", asym_id="+asymid+", pdb_strand_id="+pdbstrandid);
374 throw new PdbaseInconsistencyError("No residue serials mapping data match for entry_key="+entrykey+", asym_id="+asymid+", pdb_strand_id="+pdbstrandid);
375 }
376 rsst.close();
377 stmt.close();
378
379 return map;
380 }
381
382 private void readSecStructure() throws SQLException {
383 this.secondaryStructure = new SecondaryStructure();
384
385 // HELIX AND TURN -- struct_conf table
386 String sql = "SELECT id,beg_label_seq_id,end_label_seq_id " +
387 " FROM "+db+".struct_conf " +
388 " WHERE entry_key="+entrykey+
389 " AND beg_label_asym_id='"+asymid+"'";
390 Statement stmt = conn.createStatement();
391 ResultSet rsst = stmt.executeQuery(sql);
392 int count=0;
393 while (rsst.next()) {
394 count++;
395 String id = rsst.getString(1).trim(); // id is either HELIX_Pnn or TURN_Pnn
396 Pattern p = Pattern.compile("^(\\w).+_P(\\d)+$");
397 Matcher m = p.matcher(id);
398 String ssId="Unknown";
399 if (m.find()){
400 ssId = m.group(1)+m.group(2); // e.g.: Hnn (helices) or Tnn (turns)
401 }
402 int beg = rsst.getInt(2);
403 int end =rsst.getInt(3);
404 char ssType = SecStrucElement.OTHER;
405 if(id.startsWith("H")) {
406 ssType = SecStrucElement.HELIX;
407 } else if(id.startsWith("T")) {
408 ssType = SecStrucElement.TURN;
409 } else {
410 System.err.println("Unknown secondary structure type " + id + " encountered when reading from Pdbase. Skipping.");
411 }
412 if(ssType != SecStrucElement.OTHER) {
413 SecStrucElement ssElem = new SecStrucElement(ssType, beg, end, ssId);
414 secondaryStructure.add(ssElem);
415 }
416 }
417 rsst.close();
418 stmt.close();
419
420 // SHEET -- struct_sheet_range table
421 sql = "SELECT sheet_id, id, beg_label_seq_id, end_label_seq_id " +
422 " FROM "+db+".struct_sheet_range " +
423 " WHERE entry_key="+entrykey+
424 " AND beg_label_asym_id='"+asymid+"'";
425 stmt = conn.createStatement();
426 rsst = stmt.executeQuery(sql);
427 count=0;
428 while (rsst.next()) {
429 count++;
430 String sheetid = rsst.getString(1).trim();
431 int id = rsst.getInt(2);
432 int beg = rsst.getInt(3);
433 int end =rsst.getInt(4);
434 String ssId=SecStrucElement.STRAND+sheetid+id; // e.g.: SA1, SA2..., SB1, SB2,...
435 SecStrucElement ssElem = new SecStrucElement(SecStrucElement.STRAND, beg, end, ssId);
436 secondaryStructure.add(ssElem);
437 }
438 rsst.close();
439 stmt.close();
440
441 }
442 }