ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/PdbasePdb.java
Revision: 222
Committed: Wed Jul 4 15:39:01 2007 UTC (17 years, 3 months ago) by duarte
File size: 15481 byte(s)
Log Message:
New secstruct2resinterval TreeMap to store secondary structure elements as a map of ss string ids to intervals
New class Interval
Line File contents
1 package proteinstructure;
2
3 import java.sql.ResultSet;
4 import java.sql.SQLException;
5 import java.sql.Statement;
6 import java.util.ArrayList;
7 import java.util.Collections;
8 import java.util.HashMap;
9 import java.util.TreeMap;
10 import java.util.regex.Matcher;
11 import java.util.regex.Pattern;
12
13 import tools.MySQLConnection;
14
15 /**
16 * A single-chain pdb protein structure loaded from a PDBASE database.
17 * See http://openmms.sdsc.edu/OpenMMS-1.5.1_Std/openmms/docs/guides/PDBase.html for a description of PDBASE.
18 *
19 * @author Jose Duarte
20 * Class: PdbasePdb
21 * Package: proteinstructure
22 */
23 public class PdbasePdb extends Pdb {
24
25 private final static String MYSQLSERVER="white";
26 private final static String MYSQLUSER=MySQLConnection.getUserName();
27 private final static String MYSQLPWD="nieve";
28 private final static String DEFAULT_PDBASE_DB="pdbase";
29
30 private MySQLConnection conn;
31
32 private int entrykey;
33 private String asymid;
34 private int entitykey;
35 private String alt_locs_sql_str;
36
/**
 * Builds a PdbasePdb from a pdb code and a pdb chain code, using defaults for everything else:
 * the model is DEFAULT_MODEL, the database is DEFAULT_PDBASE_DB and the connection is a new
 * MySQLConnection created from MYSQLSERVER, MYSQLUSER and MYSQLPWD.
 * @param pdbCode the pdb code of the entry
 * @param pdbChainCode the pdb chain code of the chain to load
 * @throws PdbaseInconsistencyError
 * @throws PdbCodeNotFoundError
 * @throws SQLException
 * @throws PdbChainCodeNotFoundError
 */
public PdbasePdb (String pdbCode, String pdbChainCode) throws PdbaseInconsistencyError, PdbCodeNotFoundError, SQLException, PdbChainCodeNotFoundError {
    this(
        pdbCode,
        pdbChainCode,
        DEFAULT_MODEL,
        DEFAULT_PDBASE_DB,
        new MySQLConnection(MYSQLSERVER, MYSQLUSER, MYSQLPWD));
}
52
/**
 * Builds a PdbasePdb from a pdb code and a pdb chain code, reading from the given
 * pdbase database through the given connection. The model is DEFAULT_MODEL.
 * @param pdbCode the pdb code of the entry
 * @param pdbChainCode the pdb chain code of the chain to load
 * @param db name of the source database; it must be a pdbase database
 * @param conn the connection used for all queries
 * @throws PdbaseInconsistencyError
 * @throws PdbCodeNotFoundError
 * @throws SQLException
 * @throws PdbChainCodeNotFoundError
 */
public PdbasePdb (String pdbCode, String pdbChainCode, String db, MySQLConnection conn) throws PdbaseInconsistencyError, PdbCodeNotFoundError, SQLException, PdbChainCodeNotFoundError {
    this(
        pdbCode,
        pdbChainCode,
        DEFAULT_MODEL,
        db,
        conn);
}
69
/**
 * Builds a PdbasePdb from a pdb code, a pdb chain code and a model serial.
 * The database is DEFAULT_PDBASE_DB and the connection is a new MySQLConnection
 * created from MYSQLSERVER, MYSQLUSER and MYSQLPWD.
 * @param pdbCode the pdb code of the entry
 * @param pdbChainCode the pdb chain code of the chain to load
 * @param model_serial the model serial to load
 * @throws PdbaseInconsistencyError
 * @throws PdbCodeNotFoundError
 * @throws SQLException
 * @throws PdbChainCodeNotFoundError
 */
public PdbasePdb (String pdbCode, String pdbChainCode, int model_serial) throws PdbaseInconsistencyError, PdbCodeNotFoundError, SQLException, PdbChainCodeNotFoundError {
    this(
        pdbCode,
        pdbChainCode,
        model_serial,
        DEFAULT_PDBASE_DB,
        new MySQLConnection(MYSQLSERVER, MYSQLUSER, MYSQLPWD));
}
85
/**
 * Builds a PdbasePdb from a pdb code, a pdb chain code, a model serial, a source
 * database and a MySQLConnection. All other constructors delegate to this one.
 * The pdb code is stored lower-cased and the chain code upper-cased.
 * The keys, the sequence, the residue serial mappings, the atom data and the
 * secondary structure are all read from the database during construction.
 * @param pdbCode the pdb code of the entry
 * @param pdbChainCode the pdb chain code of the chain to load
 * @param model_serial the model serial to load
 * @param db name of the source database; it must be a pdbase database
 * @param conn the connection used for all queries
 * @throws PdbaseInconsistencyError
 * @throws PdbCodeNotFoundError
 * @throws SQLException
 * @throws PdbChainCodeNotFoundError
 */
public PdbasePdb (String pdbCode, String pdbChainCode, int model_serial, String db, MySQLConnection conn) throws PdbaseInconsistencyError, PdbCodeNotFoundError, SQLException, PdbChainCodeNotFoundError {
    // Case conversion uses a fixed locale: with the default locale the result depends on
    // the machine settings (e.g. under a Turkish locale "I" lower-cases to a dotless i).
    this.pdbCode = pdbCode.toLowerCase(java.util.Locale.ENGLISH);            // our convention: pdb codes are lower case
    this.pdbChainCode = pdbChainCode.toUpperCase(java.util.Locale.ENGLISH);  // our convention: chain codes are upper case
    this.model = model_serial;
    this.db = db;
    this.conn = conn;

    // The order of these calls matters: each query uses the keys found by the previous ones.
    this.entrykey = get_entry_key();
    this.asymid = get_asym_id(); // sets asymid and chainCode
    this.entitykey = get_entity_key();
    this.alt_locs_sql_str = get_atom_alt_locs();

    this.chainCode = getChainCode();
    this.sequence = read_seq();
    this.pdbresser2resser = get_ressers_mapping();

    this.read_atomData(); // populates resser_atom2atomserial, resser2restype, atomser2coord, atomser2resser

    // resser2pdbresser is the inverse of the pdbresser2resser map
    this.resser2pdbresser = new HashMap<Integer, String>();
    for (java.util.Map.Entry<String, Integer> entry : pdbresser2resser.entrySet()) {
        resser2pdbresser.put(entry.getValue(), entry.getKey());
    }

    readSecStructure();
}
125
126 private int get_entry_key() throws PdbCodeNotFoundError, SQLException {
127 String sql="SELECT entry_key FROM "+db+".struct WHERE entry_id='"+pdbCode.toUpperCase()+"'";
128 Statement stmt = conn.createStatement();
129 ResultSet rsst = stmt.executeQuery(sql);
130 if (rsst.next()) {
131 entrykey = rsst.getInt(1);
132 if (! rsst.isLast()) {
133 //System.err.println("More than 1 entry_key match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
134 throw new PdbCodeNotFoundError("More than 1 entry_key match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
135 }
136 } else {
137 //System.err.println("No entry_key match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
138 throw new PdbCodeNotFoundError("No entry_key match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
139 }
140 rsst.close();
141 stmt.close();
142 return entrykey;
143 }
144
145 private String get_asym_id() throws PdbChainCodeNotFoundError, SQLException {
146 String pdbstrandid=pdbChainCode;
147 if (pdbChainCode.equals("NULL")){
148 pdbstrandid="A";
149 }
150 String sql="SELECT asym_id " +
151 " FROM "+db+".pdbx_poly_seq_scheme " +
152 " WHERE entry_key=" + entrykey +
153 " AND pdb_strand_id='"+pdbstrandid+"' " +
154 " LIMIT 1";
155
156 Statement stmt = conn.createStatement();
157 ResultSet rsst = stmt.executeQuery(sql);
158 if (rsst.next()) {
159 asymid = rsst.getString(1);
160 } else {
161 //System.err.println("No asym_id match for entry_key="+entrykey+", pdb_strand_id="+pdbChainCode);
162 throw new PdbChainCodeNotFoundError("No asym_id match for entry_key="+entrykey+", pdb_strand_id="+pdbChainCode);
163 }
164 rsst.close();
165 stmt.close();
166 // we set the internal chain identifier chainCode from asymid
167 chainCode = asymid;
168 return asymid;
169 }
170
171 private int get_entity_key() throws PdbaseInconsistencyError, SQLException {
172 String sql="SELECT entity_key " +
173 " FROM "+db+".struct_asym " +
174 " WHERE entry_key="+ entrykey +
175 " AND id='"+asymid+"'";
176
177 Statement stmt = conn.createStatement();
178 ResultSet rsst = stmt.executeQuery(sql);
179 if (rsst.next()) {
180 entitykey = rsst.getInt(1);
181 if (! rsst.isLast()) {
182 //System.err.println("More than 1 entity_key match for entry_key="+entrykey+", asym_id="+asymid);
183 throw new PdbaseInconsistencyError("More than 1 entity_key match for entry_key="+entrykey+", asym_id="+asymid);
184 }
185 } else {
186 //System.err.println("No entity_key match for entry_key="+entrykey+", asym_id="+asymid);
187 throw new PdbaseInconsistencyError("No entity_key match for entry_key="+entrykey+", asym_id="+asymid);
188 }
189 rsst.close();
190 stmt.close();
191 return entitykey;
192 }
193
194 private String get_atom_alt_locs() throws PdbaseInconsistencyError, SQLException{
195 ArrayList<String> alt_ids = new ArrayList<String>();
196 HashMap<String,Integer> alt_ids2keys = new HashMap<String,Integer>();
197 String alt_loc_field="label_alt_key";
198 String sql="SELECT id, atom_sites_alt_key FROM "+db+".atom_sites_alt WHERE entry_key="+entrykey;
199
200 Statement stmt = conn.createStatement();
201 ResultSet rsst = stmt.executeQuery(sql);
202 int count=0;
203 while (rsst.next()) {
204 count++;
205 alt_ids.add(rsst.getString(1));
206 alt_ids2keys.put(rsst.getString(1), rsst.getInt(2));
207 }
208 if (count!=0){
209 if ((! alt_ids.contains(".")) || alt_ids.indexOf(".")!=alt_ids.lastIndexOf(".")){ // second term is a way of finding out if there is more than 1 ocurrence of "." in the ArrayList
210 //System.err.println("alt_codes exist for entry_key "+entrykey+" but there is either no default value '.' or more than 1 '.'. Something wrong with this entry_key or with "+DEFAULT_PDBASE_DB+" db!");
211 throw new PdbaseInconsistencyError("alt_codes exist for entry_key "+entrykey+" but there is either no default value '.' or more than 1 '.'. Something wrong with this entry_key or with "+DEFAULT_PDBASE_DB+" db!");
212 }
213 alt_ids.remove(".");
214 Collections.sort(alt_ids);
215 String lowest_alt_id = alt_ids.get(0);
216 alt_locs_sql_str = "("+alt_loc_field+"="+alt_ids2keys.get(".")+" OR "+alt_loc_field+"="+alt_ids2keys.get(lowest_alt_id)+")";
217 } else {
218 alt_locs_sql_str=alt_loc_field+" IS NULL";
219 }
220
221 rsst.close();
222 stmt.close();
223
224 return alt_locs_sql_str;
225 }
226
227 private void read_atomData() throws PdbaseInconsistencyError, SQLException{
228 resser_atom2atomserial = new HashMap<String,Integer>();
229 resser2restype = new HashMap<Integer,String>();
230 atomser2coord = new HashMap<Integer,Double[]>();
231 atomser2resser = new HashMap<Integer,Integer>();
232
233
234 String sql = "SELECT id, label_atom_id, label_comp_id, label_seq_id, Cartn_x, Cartn_y, Cartn_z " +
235 " FROM "+db+".atom_site " +
236 " WHERE entry_key="+entrykey +
237 " AND label_asym_id='"+asymid+"' " +
238 " AND label_entity_key="+ entitykey +
239 " AND model_num="+ model +
240 " AND "+alt_locs_sql_str;
241
242 Statement stmt = conn.createStatement();
243 ResultSet rsst = stmt.executeQuery(sql);
244 int count=0;
245 while (rsst.next()){
246 count++;
247
248 int atomserial = rsst.getInt(1); // atomserial
249 String atom = rsst.getString(2).trim(); // atom
250 String res_type = rsst.getString(3).trim(); // res_type
251 int res_serial = rsst.getInt(4); // res_serial
252 double x = rsst.getDouble(5); // x
253 double y = rsst.getDouble(6); // y
254 double z = rsst.getDouble(7); // z
255 Double[] coords = {x, y, z};
256 ArrayList<String> aalist=AA.aas();
257 if (aalist.contains(res_type)) {
258 atomser2coord.put(atomserial, coords);
259 atomser2resser.put(atomserial, res_serial);
260 resser2restype.put(res_serial, res_type);
261 ArrayList<String> atomlist = aas2atoms.get(res_type);
262 if (atomlist.contains(atom)){
263 resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
264 }
265 }
266
267 }
268 if (count==0){
269 throw new PdbaseInconsistencyError("atom data query returned no data at all for entry_key="+entrykey+", asym_id="+asymid+", entity_key="+entitykey+", model_num="+model+", alt_locs_sql_str='"+alt_locs_sql_str+"'");
270 }
271 rsst.close();
272 stmt.close();
273 }
274
275 private String read_seq() throws PdbaseInconsistencyError, SQLException{
276 String sequence="";
277 String pdbstrandid=pdbChainCode;
278 if (pdbChainCode.equals("NULL")){
279 pdbstrandid="A";
280 }
281 // we use seq_id+0 (implicitly converts to int) in ORDER BY because seq_id is varchar!!
282 String sql="SELECT mon_id" +
283 " FROM "+db+".pdbx_poly_seq_scheme " +
284 " WHERE entry_key=" + entrykey +
285 " AND asym_id='"+asymid+"' " +
286 " AND pdb_strand_id='"+pdbstrandid+"' " +
287 " ORDER BY seq_id+0";
288
289 Statement stmt = conn.createStatement();
290 ResultSet rsst = stmt.executeQuery(sql);
291 ArrayList<String> aalist=AA.aas();
292 int count=0;
293 while (rsst.next()) {
294 count++;
295 String res_type = rsst.getString(1);
296 if (aalist.contains(res_type)){
297 sequence+=AA.threeletter2oneletter(res_type);
298 } else {
299 sequence+=NONSTANDARD_AA_LETTER;
300 }
301 }
302 if (count==0) {
303 //System.err.println("No sequence data match for entry_key="+entrykey+", asym_id="+asymid+", pdb_strand_id="+pdbstrandid);
304 throw new PdbaseInconsistencyError("No sequence data match for entry_key="+entrykey+", asym_id="+asymid+", pdb_strand_id="+pdbstrandid);
305 }
306 rsst.close();
307 stmt.close();
308
309 return sequence;
310 }
311
312 private HashMap<String,Integer> get_ressers_mapping() throws PdbaseInconsistencyError, SQLException{
313 String pdbstrandid=pdbChainCode;
314 if (pdbChainCode.equals("NULL")){
315 pdbstrandid="A";
316 }
317
318 HashMap<String,Integer> map = new HashMap<String, Integer>();
319 String sql="SELECT seq_id, concat(auth_seq_num,IF(pdb_ins_code='.','',pdb_ins_code))" +
320 " FROM "+db+".pdbx_poly_seq_scheme " +
321 " WHERE entry_key=" + entrykey +
322 " AND asym_id='"+asymid+"' " +
323 " AND pdb_strand_id='"+pdbstrandid+"' " +
324 " AND auth_seq_num!='?'" +
325 " ORDER BY seq_id+0";
326
327 Statement stmt = conn.createStatement();
328 ResultSet rsst = stmt.executeQuery(sql);
329 int count=0;
330 while (rsst.next()) {
331 count++;
332 int resser = Integer.parseInt(rsst.getString(1));
333 String pdbresser = rsst.getString(2);
334 map.put(pdbresser, resser);
335 }
336 if (count==0) {
337 //System.err.println("No residue serials mapping data match for entry_key="+entrykey+", asym_id="+asymid+", pdb_strand_id="+pdbstrandid);
338 throw new PdbaseInconsistencyError("No residue serials mapping data match for entry_key="+entrykey+", asym_id="+asymid+", pdb_strand_id="+pdbstrandid);
339 }
340 rsst.close();
341 stmt.close();
342
343 return map;
344 }
345
346 private void readSecStructure() throws SQLException {
347 this.resser2secstruct = new HashMap<Integer, String>();
348 this.secstruct2resinterval = new TreeMap<String, Interval>();
349
350 // HELIX AND TURN -- struct_conf table
351 String sql = "SELECT id,beg_label_seq_id,end_label_seq_id " +
352 " FROM "+db+".struct_conf " +
353 " WHERE entry_key="+entrykey+
354 " AND beg_label_asym_id='"+asymid+"'";
355 Statement stmt = conn.createStatement();
356 ResultSet rsst = stmt.executeQuery(sql);
357 int count=0;
358 while (rsst.next()) {
359 count++;
360 String id = rsst.getString(1).trim(); // id is either HELIX_Pnn or TURN_Pnn
361 Pattern p = Pattern.compile("^(\\w).+_P(\\d)+$");
362 Matcher m = p.matcher(id);
363 String ssId="Unknown";
364 if (m.find()){
365 ssId = m.group(1)+m.group(2); // e.g.: Hnn (helices) or Tnn (turns)
366 }
367 int beg = rsst.getInt(2);
368 int end =rsst.getInt(3);
369 secstruct2resinterval.put(ssId, new Interval(beg,end));
370 for (int i=beg;i<=end;i++){
371 if (resser2secstruct.containsKey(i)){// if already assigned we print a warning and then assign it
372 System.err.println("Inconsistency in secondary structure assignment. " +
373 "Residue "+i+" is getting reassigned from "+resser2secstruct.get(i)+" to "+ssId);
374 }
375 resser2secstruct.put(i,ssId);
376 }
377 }
378 rsst.close();
379 stmt.close();
380
381 // SHEET -- struct_sheet_range table
382 sql = "SELECT sheet_id, id, beg_label_seq_id, end_label_seq_id " +
383 " FROM "+db+".struct_sheet_range " +
384 " WHERE entry_key="+entrykey+
385 " AND beg_label_asym_id='"+asymid+"'";
386 stmt = conn.createStatement();
387 rsst = stmt.executeQuery(sql);
388 count=0;
389 while (rsst.next()) {
390 count++;
391 String sheetid = rsst.getString(1).trim();
392 int id = rsst.getInt(2);
393 int beg = rsst.getInt(3);
394 int end =rsst.getInt(4);
395 String ssId="S"+sheetid+id; // e.g.: SA1, SA2..., SB1, SB2,...
396 secstruct2resinterval.put(ssId, new Interval(beg,end));
397 for (int i=beg;i<=end;i++){
398 if (resser2secstruct.containsKey(i)){// if already assigned we print a warning and then assign it
399 System.err.println("Inconsistency in secondary structure assignment. " +
400 "Residue "+i+" is getting reassigned from "+resser2secstruct.get(i)+" to "+ssId);
401 }
402 resser2secstruct.put(i,ssId);
403 }
404 }
405 rsst.close();
406 stmt.close();
407
408 }
409 }