ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/MsdsdPdb.java
Revision: 492
Committed: Wed Jan 2 13:18:57 2008 UTC (16 years, 9 months ago) by duarte
File size: 14582 byte(s)
Log Message:
Copied the aglappe-jung branch into trunk.

Line File contents
1 package proteinstructure;
2
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;

import javax.vecmath.Point3d;

import tools.MySQLConnection;
15
16 /**
17 * A single chain pdb protein structure loaded from a MSDSD database
18 * See http://www.ebi.ac.uk/msd-srv/docs/dbdoc/refaindex.html to know what MSDSD is
19 *
20 * @author Jose Duarte
21 * Class: MsdsdPdb
22 * Package: proteinstructure
23 */
24 public class MsdsdPdb extends Pdb {
25
26 private final static String MYSQLSERVER="white";
27 private final static String MYSQLUSER=MySQLConnection.getUserName();
28 private final static String MYSQLPWD="nieve";
29 //private final static String DEFAULT_MYMSDSD_DB="my_msdsd_00_07_a";
30 private final static String DEFAULT_MSDSD_DB="msdsd_00_07_a";
31
32 private MySQLConnection conn;
33
34 private int chainid;
35 private int modelid;
36
37 // TODO for this to be used by other people we need to do things without a myMsdsdDb (or also distribute our fixes database)
38 private String myMsdsdDb; // our database with add-ons and fixes to msdsd
39
40 /**
41 * Constructs an empty Pdb object given pdb code
42 * Data will be loaded from database upon call of load(pdbChainCode, modelSerial)
43 * MySQLConnection is taken from defaults in MsdsdPdb class: MYSQLSERVER, MYSQLUSER, MYSQLPWD
44 * Database is taken from default msdsd database in MsdsdPdb class: DEFAULT_MSDSD_DB
45 * @param pdbCode
46 * @throws SQLException
47 */
48 public MsdsdPdb (String pdbCode) throws SQLException {
49 this(pdbCode,DEFAULT_MSDSD_DB,new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD));
50 }
51
52 /**
53 * Constructs an empty Pdb object given pdb code, a source db and a MySQLConnection.
54 * Data will be loaded from database upon call of load(pdbChainCode, modelSerial)
55 * @param pdbCode
56 * @param db a msdsd database
57 * @param conn
58 */
59 public MsdsdPdb (String pdbCode, String db, MySQLConnection conn) {
60 this.pdbCode=pdbCode.toLowerCase(); // our convention: pdb codes are lower case
61 this.db=db;
62 this.myMsdsdDb="my_"+db; // i.e. for db=msdsd_00_07_a then myMsdsdDb=my_msdsd_00_07_a
63 this.dataLoaded = false;
64
65 this.conn = conn;
66
67 }
68
69 public void load(String pdbChainCode, int modelSerial) throws PdbLoadError {
70 this.model = modelSerial;
71 this.pdbChainCode=pdbChainCode; // NOTE! pdb chain codes are case sensitive!
72 try {
73 this.getchainid();// initialises chainid, modelid and chainCode
74
75 if (check_inconsistent_res_numbering()){
76 throw new MsdsdInconsistentResidueNumbersError("Inconsistent residue numbering in msdsd for accession_code "+this.pdbCode+", chain_pdb_code "+this.pdbChainCode);
77 }
78
79 this.sequence = read_seq();
80 this.fullLength = sequence.length();
81
82 this.pdbresser2resser = get_ressers_mapping();
83
84 this.read_atomData();
85
86 this.obsLength = resser2restype.size();
87
88 // we initialise resser2pdbresser from the pdbresser2resser HashMap
89 this.resser2pdbresser = new HashMap<Integer, String>();
90 for (String pdbresser:pdbresser2resser.keySet()){
91 resser2pdbresser.put(pdbresser2resser.get(pdbresser), pdbresser);
92 }
93
94 secondaryStructure = new SecondaryStructure(); // create empty secondary structure first to make sure object is not null
95 readSecStructure();
96 if(!secondaryStructure.isEmpty()) {
97 secondaryStructure.setComment("MSDSD");
98 }
99
100 // initialising atomser2atom from resser_atom2atomserial
101 atomser2atom = new HashMap<Integer, String>();
102 for (String resser_atom:resser_atom2atomserial.keySet()){
103 int atomserial = resser_atom2atomserial.get(resser_atom);
104 String atom = resser_atom.split("_")[1];
105 atomser2atom.put(atomserial,atom);
106 }
107
108 dataLoaded = true;
109
110 } catch (PdbCodeNotFoundError e) {
111 throw new PdbLoadError(e);
112 } catch (SQLException e) {
113 throw new PdbLoadError(e);
114 } catch (MsdsdInconsistentResidueNumbersError e) {
115 throw new PdbLoadError(e);
116 }
117
118 }
119
120 public String[] getChains() throws PdbLoadError {
121 TreeSet<String> chains = new TreeSet<String>();
122 try {
123 String sql = "SELECT DISTINCT chain_pdb_code " + // the DISTINCT is because there can be a multi model entry
124 " FROM "+myMsdsdDb+".mmol_chain_info " +
125 " WHERE accession_code='"+pdbCode+"' " +
126 " AND chain_type='C' " +
127 " AND asu_chain=1 ";
128 Statement stmt = conn.createStatement();
129 ResultSet rsst = stmt.executeQuery(sql);
130 while (rsst.next()) {
131 String chain = rsst.getString(1);
132 if (chain==null) chain="NULL";
133 chains.add(chain);
134 }
135 rsst.close();
136 stmt.close();
137 } catch (SQLException e) {
138 throw new PdbLoadError(e);
139 }
140
141 if (chains.isEmpty()) return null;
142
143 String[] chainsArray = new String[chains.size()];
144 chains.toArray(chainsArray);
145 return chainsArray;
146 }
147
148 public Integer[] getModels() throws PdbLoadError {
149 TreeSet<Integer> models = new TreeSet<Integer>();
150 try {
151 String sql = "SELECT DISTINCT model_serial " +
152 " FROM "+myMsdsdDb+".mmol_chain_info " +
153 " WHERE accession_code='"+pdbCode+"' " +
154 " AND chain_type='C' " +
155 " AND asu_chain=1 ";
156 Statement stmt = conn.createStatement();
157 ResultSet rsst = stmt.executeQuery(sql);
158 while (rsst.next()) {
159 models.add(rsst.getInt(1));
160 }
161 rsst.close();
162 stmt.close();
163
164 } catch (SQLException e) {
165 throw new PdbLoadError(e);
166 }
167
168 if (models.isEmpty()) return null;
169 Integer[] modelsArray = new Integer[models.size()];
170 models.toArray(modelsArray);
171 return modelsArray;
172 }
173
174 private void getchainid() throws PdbCodeNotFoundError, SQLException {
175 chainid=0;
176 String chaincodestr="='"+pdbChainCode+"'";
177 if (pdbChainCode.equals(Pdb.NULL_CHAIN_CODE)){
178 chaincodestr="IS NULL";
179 }
180 String sql = "SELECT chain_id, model_id, pchain_code " +
181 " FROM "+myMsdsdDb+".mmol_chain_info " +
182 " WHERE accession_code='"+pdbCode+"' " +
183 " AND chain_pdb_code "+chaincodestr +
184 " AND chain_type='C' " +
185 " AND asu_chain=1 " +
186 " AND model_serial="+model;
187
188 Statement stmt = conn.createStatement();
189 ResultSet rsst = stmt.executeQuery(sql);
190 if (rsst.next()) {
191 chainid = rsst.getInt(1);
192 modelid = rsst.getInt(2);
193 chainCode=rsst.getString(3);
194 if (! rsst.isLast()) {
195 //System.err.println("More than 1 chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
196 throw new PdbCodeNotFoundError("More than 1 chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
197 }
198 } else {
199 //System.err.println("No chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
200 throw new PdbCodeNotFoundError("No chain_id could be matched for accession_code "+pdbCode+", chain_pdb_code "+pdbChainCode);
201 }
202 rsst.close();
203 stmt.close();
204 }
205
206 private boolean check_inconsistent_res_numbering() throws SQLException{
207 int count=0;
208 int numserial=0;
209
210 String sql="SELECT count(*) " +
211 " FROM "+myMsdsdDb+".problem_serial_chain " +
212 " WHERE chain_id="+chainid +
213 " AND (min_serial!=1 OR num_serial!=num_dist_serial OR num_serial!=max_serial-min_serial+1)";
214 Statement stmt = conn.createStatement();
215 ResultSet rsst = stmt.executeQuery(sql);
216 while (rsst.next()) {
217 count = rsst.getInt(1);
218 if (count>0){
219 return true;
220 }
221 }
222 sql="SELECT num_serial FROM "+myMsdsdDb+".problem_serial_chain WHERE chain_id="+chainid;
223 rsst = stmt.executeQuery(sql);
224 int check = 0;
225 while (rsst.next()){
226 check++;
227 numserial=rsst.getInt(1);
228 }
229 if (check!=1){
230 System.err.println("No num_serial match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
231 }
232 String allresseq = read_seq();
233 if (allresseq.length()!=numserial){
234 System.err.println("num_serial and length of all_res_seq don't match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
235 return true;
236 }
237 rsst.close();
238 stmt.close();
239 return false;
240 }
241
242 private void read_atomData() throws SQLException{
243 resser_atom2atomserial = new HashMap<String,Integer>();
244 resser2restype = new HashMap<Integer,String>();
245 atomser2coord = new HashMap<Integer,Point3d>();
246 atomser2resser = new HashMap<Integer,Integer>();
247
248 String sql = "SELECT serial,chem_atom_name,code_3_letter,residue_serial,x,y,z " +
249 " FROM "+db+".atom_data " +
250 " WHERE (model_id = "+modelid+") " +
251 " AND (chain_id = "+chainid+") " +
252 " AND (graph_alt_code_used = 1) " +
253 " AND (graph_standard_aa=1) " +
254 " AND (pdb_group = 'A')" +
255 " ORDER BY chain_code, residue_serial, serial";
256
257 Statement stmt = conn.createStatement();
258 ResultSet rsst = stmt.executeQuery(sql);
259 int count=0;
260 while (rsst.next()){
261 count++;
262
263 int atomserial = rsst.getInt(1); // atomserial
264 String atom = rsst.getString(2).trim(); // atom
265 String res_type = rsst.getString(3).trim(); // res_type
266 int res_serial = rsst.getInt(4); // res_serial
267 double x = rsst.getDouble(5); // x
268 double y = rsst.getDouble(6); // y
269 double z = rsst.getDouble(7); // z
270 Point3d coords = new Point3d(x, y, z);
271 if (AAinfo.isValidAA(res_type)) {
272 atomser2coord.put(atomserial, coords);
273 atomser2resser.put(atomserial, res_serial);
274 resser2restype.put(res_serial, res_type);
275 if (AAinfo.isValidAtomWithOXT(res_type,atom)){
276 resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
277 }
278 }
279
280
281 }
282 if (count==0){
283 System.err.println("atom data query returned no data at all for model_id="+modelid+", model_id="+modelid);
284 }
285 rsst.close();
286 stmt.close();
287 }
288
289 private String read_seq() throws SQLException{
290 String allresseq="";
291 String sql="SELECT all_res_seq FROM "+myMsdsdDb+".chain_seq WHERE chain_id="+chainid;
292
293 Statement stmt = conn.createStatement();
294 ResultSet rsst = stmt.executeQuery(sql);
295 int check = 0;
296 if (rsst.next()) {
297 check++;
298 allresseq=rsst.getString(1);
299 }
300 if (check!=1) {
301 System.err.println("No all_res_seq match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode+", chain_id="+chainid);
302 }
303 rsst.close();
304 stmt.close();
305
306 return allresseq;
307 }
308
309 private HashMap<String,Integer> get_ressers_mapping() throws SQLException {
310 HashMap<String,Integer> map = new HashMap<String, Integer>();
311 String sql="SELECT serial, concat(pdb_seq,IF(pdb_insert_code IS NULL,'',pdb_insert_code)) " +
312 " FROM "+db+".residue " +
313 " WHERE chain_id="+chainid+
314 " AND pdb_seq IS NOT NULL";
315
316 Statement stmt = conn.createStatement();
317 ResultSet rsst = stmt.executeQuery(sql);
318 int count=0;
319 while (rsst.next()) {
320 count++;
321 int resser = rsst.getInt(1);
322 String pdbresser = rsst.getString(2);
323 map.put(pdbresser, resser);
324 }
325 if (count==0) {
326 System.err.println("No residue serials mapping data match for chain_id="+chainid);
327 }
328 rsst.close();
329 stmt.close();
330
331 return map;
332 }
333
334 private void readSecStructure() throws SQLException{
335 this.secondaryStructure = new SecondaryStructure();
336
337 //HELIX -- helix table
338 String sql = "SELECT helix_serial, beg_residue_serial, end_residue_serial " +
339 " FROM "+db+".helix " +
340 " WHERE (model_id = "+modelid+") " +
341 " AND (chain_id = "+chainid+") ";
342 Statement stmt = conn.createStatement();
343 ResultSet rsst = stmt.executeQuery(sql);
344 int count=0;
345 while (rsst.next()) {
346 count++;
347 int serial = rsst.getInt(1);
348 int beg = rsst.getInt(2);
349 int end =rsst.getInt(3);
350 String ssId = "" + SecStrucElement.HELIX+serial;
351 SecStrucElement ssElem = new SecStrucElement(SecStrucElement.HELIX,beg,end,ssId);
352 secondaryStructure.add(ssElem);
353 }
354 rsst.close();
355 stmt.close();
356 //SHEET -- strand table
357 sql = "SELECT sheet_serial, strand_serial, strand_beg_residue_serial, strand_end_residue_serial " +
358 " FROM "+db+".strand " +
359 " WHERE (model_id = "+modelid+") " +
360 " AND (chain_id = "+chainid+") ";
361 stmt = conn.createStatement();
362 rsst = stmt.executeQuery(sql);
363 // we store everything in these 2 maps to assign later to resser2secstruct based on our own ids (ids are not very consistent in msdsd)
364 HashMap<Integer,Interval> strands2begEnd = new HashMap<Integer, Interval>();
365 TreeMap<Integer,ArrayList<Integer>> sheets2strands = new TreeMap<Integer, ArrayList<Integer>>();
366 count=0;
367 while (rsst.next()) {
368 count++;
369 int sheetSerial = rsst.getInt(1);
370 int strandSerial = rsst.getInt(2);
371 int beg = rsst.getInt(3);
372 int end =rsst.getInt(4);
373 strands2begEnd.put(strandSerial, new Interval(beg,end));
374 if (sheets2strands.containsKey(sheetSerial)){
375 sheets2strands.get(sheetSerial).add(strandSerial);
376 } else {
377 ArrayList<Integer> strands = new ArrayList<Integer>();
378 strands.add(strandSerial);
379 sheets2strands.put(sheetSerial, strands);
380 }
381 }
382 rsst.close();
383 stmt.close();
384 char sheet='A';
385 for (int sheetSerial:sheets2strands.keySet()){
386 int strand=1;
387 for (int strandSerial:sheets2strands.get(sheetSerial)){
388 Interval begEnd = strands2begEnd.get(strandSerial);
389 String ssId = ""+SecStrucElement.STRAND+sheet+strand;
390 SecStrucElement ssElem = new SecStrucElement(SecStrucElement.STRAND,begEnd.beg,begEnd.end,ssId);
391 secondaryStructure.add(ssElem);
392 strand++;
393 }
394 sheet++;
395 }
396
397 //TURN -- turn table
398 // they forgot to fill up the turn_serial field so we have to use turn_id and get a serial from it that is unique within the chain only
399 sql = "SELECT turn_id, res_1_residue_serial, res_2_residue_serial, res_3_residue_serial, res_4_residue_serial " +
400 " FROM "+db+".turn " +
401 " WHERE (model_id = "+modelid+") " +
402 " AND (chain_id = "+chainid+") ";
403 stmt = conn.createStatement();
404 rsst = stmt.executeQuery(sql);
405 TreeMap<Integer,ArrayList<Integer>> turns = new TreeMap<Integer, ArrayList<Integer>>();
406 count=0;
407 while (rsst.next()) {
408 count++;
409 int dbId = rsst.getInt(1);
410 int res1 = rsst.getInt(2);
411 int res2 = rsst.getInt(3);
412 int res3 = rsst.getInt(4);
413 int res4 = rsst.getInt(5);
414 ArrayList<Integer> residues = new ArrayList<Integer>();
415 if (res1!=0) residues.add(res1); // res is 0 when the field is NULL in database
416 if (res2!=0) residues.add(res2);
417 if (res3!=0) residues.add(res3);
418 if (res4!=0) residues.add(res4);
419 turns.put(dbId, residues);
420 }
421 rsst.close();
422 stmt.close();
423 int serial=1;
424 for (int dbId:turns.keySet()){
425 String ssId = "" + SecStrucElement.TURN + serial;
426 int beg = Collections.min(turns.get(dbId));
427 int end = Collections.max(turns.get(dbId));
428 SecStrucElement ssElem = new SecStrucElement(SecStrucElement.TURN,beg,end,ssId);
429 secondaryStructure.add(ssElem);
430 serial++;
431 }
432
433 }
434 }