ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/MsdsdPdb.java
Revision: 259
Committed: Fri Aug 10 16:40:44 2007 UTC (17 years, 2 months ago) by stehr
File size: 15096 byte(s)
Log Message:
completed first version of dssp secondary structure assignment, can be switched on/off by status variable in Start, some other little fixes
Line File contents
1 package proteinstructure;
2
3 import java.sql.ResultSet;
4 import java.sql.SQLException;
5 import java.sql.Statement;
6 import java.util.ArrayList;
7 import java.util.Collections;
8 import java.util.HashMap;
9 import java.util.TreeMap;
10
11 import javax.vecmath.Point3d;
12
13 import tools.MySQLConnection;
14
15 /**
16 * A single chain pdb protein structure loaded from a MSDSD database
17 * See http://www.ebi.ac.uk/msd-srv/docs/dbdoc/refaindex.html to know what MSDSD is
18 *
19 * @author Jose Duarte
20 * Class: MsdsdPdb
21 * Package: proteinstructure
22 */
23 public class MsdsdPdb extends Pdb {
24
25 private final static String MYSQLSERVER="white";
26 private final static String MYSQLUSER=MySQLConnection.getUserName();
27 private final static String MYSQLPWD="nieve";
28 //private final static String DEFAULT_MYMSDSD_DB="my_msdsd_00_07_a";
29 private final static String DEFAULT_MSDSD_DB="msdsd_00_07_a";
30
31 private MySQLConnection conn;
32
33 private int chainid;
34 private int modelid;
35
36 // TODO for this to be able to be used by other people we need to do things without a myMsdsdDb (or also distribute our fixes database)
37 private String myMsdsdDb; // our database with add-ons and fixes to msdsd
38
39 /**
40 * Constructs Pdb object given pdb code and pdb chain code.
41 * Model will be DEFAULT_MODEL
42 * MySQLConnection is taken from defaults in MsdsdPdb class: MYSQLSERVER, MYSQLUSER, MYSQLPWD
43 * Database is taken from default msdsd database in MsdsdPdb class: DEFAULT_MSDSD_DB
44 * @param pdbCode
45 * @param pdbChainCode
46 * @throws PdbCodeNotFoundError
47 * @throws MsdsdInconsistentResidueNumbersError
48 * @throws SQLException
49 */
50 public MsdsdPdb (String pdbCode, String pdbChainCode) throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
51 this(pdbCode,pdbChainCode,DEFAULT_MODEL,DEFAULT_MSDSD_DB,new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD));
52 }
53
54 /**
55 * Constructs Pdb object given pdb code, pdb chain code, db and MySQLConnection
56 * Model will be DEFAULT_MODEL
57 * db must be a msdsd database
58 * @param pdbCode
59 * @param pdbChainCode
60 * @param db
61 * @param conn
62 * @throws PdbCodeNotFoundError
63 * @throws MsdsdInconsistentResidueNumbersError
64 * @throws SQLException
65 */
66 public MsdsdPdb (String pdbCode, String pdbChainCode, String db, MySQLConnection conn) throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
67 this(pdbCode,pdbChainCode,DEFAULT_MODEL,db,conn);
68 }
69
70 /**
71 * Constructs Pdb object given pdb code, pdb chain code and a model serial
72 * MySQLConnection is taken from defaults in MsdsdPdb class: MYSQLSERVER, MYSQLUSER, MYSQLPWD
73 * Database is taken from default msdsd database in MsdsdPdb class: DEFAULT_MSDSD_DB
74 * @param pdbCode
75 * @param pdbChainCode
76 * @param model_serial
77 * @throws PdbCodeNotFoundError
78 * @throws MsdsdInconsistentResidueNumbersError
79 * @throws SQLException
80 */
81 public MsdsdPdb (String pdbCode, String pdbChainCode, int model_serial) throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
82 this(pdbCode,pdbChainCode,model_serial,DEFAULT_MSDSD_DB,new MySQLConnection(MYSQLSERVER,MYSQLUSER,MYSQLPWD));
83 }
84
85 /**
86 * Constructs Pdb object given pdb code, pdb chain code, model serial, a source db and a MySQLConnection.
87 * db must be a msdsd database
88 * @param pdbCode
89 * @param pdbChainCode
90 * @param model_serial
91 * @param db
92 * @param conn
93 * @throws PdbCodeNotFoundError
94 * @throws MsdsdInconsistentResidueNumbersError
95 * @throws SQLException
96 */
97 public MsdsdPdb (String pdbCode, String pdbChainCode, int model_serial, String db, MySQLConnection conn) throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
98 this.pdbCode=pdbCode.toLowerCase(); // our convention: pdb codes are lower case
99 this.pdbChainCode=pdbChainCode.toUpperCase(); // our convention: chain codes are upper case
100 this.model=model_serial;
101 this.db=db;
102 this.myMsdsdDb="my_"+db; // i.e. for db=msdsd_00_07_a then myMsdsdDb=my_msdsd_00_07_a
103
104 this.conn = conn;
105
106 this.getchainid();// initialises chainid, modelid and chainCode
107
108 if (check_inconsistent_res_numbering()){
109 throw new MsdsdInconsistentResidueNumbersError("Inconsistent residue numbering in msdsd for accession_code "+this.pdbCode+", chain_pdb_code "+this.pdbChainCode);
110 }
111
112 this.sequence = read_seq();
113 this.pdbresser2resser = get_ressers_mapping();
114
115 this.read_atomData();
116
117 // we initialise resser2pdbresser from the pdbresser2resser HashMap
118 this.resser2pdbresser = new HashMap<Integer, String>();
119 for (String pdbresser:pdbresser2resser.keySet()){
120 resser2pdbresser.put(pdbresser2resser.get(pdbresser), pdbresser);
121 }
122
123 this.readSecStructure();
124 if(!resser2secstruct.isEmpty()) {
125 hasSecondaryStructure = true;
126 secondaryStructureSource = "MSDSD";
127 }
128
129 // initialising atomser2atom from resser_atom2atomserial
130 atomser2atom = new HashMap<Integer, String>();
131 for (String resser_atom:resser_atom2atomserial.keySet()){
132 int atomserial = resser_atom2atomserial.get(resser_atom);
133 String atom = resser_atom.split("_")[1];
134 atomser2atom.put(atomserial,atom);
135 }
136 }
137
138 private void getchainid() throws PdbCodeNotFoundError, SQLException {
139 chainid=0;
140 String chaincodestr="='"+pdbChainCode+"'";
141 if (pdbChainCode.equals("NULL")){
142 chaincodestr="IS NULL";
143 }
144 String sql = "SELECT chain_id, model_id, pchain_code " +
145 " FROM "+myMsdsdDb+".mmol_chain_info " +
146 " WHERE accession_code='"+pdbCode+"' " +
147 " AND chain_pdb_code "+chaincodestr +
148 " AND chain_type='C' " +
149 " AND asu_chain=1 " +
150 " AND model_serial="+model;
151
152 Statement stmt = conn.createStatement();
153 ResultSet rsst = stmt.executeQuery(sql);
154 if (rsst.next()) {
155 chainid = rsst.getInt(1);
156 modelid = rsst.getInt(2);
157 chainCode=rsst.getString(3);
158 if (! rsst.isLast()) {
159 //System.err.println("More than 1 chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
160 throw new PdbCodeNotFoundError("More than 1 chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
161 }
162 } else {
163 //System.err.println("No chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
164 throw new PdbCodeNotFoundError("No chain_id could be matched for accession_code "+pdbCode+", chain_pdb_code "+pdbChainCode);
165 }
166 rsst.close();
167 stmt.close();
168 }
169
170 private boolean check_inconsistent_res_numbering() throws SQLException{
171 int count=0;
172 int numserial=0;
173
174 String sql="SELECT count(*) " +
175 " FROM "+myMsdsdDb+".problem_serial_chain " +
176 " WHERE chain_id="+chainid +
177 " AND (min_serial!=1 OR num_serial!=num_dist_serial OR num_serial!=max_serial-min_serial+1)";
178 Statement stmt = conn.createStatement();
179 ResultSet rsst = stmt.executeQuery(sql);
180 while (rsst.next()) {
181 count = rsst.getInt(1);
182 if (count>0){
183 return true;
184 }
185 }
186 sql="SELECT num_serial FROM "+myMsdsdDb+".problem_serial_chain WHERE chain_id="+chainid;
187 rsst = stmt.executeQuery(sql);
188 int check = 0;
189 while (rsst.next()){
190 check++;
191 numserial=rsst.getInt(1);
192 }
193 if (check!=1){
194 System.err.println("No num_serial match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
195 }
196 String allresseq = read_seq();
197 if (allresseq.length()!=numserial){
198 System.err.println("num_serial and length of all_res_seq don't match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode);
199 return true;
200 }
201 rsst.close();
202 stmt.close();
203 return false;
204 }
205
206 private void read_atomData() throws SQLException{
207 resser_atom2atomserial = new HashMap<String,Integer>();
208 resser2restype = new HashMap<Integer,String>();
209 atomser2coord = new HashMap<Integer,Point3d>();
210 atomser2resser = new HashMap<Integer,Integer>();
211
212 String sql = "SELECT serial,chem_atom_name,code_3_letter,residue_serial,x,y,z " +
213 " FROM "+db+".atom_data " +
214 " WHERE (model_id = "+modelid+") " +
215 " AND (chain_id = "+chainid+") " +
216 " AND (graph_alt_code_used = 1) " +
217 " AND (graph_standard_aa=1) " +
218 " AND (pdb_group = 'A')" +
219 " ORDER BY chain_code, residue_serial, serial";
220
221 Statement stmt = conn.createStatement();
222 ResultSet rsst = stmt.executeQuery(sql);
223 int count=0;
224 while (rsst.next()){
225 count++;
226
227 int atomserial = rsst.getInt(1); // atomserial
228 String atom = rsst.getString(2).trim(); // atom
229 String res_type = rsst.getString(3).trim(); // res_type
230 int res_serial = rsst.getInt(4); // res_serial
231 double x = rsst.getDouble(5); // x
232 double y = rsst.getDouble(6); // y
233 double z = rsst.getDouble(7); // z
234 Point3d coords = new Point3d(x, y, z);
235 ArrayList<String> aalist=AA.aas();
236 if (aalist.contains(res_type)) {
237 atomser2coord.put(atomserial, coords);
238 atomser2resser.put(atomserial, res_serial);
239 resser2restype.put(res_serial, res_type);
240 ArrayList<String> atomlist = aas2atoms.get(res_type);
241 if (atomlist.contains(atom)){
242 resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
243 }
244 }
245
246
247 }
248 if (count==0){
249 System.err.println("atom data query returned no data at all for model_id="+modelid+", model_id="+modelid);
250 }
251 rsst.close();
252 stmt.close();
253 }
254
255 private String read_seq() throws SQLException{
256 String allresseq="";
257 String sql="SELECT all_res_seq FROM "+myMsdsdDb+".chain_seq WHERE chain_id="+chainid;
258
259 Statement stmt = conn.createStatement();
260 ResultSet rsst = stmt.executeQuery(sql);
261 int check = 0;
262 if (rsst.next()) {
263 check++;
264 allresseq=rsst.getString(1);
265 }
266 if (check!=1) {
267 System.err.println("No all_res_seq match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode+", chain_id="+chainid);
268 }
269 rsst.close();
270 stmt.close();
271
272 return allresseq;
273 }
274
275 private HashMap<String,Integer> get_ressers_mapping() throws SQLException {
276 HashMap<String,Integer> map = new HashMap<String, Integer>();
277 String sql="SELECT serial, concat(pdb_seq,IF(pdb_insert_code IS NULL,'',pdb_insert_code)) " +
278 " FROM "+db+".residue " +
279 " WHERE chain_id="+chainid+
280 " AND pdb_seq IS NOT NULL";
281
282 Statement stmt = conn.createStatement();
283 ResultSet rsst = stmt.executeQuery(sql);
284 int count=0;
285 while (rsst.next()) {
286 count++;
287 int resser = rsst.getInt(1);
288 String pdbresser = rsst.getString(2);
289 map.put(pdbresser, resser);
290 }
291 if (count==0) {
292 System.err.println("No residue serials mapping data match for chain_id="+chainid);
293 }
294 rsst.close();
295 stmt.close();
296
297 return map;
298 }
299
300 private void readSecStructure() throws SQLException{
301 this.resser2secstruct = new HashMap<Integer, String>();
302 this.secstruct2resinterval = new TreeMap<String, Interval>();
303
304 //HELIX -- helix table
305 String sql = "SELECT helix_serial, beg_residue_serial, end_residue_serial " +
306 " FROM "+db+".helix " +
307 " WHERE (model_id = "+modelid+") " +
308 " AND (chain_id = "+chainid+") ";
309 Statement stmt = conn.createStatement();
310 ResultSet rsst = stmt.executeQuery(sql);
311 int count=0;
312 while (rsst.next()) {
313 count++;
314 int serial = rsst.getInt(1);
315 int beg = rsst.getInt(2);
316 int end =rsst.getInt(3);
317 String ssId = "H"+serial;
318 secstruct2resinterval.put(ssId, new Interval(beg,end));
319 for (int i=beg;i<=end;i++){
320 if (resser2secstruct.containsKey(i)){ // if already assigned we print a warning and then assign it
321 //System.err.println("Inconsistency in secondary structure assignment. " +
322 // "Residue "+i+" is getting reassigned from "+resser2secstruct.get(i)+" to "+ssId);
323 }
324 resser2secstruct.put(i,ssId);
325 }
326 }
327 rsst.close();
328 stmt.close();
329 //SHEET -- strand table
330 sql = "SELECT sheet_serial, strand_serial, strand_beg_residue_serial, strand_end_residue_serial " +
331 " FROM "+db+".strand " +
332 " WHERE (model_id = "+modelid+") " +
333 " AND (chain_id = "+chainid+") ";
334 stmt = conn.createStatement();
335 rsst = stmt.executeQuery(sql);
336 // we store everything in these 2 maps to assign later to resser2secstruct based on our own ids (ids are not very consistent in msdsd)
337 HashMap<Integer,Interval> strands2begEnd = new HashMap<Integer, Interval>();
338 TreeMap<Integer,ArrayList<Integer>> sheets2strands = new TreeMap<Integer, ArrayList<Integer>>();
339 count=0;
340 while (rsst.next()) {
341 count++;
342 int sheetSerial = rsst.getInt(1);
343 int strandSerial = rsst.getInt(2);
344 int beg = rsst.getInt(3);
345 int end =rsst.getInt(4);
346 strands2begEnd.put(strandSerial, new Interval(beg,end));
347 if (sheets2strands.containsKey(sheetSerial)){
348 sheets2strands.get(sheetSerial).add(strandSerial);
349 } else {
350 ArrayList<Integer> strands = new ArrayList<Integer>();
351 strands.add(strandSerial);
352 sheets2strands.put(sheetSerial, strands);
353 }
354 }
355 rsst.close();
356 stmt.close();
357 char sheet='A';
358 for (int sheetSerial:sheets2strands.keySet()){
359 int strand=1;
360 for (int strandSerial:sheets2strands.get(sheetSerial)){
361 Interval begEnd = strands2begEnd.get(strandSerial);
362 for (int i=begEnd.beg;i<=begEnd.end;i++){
363 String ssId = "S"+sheet+strand;
364 secstruct2resinterval.put(ssId, begEnd);
365 if (resser2secstruct.containsKey(i)){ // if already assigned we print a warning and then assign it
366 //System.err.println("Inconsistency in secondary structure assignment. " +
367 // "Residue "+i+" is getting reassigned from "+resser2secstruct.get(i)+" to "+ssId);
368 }
369 resser2secstruct.put(i,ssId);
370 }
371 strand++;
372 }
373 sheet++;
374 }
375
376 //TURN -- turn table
377 // they forgot to fill up the turn_serial field so we have to use turn_id and get a serial from it that is unique within the chain only
378 sql = "SELECT turn_id, res_1_residue_serial, res_2_residue_serial, res_3_residue_serial, res_4_residue_serial " +
379 " FROM "+db+".turn " +
380 " WHERE (model_id = "+modelid+") " +
381 " AND (chain_id = "+chainid+") ";
382 stmt = conn.createStatement();
383 rsst = stmt.executeQuery(sql);
384 TreeMap<Integer,ArrayList<Integer>> turns = new TreeMap<Integer, ArrayList<Integer>>();
385 count=0;
386 while (rsst.next()) {
387 count++;
388 int dbId = rsst.getInt(1);
389 int res1 = rsst.getInt(2);
390 int res2 = rsst.getInt(3);
391 int res3 = rsst.getInt(4);
392 int res4 = rsst.getInt(5);
393 ArrayList<Integer> residues = new ArrayList<Integer>();
394 if (res1!=0) residues.add(res1); // res is 0 when the field is NULL in database
395 if (res2!=0) residues.add(res2);
396 if (res3!=0) residues.add(res3);
397 if (res4!=0) residues.add(res4);
398 turns.put(dbId, residues);
399 }
400 rsst.close();
401 stmt.close();
402 int serial=1;
403 for (int dbId:turns.keySet()){
404 String ssId="T"+serial;
405 int beg = Collections.min(turns.get(dbId));
406 int end = Collections.max(turns.get(dbId));
407 secstruct2resinterval.put(ssId, new Interval(beg,end));
408 for (int i:turns.get(dbId)){
409 if (resser2secstruct.containsKey(i)){ // if already assigned we print a warning and then assign it
410 //System.err.println("Inconsistency in secondary structure assignment. " +
411 // "Residue "+i+" is getting reassigned from "+resser2secstruct.get(i)+" to "+ssId);
412 }
413 resser2secstruct.put(i,ssId);
414 }
415 serial++;
416 }
417
418 }
419 }