1 |
package proteinstructure; |
2 |
|
3 |
import java.sql.ResultSet; |
4 |
import java.sql.SQLException; |
5 |
import java.sql.Statement; |
6 |
import java.util.ArrayList; |
7 |
import java.util.Collections; |
8 |
import java.util.HashMap; |
9 |
import java.util.TreeMap; |
10 |
|
11 |
import javax.vecmath.Point3d; |
12 |
|
13 |
import tools.MySQLConnection; |
14 |
|
15 |
/** |
16 |
* A single-chain PDB protein structure loaded from an MSDSD database |
17 |
* See http://www.ebi.ac.uk/msd-srv/docs/dbdoc/refaindex.html to know what MSDSD is |
18 |
* |
19 |
* @author Jose Duarte |
20 |
* Class: MsdsdPdb |
21 |
* Package: proteinstructure |
22 |
*/ |
23 |
public class MsdsdPdb extends Pdb { |
24 |
|
25 |
private final static String MYSQLSERVER="white"; |
26 |
private final static String MYSQLUSER=MySQLConnection.getUserName(); |
27 |
private final static String MYSQLPWD="nieve"; |
28 |
//private final static String DEFAULT_MYMSDSD_DB="my_msdsd_00_07_a"; |
29 |
private final static String DEFAULT_MSDSD_DB="msdsd_00_07_a"; |
30 |
|
31 |
private MySQLConnection conn; |
32 |
|
33 |
private int chainid; |
34 |
private int modelid; |
35 |
|
36 |
// TODO for this to be able to be used by other people we need to do things without a myMsdsdDb (or also distribute our fixes database) |
37 |
private String myMsdsdDb; // our database with add-ons and fixes to msdsd |
38 |
|
39 |
/**
 * Builds a Pdb object from a pdb code and a pdb chain code, using DEFAULT_MODEL as the model.
 * The data is read from the default msdsd database (DEFAULT_MSDSD_DB) through a new
 * MySQLConnection created from the class defaults MYSQLSERVER, MYSQLUSER and MYSQLPWD.
 *
 * @param pdbCode the pdb accession code
 * @param pdbChainCode the pdb chain code
 * @throws PdbCodeNotFoundError if no chain, or more than one chain, matches the given codes
 * @throws MsdsdInconsistentResidueNumbersError if msdsd has inconsistent residue numbering for the chain
 * @throws SQLException if a database query fails
 */
public MsdsdPdb(String pdbCode, String pdbChainCode)
        throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
    this(
            pdbCode,
            pdbChainCode,
            DEFAULT_MODEL,
            DEFAULT_MSDSD_DB,
            new MySQLConnection(MYSQLSERVER, MYSQLUSER, MYSQLPWD));
}
53 |
|
54 |
/**
 * Builds a Pdb object from a pdb code and a pdb chain code, reading from the given
 * msdsd database through the given connection. The model is DEFAULT_MODEL.
 *
 * @param pdbCode the pdb accession code
 * @param pdbChainCode the pdb chain code
 * @param db name of the source database; it must be a msdsd database
 * @param conn the connection used for all queries
 * @throws PdbCodeNotFoundError if no chain, or more than one chain, matches the given codes
 * @throws MsdsdInconsistentResidueNumbersError if msdsd has inconsistent residue numbering for the chain
 * @throws SQLException if a database query fails
 */
public MsdsdPdb(String pdbCode, String pdbChainCode, String db, MySQLConnection conn)
        throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
    this(
            pdbCode,
            pdbChainCode,
            DEFAULT_MODEL,
            db,
            conn);
}
69 |
|
70 |
/**
 * Builds a Pdb object from a pdb code, a pdb chain code and a model serial.
 * The data is read from the default msdsd database (DEFAULT_MSDSD_DB) through a new
 * MySQLConnection created from the class defaults MYSQLSERVER, MYSQLUSER and MYSQLPWD.
 *
 * @param pdbCode the pdb accession code
 * @param pdbChainCode the pdb chain code
 * @param model_serial the serial of the model to load
 * @throws PdbCodeNotFoundError if no chain, or more than one chain, matches the given codes
 * @throws MsdsdInconsistentResidueNumbersError if msdsd has inconsistent residue numbering for the chain
 * @throws SQLException if a database query fails
 */
public MsdsdPdb(String pdbCode, String pdbChainCode, int model_serial)
        throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
    this(
            pdbCode,
            pdbChainCode,
            model_serial,
            DEFAULT_MSDSD_DB,
            new MySQLConnection(MYSQLSERVER, MYSQLUSER, MYSQLPWD));
}
84 |
|
85 |
/** |
86 |
* Constructs Pdb object given pdb code, pdb chain code, model serial, a source db and a MySQLConnection. |
87 |
* db must be a msdsd database |
88 |
* @param pdbCode |
89 |
* @param pdbChainCode |
90 |
* @param model_serial |
91 |
* @param db |
92 |
* @param conn |
93 |
* @throws PdbCodeNotFoundError |
94 |
* @throws MsdsdInconsistentResidueNumbersError |
95 |
* @throws SQLException |
96 |
*/ |
97 |
public MsdsdPdb (String pdbCode, String pdbChainCode, int model_serial, String db, MySQLConnection conn) throws PdbCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException { |
98 |
this.pdbCode=pdbCode.toLowerCase(); // our convention: pdb codes are lower case |
99 |
this.pdbChainCode=pdbChainCode.toUpperCase(); // our convention: chain codes are upper case |
100 |
this.model=model_serial; |
101 |
this.db=db; |
102 |
this.myMsdsdDb="my_"+db; // i.e. for db=msdsd_00_07_a then myMsdsdDb=my_msdsd_00_07_a |
103 |
|
104 |
this.conn = conn; |
105 |
|
106 |
this.getchainid();// initialises chainid, modelid and chainCode |
107 |
|
108 |
if (check_inconsistent_res_numbering()){ |
109 |
throw new MsdsdInconsistentResidueNumbersError("Inconsistent residue numbering in msdsd for accession_code "+this.pdbCode+", chain_pdb_code "+this.pdbChainCode); |
110 |
} |
111 |
|
112 |
this.sequence = read_seq(); |
113 |
this.pdbresser2resser = get_ressers_mapping(); |
114 |
|
115 |
this.read_atomData(); |
116 |
|
117 |
// we initialise resser2pdbresser from the pdbresser2resser HashMap |
118 |
this.resser2pdbresser = new HashMap<Integer, String>(); |
119 |
for (String pdbresser:pdbresser2resser.keySet()){ |
120 |
resser2pdbresser.put(pdbresser2resser.get(pdbresser), pdbresser); |
121 |
} |
122 |
|
123 |
this.readSecStructure(); |
124 |
if(!resser2secstruct.isEmpty()) { |
125 |
hasSecondaryStructure = true; |
126 |
secondaryStructureSource = "MSDSD"; |
127 |
} |
128 |
|
129 |
// initialising atomser2atom from resser_atom2atomserial |
130 |
atomser2atom = new HashMap<Integer, String>(); |
131 |
for (String resser_atom:resser_atom2atomserial.keySet()){ |
132 |
int atomserial = resser_atom2atomserial.get(resser_atom); |
133 |
String atom = resser_atom.split("_")[1]; |
134 |
atomser2atom.put(atomserial,atom); |
135 |
} |
136 |
} |
137 |
|
138 |
private void getchainid() throws PdbCodeNotFoundError, SQLException { |
139 |
chainid=0; |
140 |
String chaincodestr="='"+pdbChainCode+"'"; |
141 |
if (pdbChainCode.equals("NULL")){ |
142 |
chaincodestr="IS NULL"; |
143 |
} |
144 |
String sql = "SELECT chain_id, model_id, pchain_code " + |
145 |
" FROM "+myMsdsdDb+".mmol_chain_info " + |
146 |
" WHERE accession_code='"+pdbCode+"' " + |
147 |
" AND chain_pdb_code "+chaincodestr + |
148 |
" AND chain_type='C' " + |
149 |
" AND asu_chain=1 " + |
150 |
" AND model_serial="+model; |
151 |
|
152 |
Statement stmt = conn.createStatement(); |
153 |
ResultSet rsst = stmt.executeQuery(sql); |
154 |
if (rsst.next()) { |
155 |
chainid = rsst.getInt(1); |
156 |
modelid = rsst.getInt(2); |
157 |
chainCode=rsst.getString(3); |
158 |
if (! rsst.isLast()) { |
159 |
//System.err.println("More than 1 chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode); |
160 |
throw new PdbCodeNotFoundError("More than 1 chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode); |
161 |
} |
162 |
} else { |
163 |
//System.err.println("No chain_id match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode); |
164 |
throw new PdbCodeNotFoundError("No chain_id could be matched for accession_code "+pdbCode+", chain_pdb_code "+pdbChainCode); |
165 |
} |
166 |
rsst.close(); |
167 |
stmt.close(); |
168 |
} |
169 |
|
170 |
private boolean check_inconsistent_res_numbering() throws SQLException{ |
171 |
int count=0; |
172 |
int numserial=0; |
173 |
|
174 |
String sql="SELECT count(*) " + |
175 |
" FROM "+myMsdsdDb+".problem_serial_chain " + |
176 |
" WHERE chain_id="+chainid + |
177 |
" AND (min_serial!=1 OR num_serial!=num_dist_serial OR num_serial!=max_serial-min_serial+1)"; |
178 |
Statement stmt = conn.createStatement(); |
179 |
ResultSet rsst = stmt.executeQuery(sql); |
180 |
while (rsst.next()) { |
181 |
count = rsst.getInt(1); |
182 |
if (count>0){ |
183 |
return true; |
184 |
} |
185 |
} |
186 |
sql="SELECT num_serial FROM "+myMsdsdDb+".problem_serial_chain WHERE chain_id="+chainid; |
187 |
rsst = stmt.executeQuery(sql); |
188 |
int check = 0; |
189 |
while (rsst.next()){ |
190 |
check++; |
191 |
numserial=rsst.getInt(1); |
192 |
} |
193 |
if (check!=1){ |
194 |
System.err.println("No num_serial match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode); |
195 |
} |
196 |
String allresseq = read_seq(); |
197 |
if (allresseq.length()!=numserial){ |
198 |
System.err.println("num_serial and length of all_res_seq don't match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode); |
199 |
return true; |
200 |
} |
201 |
rsst.close(); |
202 |
stmt.close(); |
203 |
return false; |
204 |
} |
205 |
|
206 |
private void read_atomData() throws SQLException{ |
207 |
resser_atom2atomserial = new HashMap<String,Integer>(); |
208 |
resser2restype = new HashMap<Integer,String>(); |
209 |
atomser2coord = new HashMap<Integer,Point3d>(); |
210 |
atomser2resser = new HashMap<Integer,Integer>(); |
211 |
|
212 |
String sql = "SELECT serial,chem_atom_name,code_3_letter,residue_serial,x,y,z " + |
213 |
" FROM "+db+".atom_data " + |
214 |
" WHERE (model_id = "+modelid+") " + |
215 |
" AND (chain_id = "+chainid+") " + |
216 |
" AND (graph_alt_code_used = 1) " + |
217 |
" AND (graph_standard_aa=1) " + |
218 |
" AND (pdb_group = 'A')" + |
219 |
" ORDER BY chain_code, residue_serial, serial"; |
220 |
|
221 |
Statement stmt = conn.createStatement(); |
222 |
ResultSet rsst = stmt.executeQuery(sql); |
223 |
int count=0; |
224 |
while (rsst.next()){ |
225 |
count++; |
226 |
|
227 |
int atomserial = rsst.getInt(1); // atomserial |
228 |
String atom = rsst.getString(2).trim(); // atom |
229 |
String res_type = rsst.getString(3).trim(); // res_type |
230 |
int res_serial = rsst.getInt(4); // res_serial |
231 |
double x = rsst.getDouble(5); // x |
232 |
double y = rsst.getDouble(6); // y |
233 |
double z = rsst.getDouble(7); // z |
234 |
Point3d coords = new Point3d(x, y, z); |
235 |
ArrayList<String> aalist=AA.aas(); |
236 |
if (aalist.contains(res_type)) { |
237 |
atomser2coord.put(atomserial, coords); |
238 |
atomser2resser.put(atomserial, res_serial); |
239 |
resser2restype.put(res_serial, res_type); |
240 |
ArrayList<String> atomlist = aas2atoms.get(res_type); |
241 |
if (atomlist.contains(atom)){ |
242 |
resser_atom2atomserial.put(res_serial+"_"+atom, atomserial); |
243 |
} |
244 |
} |
245 |
|
246 |
|
247 |
} |
248 |
if (count==0){ |
249 |
System.err.println("atom data query returned no data at all for model_id="+modelid+", model_id="+modelid); |
250 |
} |
251 |
rsst.close(); |
252 |
stmt.close(); |
253 |
} |
254 |
|
255 |
private String read_seq() throws SQLException{ |
256 |
String allresseq=""; |
257 |
String sql="SELECT all_res_seq FROM "+myMsdsdDb+".chain_seq WHERE chain_id="+chainid; |
258 |
|
259 |
Statement stmt = conn.createStatement(); |
260 |
ResultSet rsst = stmt.executeQuery(sql); |
261 |
int check = 0; |
262 |
if (rsst.next()) { |
263 |
check++; |
264 |
allresseq=rsst.getString(1); |
265 |
} |
266 |
if (check!=1) { |
267 |
System.err.println("No all_res_seq match or more than 1 match for accession_code="+pdbCode+", chain_pdb_code="+pdbChainCode+", chain_id="+chainid); |
268 |
} |
269 |
rsst.close(); |
270 |
stmt.close(); |
271 |
|
272 |
return allresseq; |
273 |
} |
274 |
|
275 |
private HashMap<String,Integer> get_ressers_mapping() throws SQLException { |
276 |
HashMap<String,Integer> map = new HashMap<String, Integer>(); |
277 |
String sql="SELECT serial, concat(pdb_seq,IF(pdb_insert_code IS NULL,'',pdb_insert_code)) " + |
278 |
" FROM "+db+".residue " + |
279 |
" WHERE chain_id="+chainid+ |
280 |
" AND pdb_seq IS NOT NULL"; |
281 |
|
282 |
Statement stmt = conn.createStatement(); |
283 |
ResultSet rsst = stmt.executeQuery(sql); |
284 |
int count=0; |
285 |
while (rsst.next()) { |
286 |
count++; |
287 |
int resser = rsst.getInt(1); |
288 |
String pdbresser = rsst.getString(2); |
289 |
map.put(pdbresser, resser); |
290 |
} |
291 |
if (count==0) { |
292 |
System.err.println("No residue serials mapping data match for chain_id="+chainid); |
293 |
} |
294 |
rsst.close(); |
295 |
stmt.close(); |
296 |
|
297 |
return map; |
298 |
} |
299 |
|
300 |
private void readSecStructure() throws SQLException{ |
301 |
this.resser2secstruct = new HashMap<Integer, String>(); |
302 |
this.secstruct2resinterval = new TreeMap<String, Interval>(); |
303 |
|
304 |
//HELIX -- helix table |
305 |
String sql = "SELECT helix_serial, beg_residue_serial, end_residue_serial " + |
306 |
" FROM "+db+".helix " + |
307 |
" WHERE (model_id = "+modelid+") " + |
308 |
" AND (chain_id = "+chainid+") "; |
309 |
Statement stmt = conn.createStatement(); |
310 |
ResultSet rsst = stmt.executeQuery(sql); |
311 |
int count=0; |
312 |
while (rsst.next()) { |
313 |
count++; |
314 |
int serial = rsst.getInt(1); |
315 |
int beg = rsst.getInt(2); |
316 |
int end =rsst.getInt(3); |
317 |
String ssId = "H"+serial; |
318 |
secstruct2resinterval.put(ssId, new Interval(beg,end)); |
319 |
for (int i=beg;i<=end;i++){ |
320 |
if (resser2secstruct.containsKey(i)){ // if already assigned we print a warning and then assign it |
321 |
//System.err.println("Inconsistency in secondary structure assignment. " + |
322 |
// "Residue "+i+" is getting reassigned from "+resser2secstruct.get(i)+" to "+ssId); |
323 |
} |
324 |
resser2secstruct.put(i,ssId); |
325 |
} |
326 |
} |
327 |
rsst.close(); |
328 |
stmt.close(); |
329 |
//SHEET -- strand table |
330 |
sql = "SELECT sheet_serial, strand_serial, strand_beg_residue_serial, strand_end_residue_serial " + |
331 |
" FROM "+db+".strand " + |
332 |
" WHERE (model_id = "+modelid+") " + |
333 |
" AND (chain_id = "+chainid+") "; |
334 |
stmt = conn.createStatement(); |
335 |
rsst = stmt.executeQuery(sql); |
336 |
// we store everything in these 2 maps to assign later to resser2secstruct based on our own ids (ids are not very consistent in msdsd) |
337 |
HashMap<Integer,Interval> strands2begEnd = new HashMap<Integer, Interval>(); |
338 |
TreeMap<Integer,ArrayList<Integer>> sheets2strands = new TreeMap<Integer, ArrayList<Integer>>(); |
339 |
count=0; |
340 |
while (rsst.next()) { |
341 |
count++; |
342 |
int sheetSerial = rsst.getInt(1); |
343 |
int strandSerial = rsst.getInt(2); |
344 |
int beg = rsst.getInt(3); |
345 |
int end =rsst.getInt(4); |
346 |
strands2begEnd.put(strandSerial, new Interval(beg,end)); |
347 |
if (sheets2strands.containsKey(sheetSerial)){ |
348 |
sheets2strands.get(sheetSerial).add(strandSerial); |
349 |
} else { |
350 |
ArrayList<Integer> strands = new ArrayList<Integer>(); |
351 |
strands.add(strandSerial); |
352 |
sheets2strands.put(sheetSerial, strands); |
353 |
} |
354 |
} |
355 |
rsst.close(); |
356 |
stmt.close(); |
357 |
char sheet='A'; |
358 |
for (int sheetSerial:sheets2strands.keySet()){ |
359 |
int strand=1; |
360 |
for (int strandSerial:sheets2strands.get(sheetSerial)){ |
361 |
Interval begEnd = strands2begEnd.get(strandSerial); |
362 |
for (int i=begEnd.beg;i<=begEnd.end;i++){ |
363 |
String ssId = "S"+sheet+strand; |
364 |
secstruct2resinterval.put(ssId, begEnd); |
365 |
if (resser2secstruct.containsKey(i)){ // if already assigned we print a warning and then assign it |
366 |
//System.err.println("Inconsistency in secondary structure assignment. " + |
367 |
// "Residue "+i+" is getting reassigned from "+resser2secstruct.get(i)+" to "+ssId); |
368 |
} |
369 |
resser2secstruct.put(i,ssId); |
370 |
} |
371 |
strand++; |
372 |
} |
373 |
sheet++; |
374 |
} |
375 |
|
376 |
//TURN -- turn table |
377 |
// they forgot to fill up the turn_serial field so we have to use turn_id and get a serial from it that is unique within the chain only |
378 |
sql = "SELECT turn_id, res_1_residue_serial, res_2_residue_serial, res_3_residue_serial, res_4_residue_serial " + |
379 |
" FROM "+db+".turn " + |
380 |
" WHERE (model_id = "+modelid+") " + |
381 |
" AND (chain_id = "+chainid+") "; |
382 |
stmt = conn.createStatement(); |
383 |
rsst = stmt.executeQuery(sql); |
384 |
TreeMap<Integer,ArrayList<Integer>> turns = new TreeMap<Integer, ArrayList<Integer>>(); |
385 |
count=0; |
386 |
while (rsst.next()) { |
387 |
count++; |
388 |
int dbId = rsst.getInt(1); |
389 |
int res1 = rsst.getInt(2); |
390 |
int res2 = rsst.getInt(3); |
391 |
int res3 = rsst.getInt(4); |
392 |
int res4 = rsst.getInt(5); |
393 |
ArrayList<Integer> residues = new ArrayList<Integer>(); |
394 |
if (res1!=0) residues.add(res1); // res is 0 when the field is NULL in database |
395 |
if (res2!=0) residues.add(res2); |
396 |
if (res3!=0) residues.add(res3); |
397 |
if (res4!=0) residues.add(res4); |
398 |
turns.put(dbId, residues); |
399 |
} |
400 |
rsst.close(); |
401 |
stmt.close(); |
402 |
int serial=1; |
403 |
for (int dbId:turns.keySet()){ |
404 |
String ssId="T"+serial; |
405 |
int beg = Collections.min(turns.get(dbId)); |
406 |
int end = Collections.max(turns.get(dbId)); |
407 |
secstruct2resinterval.put(ssId, new Interval(beg,end)); |
408 |
for (int i:turns.get(dbId)){ |
409 |
if (resser2secstruct.containsKey(i)){ // if already assigned we print a warning and then assign it |
410 |
//System.err.println("Inconsistency in secondary structure assignment. " + |
411 |
// "Residue "+i+" is getting reassigned from "+resser2secstruct.get(i)+" to "+ssId); |
412 |
} |
413 |
resser2secstruct.put(i,ssId); |
414 |
} |
415 |
serial++; |
416 |
} |
417 |
|
418 |
} |
419 |
} |