1 |
package tools; |
2 |
|
3 |
import java.io.*; |
4 |
import java.sql.SQLException; |
5 |
import java.sql.Statement; |
6 |
import java.sql.ResultSet; |
7 |
|
8 |
/**
 * Package: tools
 * Class: Msdsd2Pdb
 * Author: Ioannis Filippis, filippis@molgen.mpg.de
 * Date: 21/03/2006
 *
 * Msdsd2Pdb's static export2File method creates a pdb file by exporting the atom
 * lines directly from the msdsd. This is needed for the visualization of the
 * biological units since pdb files contain the ASUs. Moreover, contact graphs
 * are defined using msdsd-custom fields (like chain code and residue serial)
 * and mapping to pdb fields would be necessary for the graph visualisation
 * if the original pdb files were preferred.
 *
 * Notes:
 * - Hetatoms are excluded (pdb_group = "A") and in case of multiple locations of
 *   amino acids, only the default location is considered (graph_alt_code_used = 1)
 *   (VendruscoloM_00_PSFG.pdf currently found in LitNet/incomingPDF/LAST_ROUND/)
 * - It is also possible to send the atom lines directly to PyMol and load the
 *   structure without intermediate files. See the sendAtomLines method of the
 *   PyMol class.
 *
 * Changelog:
 * 21/03/06 first created by IF
 * 02/03/07 JD, Major changes: adapted to msdsd_00_07_a and using my_msdsd_00_07_a.
 *          Using MySQLConnection and file PrintStream instead of shell mysql client for output
 */
34 |
|
35 |
public class Msdsd2Pdb { |
36 |
|
37 |
|
38 |
public static String MSDSDDB="msdsd_00_07_a"; |
39 |
public static String INFODB="my_msdsd_00_07_a"; |
40 |
public static String HOST="white"; |
41 |
public static String PWD="nieve"; |
42 |
|
43 |
|
44 |
/** |
45 |
* Exports to file in pdb format the atom lines of a model (modelId) of a biological unit (assemblyId) |
46 |
* of a protein (accessionCode) directly from msdsd. |
47 |
* |
48 |
* Notes: |
49 |
* - Hetatoms are excluded (pdb_group = "A") and in case of multiple locations of |
50 |
* amino acids, only the default location is considered (graph_alt_code_used = 1) |
51 |
* (VendruscoloM_00_PSFG.pdf currently found in LitNet/incomingPDF/LAST_ROUND/) |
52 |
* - The chain_pdb_code is used in the chainID field in the atom line, while the chain_code is used |
53 |
* the segID field (due to its length). Therefore, "segi" and not "chain" must be used in pymol |
54 |
* selections. |
55 |
* |
56 |
* @param accessionCode |
57 |
* @param assemblyId |
58 |
* @param modelId |
59 |
* @param pdbFile |
60 |
* @param user |
61 |
*/ |
62 |
public static void export2File(String accessionCode, int assemblyId, int modelId, String pdbFile, String user) throws FileNotFoundException{ |
63 |
PrintStream Pdb = new PrintStream(new FileOutputStream(pdbFile)); |
64 |
MySQLConnection conn = new MySQLConnection(HOST,user,PWD,MSDSDDB); |
65 |
|
66 |
String query = "SELECT CONCAT("+ |
67 |
"RPAD(\"ATOM\", 6, \" \"), "+ |
68 |
"LPAD(serial, 5, \" \"), "+ |
69 |
"\" \", "+ |
70 |
"LPAD(chem_atom_name, 4, \" \"), "+ |
71 |
"IF(alt_code IS NULL, \" \", alt_code), "+ |
72 |
"code_3_letter, "+ |
73 |
"\" \", "+ |
74 |
"IF(chain_pdb_code IS NULL, \" \", chain_pdb_code), "+ |
75 |
"LPAD(residue_serial, 4, \" \"), "+ |
76 |
"IF(residue_pdb_insert_code IS NULL, \" \", residue_pdb_insert_code), "+ |
77 |
"REPEAT(\" \", 3), "+ |
78 |
"LPAD(x, 8, \" \"), "+ |
79 |
"LPAD(y, 8, \" \"), "+ |
80 |
"LPAD(z, 8, \" \"), "+ |
81 |
"LPAD(occupancy, 6, \" \"), "+ |
82 |
"REPEAT(\" \", 6), "+ |
83 |
"REPEAT(\" \", 6), "+ |
84 |
"RPAD(chain_code, 4, \" \") "+ |
85 |
") AS atom_lines FROM "+MSDSDDB+".atom_data WHERE "+ |
86 |
"(assembly_id = "+assemblyId+") AND "+ |
87 |
"(model_id = "+modelId+") AND "+ |
88 |
"(graph_alt_code_used = 1) AND "+ |
89 |
"(pdb_group = \"A\") "+ |
90 |
"ORDER BY chain_code, residue_serial, serial;"; |
91 |
|
92 |
try { |
93 |
Statement stmt = conn.createStatement(); |
94 |
ResultSet rsst = stmt.executeQuery(query); |
95 |
while (rsst.next()) { |
96 |
Pdb.println(rsst.getString(1)); |
97 |
} |
98 |
stmt.close(); |
99 |
rsst.close(); |
100 |
} catch (SQLException e) { |
101 |
e.printStackTrace(); |
102 |
} |
103 |
Pdb.close(); |
104 |
conn.close(); |
105 |
} |
106 |
|
107 |
|
108 |
/** |
109 |
* Exports to file in pdb format the atom lines of the assymetric unit of a protein (accessionCode) |
110 |
* (all chains) directly from msdsd. |
111 |
* |
112 |
* Notes: |
113 |
* - Hetatoms are excluded (pdb_group = "A") and in case of multiple locations of |
114 |
* amino acids, only the default location is considered (graph_alt_code_used = 1) |
115 |
* (VendruscoloM_00_PSFG.pdf currently found in LitNet/incomingPDF/LAST_ROUND/) |
116 |
* - The chain_pdb_code is used in the chainID field in the atom line, while the chain_code is used |
117 |
* the segID field (due to its length). Therefore, "segi" and not "chain" must be used in pymol |
118 |
* selections. |
119 |
* |
120 |
* @param accessionCode |
121 |
* @param pdbFile |
122 |
* @param user |
123 |
*/ |
124 |
public static void export2File(String accessionCode, String pdbFile, String user) throws FileNotFoundException { |
125 |
PrintStream Pdb = new PrintStream(new FileOutputStream(pdbFile)); |
126 |
MySQLConnection conn = new MySQLConnection(HOST,user,PWD,MSDSDDB); |
127 |
|
128 |
String query = "SELECT CONCAT("+ |
129 |
"RPAD(\"ATOM\", 6, \" \"), "+ |
130 |
"LPAD(serial, 5, \" \"), "+ |
131 |
"\" \", "+ |
132 |
"LPAD(chem_atom_name, 4, \" \"), "+ |
133 |
"IF(alt_code IS NULL, \" \", alt_code), "+ |
134 |
"code_3_letter, "+ |
135 |
"\" \", "+ |
136 |
"IF(chain_pdb_code IS NULL, \" \", chain_pdb_code), "+ |
137 |
"LPAD(residue_serial, 4, \" \"), "+ |
138 |
"IF(residue_pdb_insert_code IS NULL, \" \", residue_pdb_insert_code), "+ |
139 |
"REPEAT(\" \", 3), "+ |
140 |
"LPAD(x, 8, \" \"), "+ |
141 |
"LPAD(y, 8, \" \"), "+ |
142 |
"LPAD(z, 8, \" \"), "+ |
143 |
"LPAD(occupancy, 6, \" \"), "+ |
144 |
"REPEAT(\" \", 6), "+ |
145 |
"REPEAT(\" \", 6), "+ |
146 |
"RPAD(chain_code, 4, \" \") "+ |
147 |
") AS atom_lines FROM "+MSDSDDB+".atom_data WHERE "+ |
148 |
"(accession_code = \""+accessionCode+"\") AND "+ |
149 |
"(non_assembly_valid = \"Y\") AND "+ |
150 |
"(graph_alt_code_used = 1) AND "+ |
151 |
"(pdb_group = \"A\") "+ |
152 |
"ORDER BY chain_code, residue_serial, serial;"; |
153 |
try { |
154 |
Statement stmt = conn.createStatement(); |
155 |
ResultSet rsst = stmt.executeQuery(query); |
156 |
while (rsst.next()) { |
157 |
Pdb.println(rsst.getString(1)); |
158 |
} |
159 |
stmt.close(); |
160 |
rsst.close(); |
161 |
} catch (SQLException e) { |
162 |
e.printStackTrace(); |
163 |
} |
164 |
Pdb.close(); |
165 |
conn.close(); |
166 |
} |
167 |
|
168 |
/** |
169 |
* Exports to file in pdb format the atom coordinates for the assymetric unit of a protein given model_id and chain_id |
170 |
* @param chainId |
171 |
* @param modelId |
172 |
* @param pdbFile |
173 |
* @param user |
174 |
* @return |
175 |
*/ |
176 |
public static void export2File(int chainId, int modelId, String pdbFile, String user) throws FileNotFoundException { |
177 |
PrintStream Pdb = new PrintStream(new FileOutputStream(pdbFile)); |
178 |
MySQLConnection conn = new MySQLConnection(HOST,user,PWD,MSDSDDB); |
179 |
|
180 |
String query = "SELECT CONCAT("+ |
181 |
"RPAD(\"ATOM\", 6, \" \"), "+ |
182 |
"LPAD(serial, 5, \" \"), "+ |
183 |
"\" \", "+ |
184 |
"LPAD(chem_atom_name, 4, \" \"), "+ |
185 |
"IF(alt_code IS NULL, \" \", alt_code), "+ |
186 |
"code_3_letter, "+ |
187 |
"\" \", "+ |
188 |
"IF(chain_pdb_code IS NULL, \" \", chain_pdb_code), "+ |
189 |
"LPAD(residue_serial, 4, \" \"), "+// check if this is msd or pdb residue serials, do we care? |
190 |
"IF(residue_pdb_insert_code IS NULL, \" \", residue_pdb_insert_code), "+ |
191 |
"REPEAT(\" \", 3), "+ |
192 |
"LPAD(x, 8, \" \"), "+ |
193 |
"LPAD(y, 8, \" \"), "+ |
194 |
"LPAD(z, 8, \" \"), "+ |
195 |
"LPAD(occupancy, 6, \" \"), "+ |
196 |
"REPEAT(\" \", 6), "+ |
197 |
"REPEAT(\" \", 6), "+ |
198 |
"RPAD(chain_code, 4, \" \") "+ |
199 |
") AS atom_lines FROM "+MSDSDDB+".atom_data WHERE "+ |
200 |
"(model_id = "+modelId+") AND "+ |
201 |
"(chain_id = "+chainId+") AND "+ |
202 |
"(graph_alt_code_used = 1) AND "+ |
203 |
"(pdb_group = \"A\") "+ |
204 |
"ORDER BY chain_code, residue_serial, serial;"; |
205 |
try { |
206 |
Statement stmt = conn.createStatement(); |
207 |
ResultSet rsst = stmt.executeQuery(query); |
208 |
while (rsst.next()) { |
209 |
Pdb.println(rsst.getString(1)); |
210 |
} |
211 |
stmt.close(); |
212 |
rsst.close(); |
213 |
} catch (SQLException e) { |
214 |
e.printStackTrace(); |
215 |
} |
216 |
Pdb.close(); |
217 |
conn.close(); |
218 |
} |
219 |
|
220 |
/** |
221 |
* Exports to file in pdb format the atom coordinates for the assymetric unit of a protein given an accession_code and chain_pdb_code |
222 |
* (if NMR just the model with model_serial=1) |
223 |
* @param accessionCode |
224 |
* @param chainPdbCode |
225 |
* @param pdbFile |
226 |
* @param user |
227 |
* @return |
228 |
*/ |
229 |
public static void export2File(String accessionCode, String chainPdbCode, String pdbFile, String user) throws FileNotFoundException{ |
230 |
MySQLConnection conn = new MySQLConnection(HOST,user,PWD,MSDSDDB); |
231 |
int chainId=0; |
232 |
int modelId=0; |
233 |
String chainStr="='"+chainPdbCode+"'"; |
234 |
if (chainPdbCode.equals("NULL")) { |
235 |
chainStr="IS NULL"; |
236 |
} |
237 |
String query = "SELECT chain_id, model_id " + |
238 |
"FROM "+INFODB+".mmol_chain_info " + |
239 |
"WHERE accession_code='"+accessionCode+"' " + |
240 |
"AND chain_pdb_code " + chainStr + |
241 |
" AND chain_type='C' " + |
242 |
"AND asu_chain=1 " + |
243 |
"AND model_serial=1;"; |
244 |
try { |
245 |
Statement stmt = conn.createStatement(); |
246 |
ResultSet rsst = stmt.executeQuery(query); |
247 |
while (rsst.next()) { |
248 |
chainId=rsst.getInt(1); |
249 |
modelId=rsst.getInt(2); |
250 |
} |
251 |
stmt.close(); |
252 |
rsst.close(); |
253 |
} catch (SQLException e) { |
254 |
e.printStackTrace(); |
255 |
} |
256 |
conn.close(); |
257 |
export2File(chainId,modelId,pdbFile,user); |
258 |
} |
259 |
|
260 |
|
261 |
|
262 |
|
263 |
} // end of class Msdsd2Pdb |