ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/tools/Msdsd2Pdb.java
Revision: 115
Committed: Fri Mar 2 13:53:04 2007 UTC (17 years, 7 months ago) by duarte
File size: 9196 byte(s)
Log Message:
Adapted to msdsd_00_07_a
Now uses MySQLConnection and PrintWriters and not mysql client through shell (got rid of that method)
Thus all methods now take the new arguments user and pdbFile
New methods:
 export2file taking accession_code, chain_pdb_code 
 export2file taking chain_id, model_id

Line File contents
1 package tools;
2
3 import java.io.*;
4 import java.sql.SQLException;
5 import java.sql.Statement;
6 import java.sql.ResultSet;
7
8 /**
9 * Package: tools
10 * Class: Msdsd2Pdb
11 * Author: Ioannis Filippis, filippis@molgen.mpg.de
12 * Date: 21/03/2006
13 *
14 * Msdsd2Pdb's static export2File method creates a pdb file by exporting the atom
15 * lines directly from the msdsd. This is needed for the visualization of the
16 * biological units since pdb files contain the ASUs. Moreover, contact graphs
17 * are defined using msdsd-custom fields (like chain code and residue serial)
18 * and mapping to pdb fields would be necessary for the graph visualisation
19 * if the original pdb files were preferred.
20 *
21 * Notes:
22 * - Hetatoms are excluded (pdb_group = "A") and in case of multiple locations of
23 * amino acids, only the default location is considered (graph_alt_code_used = 1)
24 * (VendruscoloM_00_PSFG.pdf currently found in LitNet/incomingPDF/LAST_ROUND/)
25 * - There is also the possibility to send the atom lines directly to PyMol and
26 * loading the structure without intermediate files. Look at PyMol class and
27 * sendAtomLines method.
28 *
29 * Changelog:
30 * 21/03/06 first created by IF
31 * 02/03/07 JD, Major changes: adapted to msdsd_00_07_a and using my_msdsd_00_07_a.
32 * Using MySQLConnection and file PrintStream instead of shell mysql client for output
33 */
34
35 public class Msdsd2Pdb {
36
37
38 public static String MSDSDDB="msdsd_00_07_a";
39 public static String INFODB="my_msdsd_00_07_a";
40 public static String HOST="white";
41 public static String PWD="nieve";
42
43
44 /**
45 * Exports to file in pdb format the atom lines of a model (modelId) of a biological unit (assemblyId)
46 * of a protein (accessionCode) directly from msdsd.
47 *
48 * Notes:
49 * - Hetatoms are excluded (pdb_group = "A") and in case of multiple locations of
50 * amino acids, only the default location is considered (graph_alt_code_used = 1)
51 * (VendruscoloM_00_PSFG.pdf currently found in LitNet/incomingPDF/LAST_ROUND/)
52 * - The chain_pdb_code is used in the chainID field in the atom line, while the chain_code is used
53 * the segID field (due to its length). Therefore, "segi" and not "chain" must be used in pymol
54 * selections.
55 *
56 * @param accessionCode
57 * @param assemblyId
58 * @param modelId
59 * @param pdbFile
60 * @param user
61 */
62 public static void export2File(String accessionCode, int assemblyId, int modelId, String pdbFile, String user) throws FileNotFoundException{
63 PrintStream Pdb = new PrintStream(new FileOutputStream(pdbFile));
64 MySQLConnection conn = new MySQLConnection(HOST,user,PWD,MSDSDDB);
65
66 String query = "SELECT CONCAT("+
67 "RPAD(\"ATOM\", 6, \" \"), "+
68 "LPAD(serial, 5, \" \"), "+
69 "\" \", "+
70 "LPAD(chem_atom_name, 4, \" \"), "+
71 "IF(alt_code IS NULL, \" \", alt_code), "+
72 "code_3_letter, "+
73 "\" \", "+
74 "IF(chain_pdb_code IS NULL, \" \", chain_pdb_code), "+
75 "LPAD(residue_serial, 4, \" \"), "+
76 "IF(residue_pdb_insert_code IS NULL, \" \", residue_pdb_insert_code), "+
77 "REPEAT(\" \", 3), "+
78 "LPAD(x, 8, \" \"), "+
79 "LPAD(y, 8, \" \"), "+
80 "LPAD(z, 8, \" \"), "+
81 "LPAD(occupancy, 6, \" \"), "+
82 "REPEAT(\" \", 6), "+
83 "REPEAT(\" \", 6), "+
84 "RPAD(chain_code, 4, \" \") "+
85 ") AS atom_lines FROM "+MSDSDDB+".atom_data WHERE "+
86 "(assembly_id = "+assemblyId+") AND "+
87 "(model_id = "+modelId+") AND "+
88 "(graph_alt_code_used = 1) AND "+
89 "(pdb_group = \"A\") "+
90 "ORDER BY chain_code, residue_serial, serial;";
91
92 try {
93 Statement stmt = conn.createStatement();
94 ResultSet rsst = stmt.executeQuery(query);
95 while (rsst.next()) {
96 Pdb.println(rsst.getString(1));
97 }
98 stmt.close();
99 rsst.close();
100 } catch (SQLException e) {
101 e.printStackTrace();
102 }
103 Pdb.close();
104 conn.close();
105 }
106
107
108 /**
109 * Exports to file in pdb format the atom lines of the assymetric unit of a protein (accessionCode)
110 * (all chains) directly from msdsd.
111 *
112 * Notes:
113 * - Hetatoms are excluded (pdb_group = "A") and in case of multiple locations of
114 * amino acids, only the default location is considered (graph_alt_code_used = 1)
115 * (VendruscoloM_00_PSFG.pdf currently found in LitNet/incomingPDF/LAST_ROUND/)
116 * - The chain_pdb_code is used in the chainID field in the atom line, while the chain_code is used
117 * the segID field (due to its length). Therefore, "segi" and not "chain" must be used in pymol
118 * selections.
119 *
120 * @param accessionCode
121 * @param pdbFile
122 * @param user
123 */
124 public static void export2File(String accessionCode, String pdbFile, String user) throws FileNotFoundException {
125 PrintStream Pdb = new PrintStream(new FileOutputStream(pdbFile));
126 MySQLConnection conn = new MySQLConnection(HOST,user,PWD,MSDSDDB);
127
128 String query = "SELECT CONCAT("+
129 "RPAD(\"ATOM\", 6, \" \"), "+
130 "LPAD(serial, 5, \" \"), "+
131 "\" \", "+
132 "LPAD(chem_atom_name, 4, \" \"), "+
133 "IF(alt_code IS NULL, \" \", alt_code), "+
134 "code_3_letter, "+
135 "\" \", "+
136 "IF(chain_pdb_code IS NULL, \" \", chain_pdb_code), "+
137 "LPAD(residue_serial, 4, \" \"), "+
138 "IF(residue_pdb_insert_code IS NULL, \" \", residue_pdb_insert_code), "+
139 "REPEAT(\" \", 3), "+
140 "LPAD(x, 8, \" \"), "+
141 "LPAD(y, 8, \" \"), "+
142 "LPAD(z, 8, \" \"), "+
143 "LPAD(occupancy, 6, \" \"), "+
144 "REPEAT(\" \", 6), "+
145 "REPEAT(\" \", 6), "+
146 "RPAD(chain_code, 4, \" \") "+
147 ") AS atom_lines FROM "+MSDSDDB+".atom_data WHERE "+
148 "(accession_code = \""+accessionCode+"\") AND "+
149 "(non_assembly_valid = \"Y\") AND "+
150 "(graph_alt_code_used = 1) AND "+
151 "(pdb_group = \"A\") "+
152 "ORDER BY chain_code, residue_serial, serial;";
153 try {
154 Statement stmt = conn.createStatement();
155 ResultSet rsst = stmt.executeQuery(query);
156 while (rsst.next()) {
157 Pdb.println(rsst.getString(1));
158 }
159 stmt.close();
160 rsst.close();
161 } catch (SQLException e) {
162 e.printStackTrace();
163 }
164 Pdb.close();
165 conn.close();
166 }
167
168 /**
169 * Exports to file in pdb format the atom coordinates for the assymetric unit of a protein given model_id and chain_id
170 * @param chainId
171 * @param modelId
172 * @param pdbFile
173 * @param user
174 * @return
175 */
176 public static void export2File(int chainId, int modelId, String pdbFile, String user) throws FileNotFoundException {
177 PrintStream Pdb = new PrintStream(new FileOutputStream(pdbFile));
178 MySQLConnection conn = new MySQLConnection(HOST,user,PWD,MSDSDDB);
179
180 String query = "SELECT CONCAT("+
181 "RPAD(\"ATOM\", 6, \" \"), "+
182 "LPAD(serial, 5, \" \"), "+
183 "\" \", "+
184 "LPAD(chem_atom_name, 4, \" \"), "+
185 "IF(alt_code IS NULL, \" \", alt_code), "+
186 "code_3_letter, "+
187 "\" \", "+
188 "IF(chain_pdb_code IS NULL, \" \", chain_pdb_code), "+
189 "LPAD(residue_serial, 4, \" \"), "+// check if this is msd or pdb residue serials, do we care?
190 "IF(residue_pdb_insert_code IS NULL, \" \", residue_pdb_insert_code), "+
191 "REPEAT(\" \", 3), "+
192 "LPAD(x, 8, \" \"), "+
193 "LPAD(y, 8, \" \"), "+
194 "LPAD(z, 8, \" \"), "+
195 "LPAD(occupancy, 6, \" \"), "+
196 "REPEAT(\" \", 6), "+
197 "REPEAT(\" \", 6), "+
198 "RPAD(chain_code, 4, \" \") "+
199 ") AS atom_lines FROM "+MSDSDDB+".atom_data WHERE "+
200 "(model_id = "+modelId+") AND "+
201 "(chain_id = "+chainId+") AND "+
202 "(graph_alt_code_used = 1) AND "+
203 "(pdb_group = \"A\") "+
204 "ORDER BY chain_code, residue_serial, serial;";
205 try {
206 Statement stmt = conn.createStatement();
207 ResultSet rsst = stmt.executeQuery(query);
208 while (rsst.next()) {
209 Pdb.println(rsst.getString(1));
210 }
211 stmt.close();
212 rsst.close();
213 } catch (SQLException e) {
214 e.printStackTrace();
215 }
216 Pdb.close();
217 conn.close();
218 }
219
220 /**
221 * Exports to file in pdb format the atom coordinates for the assymetric unit of a protein given an accession_code and chain_pdb_code
222 * (if NMR just the model with model_serial=1)
223 * @param accessionCode
224 * @param chainPdbCode
225 * @param pdbFile
226 * @param user
227 * @return
228 */
229 public static void export2File(String accessionCode, String chainPdbCode, String pdbFile, String user) throws FileNotFoundException{
230 MySQLConnection conn = new MySQLConnection(HOST,user,PWD,MSDSDDB);
231 int chainId=0;
232 int modelId=0;
233 String chainStr="='"+chainPdbCode+"'";
234 if (chainPdbCode.equals("NULL")) {
235 chainStr="IS NULL";
236 }
237 String query = "SELECT chain_id, model_id " +
238 "FROM "+INFODB+".mmol_chain_info " +
239 "WHERE accession_code='"+accessionCode+"' " +
240 "AND chain_pdb_code " + chainStr +
241 "AND chain_type='C' " +
242 "AND asu_chain=1 " +
243 "AND model_serial=1;";
244 try {
245 Statement stmt = conn.createStatement();
246 ResultSet rsst = stmt.executeQuery(query);
247 while (rsst.next()) {
248 chainId=rsst.getInt(1);
249 modelId=rsst.getInt(2);
250 }
251 stmt.close();
252 rsst.close();
253 } catch (SQLException e) {
254 e.printStackTrace();
255 }
256 conn.close();
257 export2File(chainId,modelId,pdbFile,user);
258 }
259
260
261
262
263 } // end of class Msdsd2Pdb