1 |
stehr |
303 |
import java.io.*; |
2 |
stehr |
250 |
import java.sql.*; |
3 |
|
|
import java.util.*; |
4 |
|
|
import proteinstructure.*; |
5 |
|
|
import tools.MySQLConnection; |
6 |
|
|
|
7 |
stehr |
303 |
// TODO: |
8 |
|
|
// - what will the distribution look like when drawing points randomly from the unit square? (use Ioannis' random graphs?) |
9 |
|
|
// - what are the maximum and the expected number of points per square, and how does that translate into runtime? |
10 |
|
|
// Problems: |
11 |
|
|
// - surface effect (distinguish surface/core) |
12 |
|
|
// - translation of grid (-> kdtrees) |
13 |
|
|
// Space of grid: |
14 |
|
|
// - when assuming spherical proteins and constant density, grid size grows linearly |
15 |
|
|
// - in the worst case of completely linear proteins, grid size growth is cubic |
16 |
|
|
// - for the protein case, do benchmark (to prove linear runtime, estimate runtime constants and space exponent) |
17 |
|
|
// - test whether rotating (by using principal components?) improves runtime/space |
18 |
stehr |
250 |
|
19 |
|
|
public class calculateGridDensity { |
20 |
|
|
|
21 |
|
|
/*------------------------------ constants ------------------------------*/ |
22 |
|
|
|
23 |
|
|
// database with a list of pdb codes and chain codes to process |
24 |
|
|
public static String DB_NAME = "pdb_reps"; |
25 |
|
|
public static String DB_TABLE = "reps"; |
26 |
|
|
public static String DB_COL_PDB = "accession_code"; |
27 |
|
|
public static String DB_COL_CHAIN = "chain_pdb_code"; |
28 |
|
|
|
29 |
|
|
public static String PDB_DB = "pdbase"; |
30 |
|
|
public static String DB_HOST = "white"; |
31 |
|
|
public static String DB_USER = getUserName(); |
32 |
|
|
public static String DB_PWD = "nieve"; |
33 |
|
|
|
34 |
|
|
public static String PDB_CODE = "1tdr"; |
35 |
|
|
public static String CHAIN_CODE = "B"; |
36 |
|
|
public static String edgeType = "Ca"; |
37 |
stehr |
303 |
public static double cutoff_from = 4.0; |
38 |
|
|
public static double cutoff_to = 15.0; |
39 |
|
|
public static double cutoff_step = 1.0; |
40 |
|
|
public static int limit = 100; |
41 |
stehr |
250 |
|
42 |
stehr |
303 |
public static String outFileName = "grid_nbs_pdbreps100"; |
43 |
|
|
|
44 |
stehr |
250 |
/*---------------------------- private methods --------------------------*/ |
45 |
|
|
/** |
46 |
|
|
* Get user name from operating system (for use as database username). |
47 |
|
|
* */ |
48 |
|
|
private static String getUserName() { |
49 |
|
|
String user = null; |
50 |
|
|
user = System.getProperty("user.name"); |
51 |
|
|
if(user == null) { |
52 |
|
|
System.err.println("Could not get user name from operating system."); |
53 |
|
|
} |
54 |
|
|
return user; |
55 |
|
|
} |
56 |
|
|
|
57 |
|
|
private static void calcDensity(String pdbCode, String chainCode, double cutoff, String egdeType, MySQLConnection conn, Map<Integer, Integer> densityCount) { |
58 |
|
|
Pdb pdb = null; |
59 |
|
|
try { |
60 |
|
|
pdb = new PdbasePdb(pdbCode, chainCode, PDB_DB, conn); |
61 |
|
|
// add to density count vector |
62 |
|
|
pdb.calcGridDensity(edgeType, cutoff, densityCount); |
63 |
|
|
|
64 |
|
|
} catch (PdbaseInconsistencyError e) { |
65 |
|
|
System.out.println("Inconsistency in " + pdbCode + chainCode); |
66 |
|
|
} catch (PdbCodeNotFoundError e) { |
67 |
|
|
e.printStackTrace(); |
68 |
|
|
} catch (SQLException e) { |
69 |
|
|
e.printStackTrace(); |
70 |
|
|
} catch (PdbChainCodeNotFoundError e) { |
71 |
|
|
e.printStackTrace(); |
72 |
|
|
} |
73 |
|
|
|
74 |
|
|
} |
75 |
|
|
|
76 |
stehr |
303 |
public static void printValues(Map<Integer, Integer> v, PrintStream out) { |
77 |
stehr |
250 |
int atoms = 0; |
78 |
|
|
for(int size:v.keySet()) { |
79 |
stehr |
303 |
out.println(size + "\t" + v.get(size)); |
80 |
stehr |
250 |
atoms += size*v.get(size); |
81 |
|
|
} |
82 |
stehr |
303 |
//out.println("Atoms: " + atoms); |
83 |
stehr |
250 |
} |
84 |
|
|
|
85 |
stehr |
303 |
|
86 |
|
|
public static void writeResultToFile(Map<Integer, Integer> v, String baseName, String edgeType, double cutoff) { |
87 |
|
|
try { |
88 |
|
|
File outFile = new File(baseName + "_" + edgeType + "_" + cutoff + ".out"); |
89 |
|
|
if(outFile.exists()) { |
90 |
|
|
outFile.delete(); |
91 |
|
|
} |
92 |
|
|
outFile.createNewFile(); |
93 |
|
|
printValues(v, new PrintStream(outFile)); |
94 |
|
|
System.out.println("Results written to file " + outFile.getName()); |
95 |
|
|
|
96 |
|
|
} catch (FileNotFoundException e) { |
97 |
|
|
e.printStackTrace(); |
98 |
|
|
} catch (IOException e) { |
99 |
|
|
e.printStackTrace(); |
100 |
|
|
} |
101 |
|
|
} |
102 |
|
|
|
103 |
|
|
|
104 |
|
|
public static void writeResultToDb(Map<Integer, Integer> v) { |
105 |
|
|
// insert into runs(run_id, edgeType, cutoff, timestamp, proteins, points, cells, avg_pts_per_cell, max_pts_per_cell, avg_pts_per_area, max_pts_per_area) |
106 |
|
|
// insert into density_distr(run_id, points_per_cell, num_cells) |
107 |
|
|
// do another run for values per protein (run_id, pdb_id, chain_id, num_res, num_atoms, num_cells, surface_cells, core_cells, surface_area, volume) |
108 |
|
|
|
109 |
|
|
} |
110 |
|
|
|
111 |
|
|
public static Map<Integer, Integer> calcDensity(MySQLConnection conn, String edgeType, double cutoff, boolean verbose) { |
112 |
|
|
Map<Integer, Integer> densityCount = new TreeMap<Integer,Integer>(); |
113 |
stehr |
250 |
String pdbCode, chainCode; |
114 |
|
|
int numPdbs = 0; |
115 |
|
|
|
116 |
stehr |
303 |
if(verbose) { |
117 |
|
|
System.out.print(edgeType + " " + cutoff + ": "); |
118 |
|
|
} |
119 |
stehr |
250 |
|
120 |
|
|
// read structures from database |
121 |
stehr |
303 |
String query = "SELECT DISTINCT " + DB_COL_PDB + "," + DB_COL_CHAIN + " FROM " + DB_NAME + "." + DB_TABLE + " LIMIT " + limit + ";" ; |
122 |
stehr |
250 |
Statement stmt; |
123 |
|
|
try { |
124 |
|
|
stmt = conn.createStatement(); |
125 |
|
|
ResultSet rs = stmt.executeQuery(query); |
126 |
|
|
while(rs.next()) { |
127 |
|
|
pdbCode = rs.getString(1); |
128 |
|
|
chainCode = rs.getString(2); |
129 |
|
|
|
130 |
|
|
if(chainCode == null) { |
131 |
|
|
chainCode = "NULL"; |
132 |
|
|
} |
133 |
stehr |
303 |
numPdbs++; |
134 |
stehr |
250 |
|
135 |
|
|
// calculate statistics |
136 |
|
|
calcDensity(pdbCode, chainCode, cutoff, edgeType, conn, densityCount); // will add to densityCount |
137 |
stehr |
303 |
if(verbose) { |
138 |
|
|
if(numPdbs %2 == 0) { |
139 |
|
|
System.out.print('\b'); |
140 |
|
|
} else { |
141 |
|
|
System.out.print("."); |
142 |
|
|
} |
143 |
|
|
if(numPdbs % 500 == 0) System.out.print(numPdbs + " "); |
144 |
|
|
} |
145 |
stehr |
250 |
|
146 |
|
|
// for each protein write to db: pdb, chain, num_res, volume, max_density |
147 |
|
|
} |
148 |
stehr |
303 |
rs.close(); |
149 |
|
|
stmt.close(); |
150 |
|
|
if(verbose) System.out.println("."); |
151 |
stehr |
250 |
|
152 |
|
|
} catch (SQLException e) { |
153 |
|
|
e.printStackTrace(); |
154 |
stehr |
303 |
} |
155 |
|
|
return densityCount; |
156 |
|
|
} |
157 |
|
|
|
158 |
|
|
public static void main(String[] args) { |
159 |
|
|
MySQLConnection conn = null; |
160 |
|
|
Map<Integer, Integer> densityCount = null; |
161 |
|
|
double cutoff; |
162 |
|
|
|
163 |
|
|
// opening db connection |
164 |
|
|
try{ |
165 |
|
|
conn = new MySQLConnection(DB_HOST, DB_USER, DB_PWD); |
166 |
|
|
} catch (Exception e) { |
167 |
|
|
System.err.println("Error opening database connection. Exiting"); |
168 |
|
|
System.exit(1); |
169 |
stehr |
250 |
} |
170 |
|
|
|
171 |
stehr |
303 |
for(cutoff = cutoff_from; cutoff <= cutoff_to; cutoff += cutoff_step) { |
172 |
|
|
// run calculation |
173 |
|
|
densityCount = calcDensity(conn, edgeType, cutoff, true); |
174 |
|
|
|
175 |
|
|
// output results |
176 |
|
|
//printValues(densityCount, System.out); |
177 |
|
|
writeResultToFile(densityCount, outFileName, edgeType, cutoff); |
178 |
|
|
} |
179 |
|
|
System.out.println("Done."); |
180 |
|
|
|
181 |
stehr |
250 |
} |
182 |
|
|
|
183 |
|
|
} |