import gnu.getopt.Getopt;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import proteinstructure.Pdb;
import proteinstructure.PdbCodeNotFoundError;
import proteinstructure.PdbLoadError;
import proteinstructure.PdbasePdb;
import tools.MySQLConnection;

public class dumpseq { |
19 |
/*------------------------------ constants ------------------------------*/ |
20 |
|
21 |
public static final String PDB_DB = "pdbase"; |
22 |
public static final String DB_HOST = "white"; |
23 |
public static final String DB_USER = MySQLConnection.getUserName(); |
24 |
public static final String DB_PWD = "nieve"; |
25 |
|
26 |
public static final String GAP_CHARACTER = "-"; |
27 |
|
28 |
public static void main(String[] args) throws IOException { |
29 |
|
30 |
|
31 |
String help = "Usage, 3 options:\n" + |
32 |
"1) genGraph -i <listfile> -o <output_dir> [-D <pdbase_db>] \n" + |
33 |
"2) genGraph -p <pdb_code> -c <chain_pdb_code> -o <output_dir> [-D <pdbase_db>] \n" + |
34 |
"In case 2) also a list of comma separated pdb codes and chain codes can be specified, e.g. -p 1bxy,1jos -c A,A\n" + |
35 |
"If pdbase_db not specified, the default pdbase will be used\n"; |
36 |
|
37 |
String listfile = ""; |
38 |
String[] pdbCodes = null; |
39 |
String[] pdbChainCodes = null; |
40 |
String pdbaseDb = PDB_DB; |
41 |
String outputDir = ""; |
42 |
|
43 |
Getopt g = new Getopt("genGraph", args, "i:p:c:o:D:h?"); |
44 |
int c; |
45 |
while ((c = g.getopt()) != -1) { |
46 |
switch(c){ |
47 |
case 'i': |
48 |
listfile = g.getOptarg(); |
49 |
break; |
50 |
case 'p': |
51 |
pdbCodes = g.getOptarg().split(","); |
52 |
break; |
53 |
case 'c': |
54 |
pdbChainCodes = g.getOptarg().split(","); |
55 |
break; |
56 |
case 'o': |
57 |
outputDir = g.getOptarg(); |
58 |
break; |
59 |
case 'D': |
60 |
pdbaseDb = g.getOptarg(); |
61 |
break; |
62 |
case 'h': |
63 |
case '?': |
64 |
System.out.println(help); |
65 |
System.exit(0); |
66 |
break; // getopt() already printed an error |
67 |
} |
68 |
} |
69 |
|
70 |
if (listfile.equals("") && pdbCodes==null){ |
71 |
System.err.println("Either a listfile or some pdb codes/chain codes must be given"); |
72 |
System.err.println(help); |
73 |
System.exit(1); |
74 |
} |
75 |
if (!listfile.equals("") && pdbCodes!=null) { |
76 |
System.err.println("Options -p/-c and -i are exclusive. Use only one of them"); |
77 |
System.err.println(help); |
78 |
System.exit(1); |
79 |
} |
80 |
|
81 |
|
82 |
MySQLConnection conn = null; |
83 |
|
84 |
try{ |
85 |
conn = new MySQLConnection(DB_HOST, DB_USER, DB_PWD); |
86 |
} catch (Exception e) { |
87 |
System.err.println("Error opening database connection. Exiting"); |
88 |
System.exit(1); |
89 |
} |
90 |
|
91 |
|
92 |
|
93 |
|
94 |
if (!listfile.equals("")) { |
95 |
BufferedReader fpdb = new BufferedReader(new FileReader(listfile)); |
96 |
String line = ""; |
97 |
int numLines = 0; |
98 |
fpdb.mark(100000); |
99 |
while ((line = fpdb.readLine() ) != null ) { |
100 |
if (line.length()>0) numLines++; |
101 |
} |
102 |
fpdb.reset(); |
103 |
pdbCodes = new String[numLines]; |
104 |
pdbChainCodes = new String[numLines]; |
105 |
numLines = 0; |
106 |
while ((line = fpdb.readLine() ) != null ) { |
107 |
pdbCodes[numLines] = line.split("\\s+")[0].toLowerCase(); |
108 |
pdbChainCodes[numLines] = line.split("\\s+")[1]; |
109 |
numLines++; |
110 |
} |
111 |
} |
112 |
|
113 |
int numPdbs = 0; |
114 |
|
115 |
for (int i=0;i<pdbCodes.length;i++) { |
116 |
String pdbCode = pdbCodes[i]; |
117 |
String pdbChainCode = pdbChainCodes[i]; |
118 |
|
119 |
try { |
120 |
|
121 |
Pdb pdb = new PdbasePdb(pdbCode, pdbaseDb, conn); |
122 |
pdb.load(pdbChainCode); |
123 |
|
124 |
String sequence = pdb.getSequence(); |
125 |
|
126 |
File outputFile = new File(outputDir,pdbCode+"_"+pdbChainCode+".fasta"); |
127 |
|
128 |
PrintStream Out = new PrintStream(new FileOutputStream(outputFile.getAbsolutePath())); |
129 |
Out.println(">"+pdbCode+"_"+pdbChainCode); |
130 |
// for (int pos=1;pos<=sequence.length();pos++) { |
131 |
// if (pos%10==0){ |
132 |
// Out.printf("%10d",pos/10); |
133 |
// } |
134 |
// } |
135 |
// Out.println(); |
136 |
// for (int pos=1;pos<=sequence.length();pos++) { |
137 |
// Out.print(pos%10); |
138 |
// } |
139 |
// Out.println(); |
140 |
Out.println(sequence); |
141 |
// for (int pos=1;pos<=sequence.length();pos++) { |
142 |
// if (pdb.hasCoordinates(pos)) { |
143 |
// Out.print(sequence.charAt(pos-1)); |
144 |
// } else { |
145 |
// Out.print(GAP_CHARACTER); |
146 |
// } |
147 |
// } |
148 |
Out.close(); |
149 |
|
150 |
System.out.println("Wrote "+outputFile.getAbsolutePath()); |
151 |
|
152 |
numPdbs++; |
153 |
|
154 |
} catch (PdbLoadError e) { |
155 |
System.err.println("Error loading pdb data for " + pdbCode + pdbChainCode+", specific error: "+e.getMessage()); |
156 |
} catch (PdbCodeNotFoundError e) { |
157 |
System.err.println("Couldn't find pdb code "+pdbCode); |
158 |
} catch (SQLException e) { |
159 |
System.err.println("SQL error for structure "+pdbCode+"_"+pdbChainCode+", error: "+e.getMessage()); |
160 |
} |
161 |
|
162 |
} |
163 |
|
164 |
// output results |
165 |
System.out.println("Number of dumped sequences: " + numPdbs); |
166 |
|
167 |
|
168 |
} |
169 |
|
170 |
|
171 |
} |