ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/dumpseq.java
Revision: 535
Committed: Wed Feb 13 14:11:10 2008 UTC (16 years, 7 months ago) by duarte
File size: 5225 byte(s)
Log Message:
Added option to output to standard output
Line File contents
import gnu.getopt.Getopt;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import proteinstructure.Pdb;
import proteinstructure.PdbCodeNotFoundError;
import proteinstructure.PdbLoadError;
import proteinstructure.PdbasePdb;
import tools.MySQLConnection;
16
17
18 public class dumpseq {
19 /*------------------------------ constants ------------------------------*/
20
21 public static final String PDB_DB = "pdbase";
22 public static final String DB_HOST = "white";
23 public static final String DB_USER = MySQLConnection.getUserName();
24 public static final String DB_PWD = "nieve";
25
26 public static final String GAP_CHARACTER = "-";
27
28 public static void main(String[] args) throws IOException {
29
30
31 String help = "Usage, 3 options:\n" +
32 "1) genGraph -i <listfile> [-o <output_dir> | -f <one_output_file> | -s] [-D <pdbase_db>] \n" +
33 "2) genGraph -p <pdb_code> -c <chain_pdb_code> [-o <output_dir> | -f <one_output_file> | -s] [-D <pdbase_db>] \n" +
34 "Output options: -o one file per sequence, -f one file for all sequences, -s standard output"+
35 "In case 2) also a list of comma separated pdb codes and chain codes can be specified, e.g. -p 1bxy,1jos -c A,A\n" +
36 "If pdbase_db not specified, the default pdbase will be used\n";
37
38 String listfile = "";
39 String[] pdbCodes = null;
40 String[] pdbChainCodes = null;
41 String pdbaseDb = PDB_DB;
42 String outputDir = "";
43 File oneOutputFile = null;
44 boolean stdout = false;
45
46 Getopt g = new Getopt("genGraph", args, "i:p:c:o:f:D:sh?");
47 int c;
48 while ((c = g.getopt()) != -1) {
49 switch(c){
50 case 'i':
51 listfile = g.getOptarg();
52 break;
53 case 'p':
54 pdbCodes = g.getOptarg().split(",");
55 break;
56 case 'c':
57 pdbChainCodes = g.getOptarg().split(",");
58 break;
59 case 'o':
60 outputDir = g.getOptarg();
61 break;
62 case 'f':
63 oneOutputFile = new File(g.getOptarg());
64 break;
65 case 'D':
66 pdbaseDb = g.getOptarg();
67 break;
68 case 's':
69 stdout = true;
70 break;
71 case 'h':
72 case '?':
73 System.out.println(help);
74 System.exit(0);
75 break; // getopt() already printed an error
76 }
77 }
78
79 if (listfile.equals("") && pdbCodes==null){
80 System.err.println("Either a listfile or some pdb codes/chain codes must be given");
81 System.err.println(help);
82 System.exit(1);
83 }
84 if (!listfile.equals("") && pdbCodes!=null) {
85 System.err.println("Options -p/-c and -i are exclusive. Use only one of them");
86 System.err.println(help);
87 System.exit(1);
88 }
89
90
91 MySQLConnection conn = null;
92
93 try{
94 conn = new MySQLConnection(DB_HOST, DB_USER, DB_PWD);
95 } catch (Exception e) {
96 System.err.println("Error opening database connection. Exiting");
97 System.exit(1);
98 }
99
100
101
102
103 if (!listfile.equals("")) {
104 BufferedReader fpdb = new BufferedReader(new FileReader(listfile));
105 String line = "";
106 int numLines = 0;
107 fpdb.mark(100000);
108 while ((line = fpdb.readLine() ) != null ) {
109 if (line.length()>0) numLines++;
110 }
111 fpdb.reset();
112 pdbCodes = new String[numLines];
113 pdbChainCodes = new String[numLines];
114 numLines = 0;
115 while ((line = fpdb.readLine() ) != null ) {
116 pdbCodes[numLines] = line.split("\\s+")[0].toLowerCase();
117 pdbChainCodes[numLines] = line.split("\\s+")[1];
118 numLines++;
119 }
120 }
121
122 int numPdbs = 0;
123
124 PrintStream Out = null;
125 if (stdout) {
126 Out = System.out;
127 } else if (oneOutputFile!=null) {
128 Out = new PrintStream(new FileOutputStream(oneOutputFile));
129 }
130
131 for (int i=0;i<pdbCodes.length;i++) {
132 String pdbCode = pdbCodes[i];
133 String pdbChainCode = pdbChainCodes[i];
134
135 try {
136
137 Pdb pdb = new PdbasePdb(pdbCode, pdbaseDb, conn);
138 pdb.load(pdbChainCode);
139
140 String sequence = pdb.getSequence();
141
142 File outputFile = new File(outputDir,pdbCode+"_"+pdbChainCode+".fasta");
143
144 if (!stdout && oneOutputFile==null) {
145 Out = new PrintStream(new FileOutputStream(outputFile.getAbsolutePath()));
146 }
147
148 if (!stdout) { // if output of sequence is stdout, then we don't want to print just the sequence without FASTA headers
149 Out.println(">"+pdbCode+"_"+pdbChainCode);
150 }
151
152 Out.println(sequence);
153
154 if (!stdout && oneOutputFile==null) {
155 Out.close();
156 }
157
158 if (!stdout) { // if output of sequence is stdout, then we don't want to print anything else to stdout
159 System.out.println("Wrote "+pdbCode+"_"+pdbChainCode+".fasta");
160 }
161
162 numPdbs++;
163
164 } catch (PdbLoadError e) {
165 System.err.println("Error loading pdb data for " + pdbCode + pdbChainCode+", specific error: "+e.getMessage());
166 } catch (PdbCodeNotFoundError e) {
167 System.err.println("Couldn't find pdb code "+pdbCode);
168 } catch (SQLException e) {
169 System.err.println("SQL error for structure "+pdbCode+"_"+pdbChainCode+", error: "+e.getMessage());
170 }
171
172 }
173
174 if (!stdout && oneOutputFile!=null) {
175 Out.close();
176 }
177
178 // output results
179 if (!stdout) { // if output of sequence is stdout, then we don't want to print anything else to stdout
180 System.out.println("Number of dumped sequences: " + numPdbs);
181 }
182
183
184 }
185
186
187 }