ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/dumpseq.java
Revision: 492
Committed: Wed Jan 2 13:18:57 2008 UTC (16 years, 8 months ago) by duarte
File size: 4660 byte(s)
Log Message:
Copied the aglappe-jung branch into trunk.

Line File contents
1 import gnu.getopt.Getopt;
2
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.FileOutputStream;
6 import java.io.FileReader;
7 import java.io.IOException;
8 import java.io.PrintStream;
9 import java.sql.SQLException;
10
11 import proteinstructure.Pdb;
12 import proteinstructure.PdbCodeNotFoundError;
13 import proteinstructure.PdbLoadError;
14 import proteinstructure.PdbasePdb;
15 import tools.MySQLConnection;
16
17
18 public class dumpseq {
19 /*------------------------------ constants ------------------------------*/
20
21 public static final String PDB_DB = "pdbase";
22 public static final String DB_HOST = "white";
23 public static final String DB_USER = MySQLConnection.getUserName();
24 public static final String DB_PWD = "nieve";
25
26 public static final String GAP_CHARACTER = "-";
27
28 public static void main(String[] args) throws IOException {
29
30
31 String help = "Usage, 3 options:\n" +
32 "1) genGraph -i <listfile> -o <output_dir> [-D <pdbase_db>] \n" +
33 "2) genGraph -p <pdb_code> -c <chain_pdb_code> -o <output_dir> [-D <pdbase_db>] \n" +
34 "In case 2) also a list of comma separated pdb codes and chain codes can be specified, e.g. -p 1bxy,1jos -c A,A\n" +
35 "If pdbase_db not specified, the default pdbase will be used\n";
36
37 String listfile = "";
38 String[] pdbCodes = null;
39 String[] pdbChainCodes = null;
40 String pdbaseDb = PDB_DB;
41 String outputDir = "";
42
43 Getopt g = new Getopt("genGraph", args, "i:p:c:o:D:h?");
44 int c;
45 while ((c = g.getopt()) != -1) {
46 switch(c){
47 case 'i':
48 listfile = g.getOptarg();
49 break;
50 case 'p':
51 pdbCodes = g.getOptarg().split(",");
52 break;
53 case 'c':
54 pdbChainCodes = g.getOptarg().split(",");
55 break;
56 case 'o':
57 outputDir = g.getOptarg();
58 break;
59 case 'D':
60 pdbaseDb = g.getOptarg();
61 break;
62 case 'h':
63 case '?':
64 System.out.println(help);
65 System.exit(0);
66 break; // getopt() already printed an error
67 }
68 }
69
70 if (listfile.equals("") && pdbCodes==null){
71 System.err.println("Either a listfile or some pdb codes/chain codes must be given");
72 System.err.println(help);
73 System.exit(1);
74 }
75 if (!listfile.equals("") && pdbCodes!=null) {
76 System.err.println("Options -p/-c and -i are exclusive. Use only one of them");
77 System.err.println(help);
78 System.exit(1);
79 }
80
81
82 MySQLConnection conn = null;
83
84 try{
85 conn = new MySQLConnection(DB_HOST, DB_USER, DB_PWD);
86 } catch (Exception e) {
87 System.err.println("Error opening database connection. Exiting");
88 System.exit(1);
89 }
90
91
92
93
94 if (!listfile.equals("")) {
95 BufferedReader fpdb = new BufferedReader(new FileReader(listfile));
96 String line = "";
97 int numLines = 0;
98 fpdb.mark(100000);
99 while ((line = fpdb.readLine() ) != null ) {
100 if (line.length()>0) numLines++;
101 }
102 fpdb.reset();
103 pdbCodes = new String[numLines];
104 pdbChainCodes = new String[numLines];
105 numLines = 0;
106 while ((line = fpdb.readLine() ) != null ) {
107 pdbCodes[numLines] = line.split("\\s+")[0].toLowerCase();
108 pdbChainCodes[numLines] = line.split("\\s+")[1];
109 numLines++;
110 }
111 }
112
113 int numPdbs = 0;
114
115 for (int i=0;i<pdbCodes.length;i++) {
116 String pdbCode = pdbCodes[i];
117 String pdbChainCode = pdbChainCodes[i];
118
119 try {
120
121 Pdb pdb = new PdbasePdb(pdbCode, pdbaseDb, conn);
122 pdb.load(pdbChainCode);
123
124 String sequence = pdb.getSequence();
125
126 File outputFile = new File(outputDir,pdbCode+"_"+pdbChainCode+".fasta");
127
128 PrintStream Out = new PrintStream(new FileOutputStream(outputFile.getAbsolutePath()));
129 Out.println(">"+pdbCode+"_"+pdbChainCode);
130 // for (int pos=1;pos<=sequence.length();pos++) {
131 // if (pos%10==0){
132 // Out.printf("%10d",pos/10);
133 // }
134 // }
135 // Out.println();
136 // for (int pos=1;pos<=sequence.length();pos++) {
137 // Out.print(pos%10);
138 // }
139 // Out.println();
140 Out.println(sequence);
141 // for (int pos=1;pos<=sequence.length();pos++) {
142 // if (pdb.hasCoordinates(pos)) {
143 // Out.print(sequence.charAt(pos-1));
144 // } else {
145 // Out.print(GAP_CHARACTER);
146 // }
147 // }
148 Out.close();
149
150 System.out.println("Wrote "+outputFile.getAbsolutePath());
151
152 numPdbs++;
153
154 } catch (PdbLoadError e) {
155 System.err.println("Error loading pdb data for " + pdbCode + pdbChainCode+", specific error: "+e.getMessage());
156 } catch (PdbCodeNotFoundError e) {
157 System.err.println("Couldn't find pdb code "+pdbCode);
158 } catch (SQLException e) {
159 System.err.println("SQL error for structure "+pdbCode+"_"+pdbChainCode+", error: "+e.getMessage());
160 }
161
162 }
163
164 // output results
165 System.out.println("Number of dumped sequences: " + numPdbs);
166
167
168 }
169
170
171 }