1 |
|
package proteinstructure; |
2 |
|
|
3 |
+ |
import java.io.BufferedReader; |
4 |
+ |
import java.io.File; |
5 |
|
import java.io.FileOutputStream; |
6 |
+ |
import java.io.FileReader; |
7 |
|
import java.io.PrintStream; |
8 |
+ |
import java.io.FileNotFoundException; |
9 |
|
import java.io.IOException; |
10 |
|
import java.util.HashMap; |
11 |
|
import java.util.TreeMap; |
12 |
|
import java.util.ArrayList; |
13 |
|
import java.util.Collections; |
14 |
+ |
import java.util.regex.Matcher; |
15 |
+ |
import java.util.regex.Pattern; |
16 |
|
|
17 |
|
public class Pdb { |
18 |
|
|
40 |
|
read_pdb_data_from_pdbase(db); |
41 |
|
} |
42 |
|
|
43 |
< |
/**
 * Constructs a Pdb object by reading its data from a PDB file.
 * <p>
 * The accession code is first set to the empty string, then
 * {@link #read_pdb_data_from_file(String)} is called to load the file.
 *
 * @param pdbfile path of the PDB file to read
 * @throws FileNotFoundException if the file cannot be opened for reading
 * @throws IOException if an I/O error occurs while reading the file
 */
public Pdb (String pdbfile) throws FileNotFoundException, IOException {
    // Give accode a defined value before any parsing starts.
    accode = "";
    read_pdb_data_from_file(pdbfile);
}
47 |
|
|
48 |
|
public void read_pdb_data_from_pdbase(String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError{ |
79 |
|
} |
80 |
|
} |
81 |
|
|
82 |
+ |
public void read_pdb_data_from_file(String pdbfile) throws FileNotFoundException, IOException{ |
83 |
+ |
resser_atom2atomserial = new HashMap<String,Integer>(); |
84 |
+ |
resser2restype = new HashMap<Integer,String>(); |
85 |
+ |
atomser2coord = new HashMap<Integer,Double[]>(); |
86 |
+ |
atomser2resser = new HashMap<Integer,Integer>(); |
87 |
+ |
|
88 |
+ |
BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile))); |
89 |
+ |
String line; |
90 |
+ |
while ((line = fpdb.readLine() ) != null ) { |
91 |
+ |
Pattern p = Pattern.compile("^ATOM"); |
92 |
+ |
Matcher m = p.matcher(line); |
93 |
+ |
if (m.find()){ |
94 |
+ |
Pattern pl = Pattern.compile(".{6}(.....).{2}(...).{1}(...).{2}(.{4}).{4}(.{8})(.{8})(.{8})",Pattern.CASE_INSENSITIVE); |
95 |
+ |
Matcher ml = pl.matcher(line); |
96 |
+ |
if (ml.find()) { |
97 |
+ |
int atomserial=Integer.parseInt(ml.group(1).trim()); |
98 |
+ |
String atom = ml.group(2).trim(); |
99 |
+ |
String res_type = ml.group(3).trim(); |
100 |
+ |
int res_serial = Integer.parseInt(ml.group(4).trim()); |
101 |
+ |
double x = Double.parseDouble(ml.group(5).trim()); |
102 |
+ |
double y = Double.parseDouble(ml.group(6).trim()); |
103 |
+ |
double z = Double.parseDouble(ml.group(7).trim()); |
104 |
+ |
Double[] coords = {x, y, z}; |
105 |
+ |
ArrayList<String> aalist=AA.aas(); |
106 |
+ |
if (aalist.contains(res_type)) { |
107 |
+ |
atomser2coord.put(atomserial, coords); |
108 |
+ |
atomser2resser.put(atomserial, res_serial); |
109 |
+ |
resser2restype.put(res_serial, res_type); |
110 |
+ |
ArrayList<String> atomlist = aas2atoms.get(res_type); |
111 |
+ |
if (atomlist.contains(atom)){ |
112 |
+ |
resser_atom2atomserial.put(res_serial+"_"+atom, atomserial); |
113 |
+ |
} |
114 |
+ |
} |
115 |
+ |
} |
116 |
+ |
} |
117 |
+ |
} |
118 |
+ |
fpdb.close(); |
119 |
+ |
// now we read the sequence from the resser2restype HashMap |
120 |
+ |
// NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here! |
121 |
+ |
ArrayList<Integer> ressers = new ArrayList<Integer>(); |
122 |
+ |
for (int resser:resser2restype.keySet()) { |
123 |
+ |
ressers.add(resser); |
124 |
+ |
} |
125 |
+ |
Collections.sort(ressers); |
126 |
+ |
sequence=""; |
127 |
+ |
for (int resser:ressers){ |
128 |
+ |
String oneletter = AA.threeletter2oneletter(resser2restype.get(resser)); |
129 |
+ |
sequence += oneletter; |
130 |
+ |
} |
131 |
+ |
// finally we set accode and chaincode to unknown |
132 |
+ |
//TODO: we should parse accode and chaincode from appropriate fields in pdb file, |
133 |
+ |
// problem: in case of a non-original pdb file there won't be accession code |
134 |
+ |
accode="?"; |
135 |
+ |
chaincode="?"; |
136 |
+ |
} |
137 |
+ |
|
138 |
|
public void dump2pdbfile(String outfile) throws IOException { |
139 |
|
String chainstr=chain; |
140 |
|
if (chain.equals("NULL")){ |
149 |
|
String res_type = resser2restype.get(res_serial); |
150 |
|
Double[] coords = atomser2coord.get(atomserial); |
151 |
|
Object[] fields = {atomserial, atom, res_type, chainstr, res_serial, coords[0], coords[1], coords[2]}; |
152 |
< |
Out.printf("ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f",fields); |
152 |
> |
Out.printf("ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",fields); |
153 |
|
} |
154 |
|
Out.println("END"); |
155 |
|
Out.close(); |