Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add secondary structure prediction. #42

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ docs/build/
docs/site/
.DS_Store
benchmark/tune.json
Manifest.toml
6 changes: 5 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "BioStructures"
uuid = "de9282ab-8554-53be-b2d6-f6c222edabfc"
authors = ["Joe G Greener <[email protected]>"]
version = "2.0.0"
version = "2.0.1"

[deps]
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
Expand All @@ -10,6 +10,7 @@ BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
DSSP_jll = "74334e00-59ce-546d-b517-81f3b7e1d491"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
Format = "1fa38f19-a742-5d3f-a2b9-30dd87b9d5f8"
Expand All @@ -18,6 +19,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MMTF = "259c3a9c-12c3-507f-b21f-68ecc40fcda4"
MetaGraphs = "626554b9-1ddb-594c-aa3c-2596fe9399a5"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
STRIDE_jll = "850473c1-9ef0-5df9-a957-757f4cde8b8b"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
Expand All @@ -27,13 +29,15 @@ BioGenerics = "0.1"
BioSequences = "3"
BioSymbols = "5"
CodecZlib = "0.7"
DSSP_jll = "4.4"
DataFrames = "1"
Downloads = "1"
Format = "1.3"
Graphs = "1"
MMTF = "1"
MetaGraphs = "0.7"
RecipesBase = "1"
STRIDE_jll = "1"
julia = "1.6"

[extras]
Expand Down
114 changes: 75 additions & 39 deletions benchmark/benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,59 +19,95 @@ temp_filename, io = mktemp()
close(io)

pdbids = ["1AKE", "1EN2", "1SSU"]
formats = Dict("PDB"=> PDB, "mmCIF"=> MMCIF, "MMTF"=> MMTF)
writefunctions = Dict("PDB"=> writepdb, "mmCIF"=> writemmcif, "MMTF"=> writemmtf)
formats = Dict("PDB" => PDB, "mmCIF" => MMCIF, "MMTF" => MMTF)
writefunctions = Dict("PDB" => writepdb, "mmCIF" => writemmcif, "MMTF" => writemmtf)

const SUITE = BenchmarkGroup(
[],
"read" => BenchmarkGroup([], [f=> BenchmarkGroup() for f in keys(formats)]...),
"write" => BenchmarkGroup([], [f=> BenchmarkGroup() for f in keys(formats)]...),
"dict" => BenchmarkGroup(),
"model" => BenchmarkGroup(),
"collect"=> BenchmarkGroup(),
"spatial"=> BenchmarkGroup(),
[],
"read" => BenchmarkGroup([], [f => BenchmarkGroup() for f in keys(formats)]...),
"write" => BenchmarkGroup([], [f => BenchmarkGroup() for f in keys(formats)]...),
"dict" => BenchmarkGroup(),
"model" => BenchmarkGroup(),
"collect" => BenchmarkGroup(),
"spatial" => BenchmarkGroup(),
)

struc = Dict{String, ProteinStructure}()
struc = Dict{String,ProteinStructure}()
for pdbid in pdbids
struc[pdbid] = read(testfilepath("PDB", "$pdbid.pdb"), PDB)
end

for pdbid in pdbids
for f in keys(formats)
SUITE["read"][f][pdbid] = @benchmarkable read(
$(testfilepath(f, "$pdbid.$(pdbextension[formats[f]])")), $(formats[f]))
SUITE["write"][f][pdbid] = @benchmarkable $(writefunctions[f])(
$temp_filename, $(struc[pdbid])) teardown=(rm(temp_filename, force=true))
$(testfilepath(f, "$pdbid.$(pdbextension[formats[f]])")),
$(formats[f]),
)
SUITE["write"][f][pdbid] =
@benchmarkable $(writefunctions[f])($temp_filename, $(struc[pdbid])) teardown =
(rm(temp_filename, force = true))
end
end

SUITE["dict"]["mmCIF"] = @benchmarkable MMCIFDict($(testfilepath("mmCIF", "1AKE.cif" )))
SUITE["dict"]["MMTF" ] = @benchmarkable MMTFDict( $(testfilepath("MMTF" , "1AKE.mmtf")))
SUITE["dict"]["mmCIF"] = @benchmarkable MMCIFDict($(testfilepath("mmCIF", "1AKE.cif")))
SUITE["dict"]["MMTF"] = @benchmarkable MMTFDict($(testfilepath("MMTF", "1AKE.mmtf")))

SUITE["model"]["atomaccess" ] = @benchmarkable $(struc["1EN2"])["A"][20]["CA"]
SUITE["model"]["sortatoms" ] = @benchmarkable sort(ats) setup=(ats = shuffle(collectatoms( struc["1EN2"])))
SUITE["model"]["sortresidues"] = @benchmarkable sort(res) setup=(res = shuffle(collectresidues(struc["1EN2"])))
SUITE["model"]["iterate" ] = @benchmarkable for mod in $(struc["1AKE"]) for ch in mod for res in ch for at in res end end end end
SUITE["model"]["sequence" ] = @benchmarkable LongAA($(collectresidues(struc["1EN2"])))
SUITE["model"]["pdbline" ] = @benchmarkable pdbline($(struc["1EN2"]["A"][20]["CA"]))
SUITE["model"]["atomaccess"] = @benchmarkable $(struc["1EN2"])["A"][20]["CA"]
SUITE["model"]["sortatoms"] =
@benchmarkable sort(ats) setup = (ats = shuffle(collectatoms(struc["1EN2"])))
SUITE["model"]["sortresidues"] =
@benchmarkable sort(res) setup = (res = shuffle(collectresidues(struc["1EN2"])))
SUITE["model"]["iterate"] = @benchmarkable for mod in $(struc["1AKE"])
for ch in mod
for res in ch
for at in res
end
end
end
end
SUITE["model"]["sequence"] = @benchmarkable LongAA($(collectresidues(struc["1EN2"])))
SUITE["model"]["pdbline"] = @benchmarkable pdbline($(struc["1EN2"]["A"][20]["CA"]))

SUITE["collect"]["atoms" ] = @benchmarkable collectatoms( $(struc["1EN2"]))
SUITE["collect"]["atomssel" ] = @benchmarkable collectatoms( $(struc["1EN2"]), calphaselector)
SUITE["collect"]["atomsdis" ] = @benchmarkable collectatoms( $(struc["1EN2"]), expand_disordered=true)
SUITE["collect"]["residues" ] = @benchmarkable collectresidues($(struc["1EN2"]))
SUITE["collect"]["residuessel" ] = @benchmarkable collectresidues($(struc["1EN2"]), standardselector)
SUITE["collect"]["residuesdis" ] = @benchmarkable collectresidues($(struc["1EN2"]), expand_disordered=true)
SUITE["collect"]["chains" ] = @benchmarkable collectchains( $(struc["1AKE"]))
SUITE["collect"]["models" ] = @benchmarkable collectmodels( $(struc["1SSU"]))
SUITE["collect"]["atoms"] = @benchmarkable collectatoms($(struc["1EN2"]))
SUITE["collect"]["atomssel"] = @benchmarkable collectatoms($(struc["1EN2"]), calphaselector)
SUITE["collect"]["atomsdis"] =
@benchmarkable collectatoms($(struc["1EN2"]), expand_disordered = true)
SUITE["collect"]["residues"] = @benchmarkable collectresidues($(struc["1EN2"]))
SUITE["collect"]["residuessel"] =
@benchmarkable collectresidues($(struc["1EN2"]), standardselector)
SUITE["collect"]["residuesdis"] =
@benchmarkable collectresidues($(struc["1EN2"]), expand_disordered = true)
SUITE["collect"]["chains"] = @benchmarkable collectchains($(struc["1AKE"]))
SUITE["collect"]["models"] = @benchmarkable collectmodels($(struc["1SSU"]))

SUITE["spatial"]["coordarray" ] = @benchmarkable coordarray($(collectatoms(struc["1AKE"])))
SUITE["spatial"]["transformation"] = @benchmarkable Transformation($(collectresidues(struc["1SSU"][5], standardselector)), $(collectresidues(struc["1SSU"][10], standardselector)))
SUITE["spatial"]["rmsd" ] = @benchmarkable rmsd($(coordarray(struc["1SSU"][5], heavyatomselector)), $(coordarray(struc["1SSU"][10], heavyatomselector)))
SUITE["spatial"]["distance" ] = @benchmarkable distance($(struc["1AKE"]["A"][50]), $(struc["1AKE"]["A"][60]))
SUITE["spatial"]["bondangle" ] = @benchmarkable bondangle($(struc["1AKE"]["A"][20]["N"]), $(struc["1AKE"]["A"][20]["CA"]), $(struc["1AKE"]["A"][20]["C"]))
SUITE["spatial"]["dihedralangle" ] = @benchmarkable dihedralangle($(struc["1AKE"]["A"][20]["N"]), $(struc["1AKE"]["A"][20]["CA"]), $(struc["1AKE"]["A"][20]["C"]), $(struc["1AKE"]["A"][21]["N"]))
SUITE["spatial"]["ramachandran" ] = @benchmarkable ramachandranangles($(collectresidues(struc["1AKE"]["A"], standardselector)))
SUITE["spatial"]["contactmap" ] = @benchmarkable ContactMap($(collectatoms(struc["1AKE"]["A"], cbetaselector)), 8.0)
SUITE["spatial"]["distancemap" ] = @benchmarkable DistanceMap($(collectresidues(struc["1AKE"]["A"], standardselector)))
SUITE["spatial"]["contactgraph" ] = @benchmarkable MetaGraph($(collectatoms(struc["1AKE"]["A"], cbetaselector)), 8.0)
SUITE["spatial"]["coordarray"] = @benchmarkable coordarray($(collectatoms(struc["1AKE"])))
SUITE["spatial"]["transformation"] = @benchmarkable Transformation(
$(collectresidues(struc["1SSU"][5], standardselector)),
$(collectresidues(struc["1SSU"][10], standardselector)),
)
SUITE["spatial"]["rmsd"] = @benchmarkable rmsd(
$(coordarray(struc["1SSU"][5], heavyatomselector)),
$(coordarray(struc["1SSU"][10], heavyatomselector)),
)
SUITE["spatial"]["distance"] =
@benchmarkable distance($(struc["1AKE"]["A"][50]), $(struc["1AKE"]["A"][60]))
SUITE["spatial"]["bondangle"] = @benchmarkable bondangle(
$(struc["1AKE"]["A"][20]["N"]),
$(struc["1AKE"]["A"][20]["CA"]),
$(struc["1AKE"]["A"][20]["C"]),
)
SUITE["spatial"]["dihedralangle"] = @benchmarkable dihedralangle(
$(struc["1AKE"]["A"][20]["N"]),
$(struc["1AKE"]["A"][20]["CA"]),
$(struc["1AKE"]["A"][20]["C"]),
$(struc["1AKE"]["A"][21]["N"]),
)
SUITE["spatial"]["ramachandran"] = @benchmarkable ramachandranangles(
$(collectresidues(struc["1AKE"]["A"], standardselector)),
)
SUITE["spatial"]["contactmap"] =
@benchmarkable ContactMap($(collectatoms(struc["1AKE"]["A"], cbetaselector)), 8.0)
SUITE["spatial"]["distancemap"] =
@benchmarkable DistanceMap($(collectresidues(struc["1AKE"]["A"], standardselector)))
SUITE["spatial"]["contactgraph"] =
@benchmarkable MetaGraph($(collectatoms(struc["1AKE"]["A"], cbetaselector)), 8.0)
15 changes: 6 additions & 9 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,11 @@ using BioStructures
makedocs(
sitename = "BioStructures.jl",
pages = [
"Home" => "index.md",
"Documentation"=> "documentation.md",
"Examples" => "examples.md",
"API" => "api.md",
]
"Home" => "index.md",
"Documentation" => "documentation.md",
"Examples" => "examples.md",
"API" => "api.md",
],
)

deploydocs(
repo="github.com/BioJulia/BioStructures.jl.git",
push_preview=true,
)
deploydocs(repo = "github.com/BioJulia/BioStructures.jl.git", push_preview = true)
4 changes: 4 additions & 0 deletions docs/src/documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ Properties can be retrieved as follows:
| [`resnumber`](@ref) | Residue number of a residue or atom | `Int` |
| [`sequentialresidues`](@ref) | Determine if the second residue follows the first in sequence | `Bool` |
| [`inscode`](@ref) | Insertion code of a residue or atom | `Char` |
| [`ss_code`](@ref) | Secondary structure code of a residue or atom | `String` |
| [`resid`](@ref) | Residue ID of an atom or residue (`full=true` includes chain) | `String` |
| [`atomnames`](@ref) | Atom names of the atoms in a residue, sorted by serial | `Array{String,1}` |
| [`atoms`](@ref) | Dictionary of atoms in a residue | `Dict{String,AbstractAtom}` |
Expand Down Expand Up @@ -170,6 +171,9 @@ The selectors available are:
| [`notwaterselector`](@ref) | `AbstractAtom` or `AbstractResidue` | Atoms/residues with residue name not HOH |
| [`disorderselector`](@ref) | `AbstractAtom` or `AbstractResidue` | Atoms/residues with alternative locations |
| [`allselector`](@ref) | `AbstractAtom` or `AbstractResidue` | All atoms/residues |
| [`helixselector`](@ref) | `Atom` or `Residue` | Atoms/residues that are part of a helix |
| [`sheetselector`](@ref) | `Atom` or `Residue` | Atoms/residues that are part of a sheet |
| [`coilselector`](@ref) | `Atom` or `Residue` | Atoms/residues that are part of a coil |

To create a new [`atomnameselector`](@ref) or [`resnameselector`](@ref):
```julia
Expand Down
3 changes: 3 additions & 0 deletions src/BioStructures.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,14 @@ using Graphs
using MetaGraphs
import MMTF: parsemmtf, writemmtf # Imported to avoid clash with MMTF name
using RecipesBase
using STRIDE_jll: STRIDE_jll # for secondary structure prediction
using DSSP_jll: DSSP_jll # for secondary structure prediction

include("model.jl")
include("pdb.jl")
include("mmcif.jl")
include("mmtf.jl")
include("spatial.jl")
include("secondary.jl")

end # BioStructures
Loading
Loading