Skip to content

Commit

Permalink
Support residue "ins codes", linear backbone indexing only + other ch…
Browse files Browse the repository at this point in the history
…anges
  • Loading branch information
AntonOresten committed Jun 3, 2024
1 parent 99d32dd commit 35a27e8
Show file tree
Hide file tree
Showing 13 changed files with 73 additions and 175 deletions.
5 changes: 2 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
name = "Backboner"
uuid = "9ac9c2a2-1cfe-46d3-b3fd-6fa470ea56a7"
authors = ["Anton Oresten <anton.oresten42@gmail.com>"]
version = "0.9.8"
version = "0.10.0"

[deps]
BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
PDBTools = "e29189f1-7114-4dbd-93d0-c5673a921a58"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Rotations = "6038ab10-8711-5258-84ad-4b1120ba62dc"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Expand All @@ -20,14 +19,14 @@ ZygoteIdealizationExt = ["Zygote"]
[compat]
BioStructures = "3"
LinearAlgebra = "1"
PDBTools = "1"
PrecompileTools = "1"
Rotations = "1"
StaticArrays = "1"
julia = "1"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[targets]
test = ["Test", "Zygote"]
2 changes: 1 addition & 1 deletion ext/ZygoteIdealizationExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ function Backboner.idealize(
ideal_angles = [ideal for (_, ideal) in zip(bonds.angles, Iterators.cycle(ideal_angles))]
length_mask = Vector(abs.(bonds.lengths .- ideal_lengths) .< mask_tolerance)

offsets = zeros(T, size(backbone))
offsets = zeros(T, size(backbone.coords))

function total_loss(coords, offsets, w1, w2, w3)
offset_loss, l_loss, a_loss = ideal_loss(coords, offsets, ideal_lengths, ideal_angles, length_mask)
Expand Down
61 changes: 5 additions & 56 deletions src/backbone.jl
Original file line number Diff line number Diff line change
@@ -1,84 +1,33 @@
export Backbone

"""
Backbone{T <: Real, M <: AbstractMatrix{T}} <: AbstractMatrix{T}
Backbone{T<:Real,M<:AbstractMatrix{T}} <: AbstractVector{AbstractVector{T}}
The `Backbone` type is designed to efficiently store and manipulate the three-dimensional coordinates of backbone atoms.
# Examples
A `Backbone` can be created from a matrix of coordinates:
```jldoctest
julia> backbone = Backbone(zeros(3, 5)) # 5 atoms with 3 coordinates each
3×5 Backbone{Float64, Matrix{Float64}}:
0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0
julia> backbone[1] = [1.0, 2.0, 3.0]; # set the first atom's coordinates
julia> backbone
3×5 Backbone{Float64, Matrix{Float64}}:
1.0 0.0 0.0 0.0 0.0
2.0 0.0 0.0 0.0 0.0
3.0 0.0 0.0 0.0 0.0
julia> backbone[1:2] # indexing by range returns a new Backbone
3×2 Backbone{Float64, Matrix{Float64}}:
1.0 0.0
2.0 0.0
3.0 0.0
```
Arrays will always be flattened to a 3xN matrix:
```jldoctest
julia> backbone = Backbone(zeros(3, 3, 4)) # e.g. 3 coordinates per atom, 3 atoms per residue, 4 residues
3×12 Backbone{Float64, Matrix{Float64}}:
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
```
"""
struct Backbone{T <: Real, M <: AbstractMatrix{T}} <: AbstractMatrix{T}
struct Backbone{T<:Real,M<:AbstractMatrix{T}} <: AbstractVector{AbstractVector{T}}
coords::M

function Backbone{T, M}(coords::M) where {T <: Real, M <: AbstractMatrix{T}}
function Backbone{T,M}(coords::M) where {T<:Real,M<:AbstractMatrix{T}}
size(coords, 1) == 3 || throw(ArgumentError("Expected the first dimension of coords to have a size of 3"))
return new{T, M}(coords)
return new(coords)
end
end

Backbone{T}(coords::M) where {T <: Real, M <: AbstractMatrix{T}} = Backbone{T, M}(coords)
Backbone{T}(coords::AbstractArray{T}) where T <: Real = Backbone{T}(reshape(coords, size(coords, 1), :))
Backbone{T}(coords::AbstractArray{<:Real}) where T <: Real = Backbone{T}(convert.(T, coords))
Backbone{T}(backbone::Backbone) where T <: Real = Backbone{T}(backbone.coords)
Backbone(coords::AbstractArray{T}) where T <: Real = Backbone{T}(coords)

Backbone{T, M}(::UndefInitializer, n::Integer) where {T <: Real, M <: AbstractMatrix{T}} = Backbone{T, M}(M(undef, 3, n))
Backbone{T}(::UndefInitializer, n::Integer) where T = Backbone{T, Matrix{T}}(undef, n)
Backbone{T}() where T = Backbone{T}(undef, 0)

Base.values(backbone::Backbone) = backbone.coords

@inline Base.size(backbone::Backbone) = size(backbone.coords)
@inline Base.getindex(backbone::Backbone, i, j) = backbone.coords[i, j]
@inline Base.view(backbone::Backbone, i, j) = view(backbone.coords, i, j)
@inline Base.setindex!(backbone::Backbone, v, i, j) = (backbone.coords[i, j] .= v)

@inline Base.length(backbone::Backbone) = size(backbone, 2)
@inline Base.size(backbone::Backbone) = Tuple(size(backbone.coords, 2))
@inline Base.getindex(backbone::Backbone, i) = Backbone(backbone.coords[:, i])
@inline Base.view(backbone::Backbone, i) = Backbone(view(backbone.coords, :, i))
@inline Base.setindex!(backbone::Backbone, v, i) = (backbone.coords[:, i] .= v)

@inline Base.getindex(backbone::Backbone, i::Integer) = backbone.coords[:, i]
@inline Base.view(backbone::Backbone, i::Integer) = view(backbone.coords, :, i)

Base.copy(backbone::Backbone) = copy(backbone.coords)

Base.:(==)(backbone1::Backbone, backbone2::Backbone) = backbone1.coords == backbone2.coords
Base.:(==)(backbone::Backbone, coords::AbstractMatrix) = backbone.coords == coords
Base.:(==)(coords::AbstractMatrix, backbone::Backbone) = coords == backbone.coords

Base.hash(backbone::Backbone, h::UInt) = hash(backbone.coords, h)
35 changes: 14 additions & 21 deletions src/bonds.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,8 @@
export
get_atom_displacements,
get_atom_distances,
get_bond_vectors,
get_bond_lengths,
get_bond_angles,
get_dihedrals,
ChainedBonds,
append_bonds!,
append_bonds,
prepend_bonds!,
prepend_bonds
export get_atom_displacements, get_atom_distances
export get_bond_vectors, get_bond_lengths, get_bond_angles, get_dihedrals
export ChainedBonds
export append_bonds!, append_bonds
export prepend_bonds!, prepend_bonds

using LinearAlgebra
import Rotations: AngleAxis
Expand All @@ -27,9 +20,9 @@ function get_atom_distances(backbone::Backbone, start::Integer, step::Integer, s
return column_norms(get_atom_displacements(backbone, start, step, stride))
end

get_bond_lengths(bond_vectors::AbstractMatrix{<:Real}) = column_norms(bond_vectors)
_get_bond_lengths(bond_vectors::AbstractMatrix{<:Real}) = column_norms(bond_vectors)

function get_bond_angles(bond_vectors::AbstractMatrix{T}) where T
function _get_bond_angles(bond_vectors::AbstractMatrix{T}) where T
us = @view(bond_vectors[:, begin:end-1])
vs = @view(bond_vectors[:, begin+1:end])
return π .- acos.(clamp.(column_dots(us, vs) ./ (column_norms(us) .* column_norms(vs)), -one(T), one(T)))
Expand All @@ -42,7 +35,7 @@ function batched_cross_product(A::AbstractMatrix{T}, B::AbstractMatrix{T}) where
return [C1 C2 C3]'
end

function get_dihedrals(bond_vectors::AbstractMatrix{<:Real})
function _get_dihedrals(bond_vectors::AbstractMatrix{<:Real})
crosses = batched_cross_product(@view(bond_vectors[:, begin:end-1]), @view(bond_vectors[:, begin+1:end]))
normalized_crosses = normalize_columns(crosses)
cross_crosses = batched_cross_product(@view(normalized_crosses[:, begin:end-1]), @view(normalized_crosses[:, begin+1:end]))
Expand All @@ -54,9 +47,9 @@ function get_dihedrals(bond_vectors::AbstractMatrix{<:Real})
end

get_bond_vectors(backbone::Backbone) = get_atom_displacements(backbone, 1, 1, 1)
get_bond_lengths(backbone::Backbone) = get_atom_distances(backbone, 1, 1, 1)
get_bond_angles(backbone::Backbone) = get_bond_angles(get_bond_vectors(backbone))
get_dihedrals(backbone::Backbone) = get_dihedrals(get_bond_vectors(backbone))
get_bond_lengths(backbone::Backbone) = _get_bond_lengths(get_bond_vectors(backbone))
get_bond_angles(backbone::Backbone) = _get_bond_angles(get_bond_vectors(backbone))
get_dihedrals(backbone::Backbone) = _get_dihedrals(get_bond_vectors(backbone))

"""
ChainedBonds{T <: Real, V <: AbstractVector{T}}
Expand Down Expand Up @@ -101,9 +94,9 @@ Base.reverse(bonds::ChainedBonds) = reverse!(deepcopy(bonds))

function ChainedBonds(backbone::Backbone)
bond_vectors = get_bond_vectors(backbone)
lengths = get_bond_lengths(bond_vectors)
angles = get_bond_angles(bond_vectors)
dihedrals = get_dihedrals(bond_vectors)
lengths = _get_bond_lengths(bond_vectors)
angles = _get_bond_angles(bond_vectors)
dihedrals = _get_dihedrals(bond_vectors)
return ChainedBonds(lengths, angles, dihedrals)
end

Expand Down
2 changes: 1 addition & 1 deletion src/knots.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ function is_knotted(
backbone::Backbone,
metrics::Vector{Function} = [triangle_distance, triangle_area],
)
points = SVector{3}.(eachcol(backbone)) # convert to StaticArrays for 40x performance lmao
points = SVector{3}.(backbone) # convert to StaticArrays for 40x performance lmao
for metric in metrics
length(simplify(points, metric)) == 2 && return false
end
Expand Down
16 changes: 8 additions & 8 deletions src/protein/chain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,26 +22,29 @@ struct Chain <: AbstractVector{Residue}
backbone::Backbone
modelnum::Int
resnums::Vector{Int}
ins_codes::Vector{Char}
aavector::Vector{Char}
ssvector::Vector{Char}
residue_atoms_dict::Dict{Int, Vector{Atom}}
residue_atoms::Vector{Vector{Atom}}

function Chain(
backbone::Backbone;
id::AbstractString = "A",
modelnum::Int = 1,
resnums::Vector{Int} = collect(1:length(backbone) ÷ 3),
ins_codes::Vector{Char} = fill(' ', length(backbone) ÷ 3),
aavector::Vector{Char} = fill('G', length(backbone) ÷ 3),
ssvector::Union{Vector{Char}, Vector{<:Integer}} = fill(' ', length(backbone) ÷ 3),
residue_atoms_dict::Dict{Int, Vector{Atom}} = Dict{Int, Vector{Atom}}(i => Atom[] for i in resnums),
residue_atoms::Vector{Vector{Atom}} = [Atom[] for i in resnums],
)
L, r = divrem(length(backbone), 3)
iszero(r) || throw(ArgumentError("backbone must have a length divisible by 3"))
length(resnums) == L || throw(ArgumentError("length of resnums must be equal to length of backbone divided by 3"))
length(ins_codes) == L || throw(ArgumentError("length of ins_codes must be equal to length of backbone divided by 3"))
length(aavector) == L || throw(ArgumentError("length of aavector must be equal to length of backbone divided by 3"))
length(ssvector) == L || throw(ArgumentError("length of ssvector must be equal to length of backbone divided by 3"))
ssvector isa Vector{<:Integer} && (ssvector = get.(('-', 'H', 'E'), ssvector, ' '))
chain = new(id, backbone, modelnum, resnums, aavector, ssvector, residue_atoms_dict)
chain = new(id, backbone, modelnum, resnums, ins_codes, aavector, ssvector, residue_atoms)
assign_missing_backbone_atoms!(chain)
return chain
end
Expand All @@ -52,11 +55,11 @@ end
@inline Base.:(==)(chain1::Chain, chain2::Chain) = chain1.id == chain2.id && chain1.backbone == chain2.backbone && chain1.ssvector == chain2.ssvector
@inline Base.length(chain::Chain) = length(chain.backbone) ÷ 3
@inline Base.size(chain::Chain) = Tuple(length(chain))
@inline Base.getindex(chain::Chain, i::Integer) = Residue(chain.resnums[i], chain.residue_atoms_dict[chain.resnums[i]], chain.aavector[i], chain.ssvector[i])
@inline Base.getindex(chain::Chain, i::Integer) = Residue(chain.resnums[i], chain.residue_atoms[i], chain.aavector[i], chain.ssvector[i], chain.ins_codes[i])

function Base.getindex(chain::Protein.Chain, I::AbstractVector{<:Integer})
backbone = Backbone(reshape(chain.backbone.coords, 3, 3, :)[:, :, I])
Protein.Chain(backbone; id=chain.id, modelnum=chain.modelnum, resnums=chain.resnums[I], aavector=chain.aavector[I], ssvector=chain.ssvector[I])
Protein.Chain(backbone; id=chain.id, modelnum=chain.modelnum, resnums=chain.resnums[I], ins_codes=chain.ins_codes[I], aavector=chain.aavector[I], ssvector=chain.ssvector[I], residue_atoms=chain.residue_atoms[I])
end

Base.summary(chain::Chain) = "Chain $(chain.id) with $(length(chain)) residue$(length(chain) == 1 ? "" : "s")"
Expand All @@ -81,9 +84,6 @@ function assign_missing_backbone_atoms!(chain::Chain)
end
end

# oxygen_coords function in src/protein/oxygen.jl
# TODO: hydrogen_coords

"""
nitrogen_coords(chain::Chain)
nitrogen_coords(backbone::Backbone)
Expand Down
52 changes: 30 additions & 22 deletions src/protein/pdb.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,29 @@ protein_backbone(chain::BioStructures.Chain, res_selector) = Backboner.Backbone(
aminoacid_sequence(residues::Vector{<:BioStructures.AbstractResidue}) = map(oneletter_resname, residues)
aminoacid_sequence(chain::BioStructures.Chain, res_selector) = aminoacid_sequence(getresidues(chain, res_selector))

function get_residue_atoms_dict(residues::Vector{<:BioStructures.AbstractResidue})
atoms = Dict{Int, Vector{Atom}}()
function get_residue_atoms(residues::Vector{<:BioStructures.AbstractResidue})
residue_atoms = Vector{Atom}[]
for res in residues
append!(get!(atoms, res.number, Atom[]), Atom.(BioStructures.collectatoms(res, a -> BioStructures.standardselector(a) && !BioStructures.disorderselector(a))))
push!(residue_atoms, Atom.(BioStructures.collectatoms(res, a -> BioStructures.standardselector(a) && !BioStructures.disorderselector(a))))
end
return atoms
return residue_atoms
end

function Protein.Chain(residues::Vector{<:BioStructures.AbstractResidue})
id = only(unique(BioStructures.chainid.(residues)))
backbone = protein_backbone(residues)
aavector = aminoacid_sequence(residues)
resnums = BioStructures.resnumber.(residues)
ins_codes = BioStructures.inscode.(residues)
modelnum = only(unique(BioStructures.modelnumber.(residues)))
residue_atoms_dict = get_residue_atoms_dict(residues)
return Protein.Chain(id, backbone; resnums, aavector, modelnum, residue_atoms_dict)
residue_atoms = get_residue_atoms(residues)
return Protein.Chain(backbone; id, resnums, ins_codes, aavector, modelnum, residue_atoms)
end

function Protein.Chain(chain::BioStructures.Chain; res_selector=backboneselector)
residues = getresidues(chain, res_selector)
isempty(residues) && return Protein.Chain(
BioStructures.chainid(chain), Backbone{Float32}(undef, 0); resnums=Int[], aavector=Char[], modelnum=BioStructures.modelnumber(chain))
BioStructures.chainid(chain), Backbone(Matrix{Float32}(undef, 3, 0)); modelnum=BioStructures.modelnumber(chain))
return Protein.Chain(residues)
end

Expand All @@ -71,15 +72,13 @@ function readpdb(pdbfile::String)
return chains(struc)
end

import PDBTools

"""
writepdb(protein::Vector{Protein.Chain}, filename)
Write a protein (represented as a `Vector{Protein.Chain}`s) to a PDB file.
"""
function writepdb(protein::Vector{Chain}, filename, header=:auto, footer=:auto)
all_atoms = PDBTools.Atom[]
function writepdb(protein::Vector{Chain}, filename)
atom_records = BioStructures.AtomRecord[]
index = 0
residue_index = 0
for chain in protein
Expand All @@ -94,19 +93,28 @@ function writepdb(protein::Vector{Chain}, filename, header=:auto, footer=:auto)
]
for atom in residue_atoms
index += 1
push!(all_atoms, PDBTools.Atom(
index = index,
name = atom.name,
resname = resname,
chain = chain.id,
resnum = residue.num,
residue = residue_index,
x = atom.coords[1],
y = atom.coords[2],
z = atom.coords[3],
push!(atom_records, BioStructures.AtomRecord(
false,
index,
atom.name,
' ',
resname,
chain.id,
residue.num,
residue.ins_code,
atom.coords,
1.0,
0.0,
strip(atom.name)[1:1],
"",
))
end
end
end
PDBTools.writePDB(all_atoms, filename, header=header, footer=footer)
pdblines = BioStructures.pdbline.(atom_records)
io = open(filename, "w")
for line in pdblines
println(io, line)
end
close(io)
end
10 changes: 6 additions & 4 deletions src/protein/residue.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,22 @@ export Residue

const threeletter_aa_names = Dict{Char, String}([Char(v) => k for (k, v) in BioStructures.threeletter_to_aa])

struct Residue #<: AbstractVector{Atom}
struct Residue
num::Integer
atoms::Vector{Atom}
aa::Char
ss::Char
ins_code::Char

function Residue(num::Integer, atoms::Vector{Atom}, aa::Char='G', ss::Char=' ')
return new(num, atoms, aa, ss)
function Residue(num::Integer, atoms::Vector{Atom}, aa::Char='G', ss::Char=' ', ins_code::Char=' ')
return new(num, atoms, aa, ss, ins_code)
end
end

function Base.show(io::IO, residue::Residue)
num = residue.num
aa = get(threeletter_aa_names, residue.aa, "XXX")
ss = residue.ss == ' ' ? "" : "($(residue.ss))"
print(io, "Residue $num $aa with $(length(residue.atoms)) atom$(length(residue.atoms) == 1 ? "" : "s") $ss")
ins_code = residue.ins_code == ' ' ? "" : residue.ins_code
print(io, "Residue $num$ins_code $aa with $(length(residue.atoms)) atom$(length(residue.atoms) == 1 ? "" : "s") $ss")
end
Loading

2 comments on commit 35a27e8

@AntonOresten
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register

Release notes:

  • Add support for residue "ins codes" (those letters next to residue numbers).
  • Make Backbone{T} a subtype of AbstractVector{AbstractVector{T}}, instead of AbstractMatrix{T}.
  • Only allow linear indexing of backbones.
  • Remove get_bond_lengths(bond_vectors), get_bond_angles(bond_vectors), and get_dihedrals(bond_vectors) methods, in favor of only having the method for backbones, which calls _get_bond_lengths(bond_vectors), _get_bond_angles(bond_vectors), and _get_dihedrals(bond_vectors).
  • Use BioStructures instead of PDBTools for writing.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/108219

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.10.0 -m "<description of version>" 35a27e823dbb643188720a3b2ed085d8c36a413f
git push origin v0.10.0

Please sign in to comment.