From 4b53affbc5cbbfffe904458dd81a039dd54abe32 Mon Sep 17 00:00:00 2001 From: Alexander Sadovnikov Date: Mon, 23 Mar 2020 22:47:17 +0300 Subject: [PATCH] Version 0.1.2.9. `filterAtomsOfModel` function added (#28) --- ChangeLog.md | 6 +++ package.yaml | 2 +- src/Bio/MAE.hs | 6 +-- src/Bio/Structure.hs | 14 ++++-- src/Bio/Structure/Functions.hs | 59 +++++++++++++++++++++++++ test/MAESpec.hs | 8 ---- test/Spec.hs | 5 ++- test/StructureSpec.hs | 79 ++++++++++++++++++++++++++++++++++ 8 files changed, 163 insertions(+), 16 deletions(-) create mode 100644 src/Bio/Structure/Functions.hs create mode 100644 test/StructureSpec.hs diff --git a/ChangeLog.md b/ChangeLog.md index 51abaee..658d4aa 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -2,6 +2,12 @@ ## [Unreleased] +## [0.1.2.9] - 2020-03-12 +### Added +- Function `filterAtomsOfModel` to filter atoms of model by the given predicate. +### Fixed +- Grouping by residues when converting `Mae` to `Model`. + ## [0.1.2.8] - 2020-03-12 ### Added - `instance Traversable (Sequence mk w)`. diff --git a/package.yaml b/package.yaml index 7257dcc..3923f7e 100644 --- a/package.yaml +++ b/package.yaml @@ -1,5 +1,5 @@ name: cobot-io -version: 0.1.2.8 +version: 0.1.2.9 github: "less-wrong/cobot-io" license: BSD3 category: Bio diff --git a/src/Bio/MAE.hs b/src/Bio/MAE.hs index 4545176..993cc2d 100644 --- a/src/Bio/MAE.hs +++ b/src/Bio/MAE.hs @@ -1,5 +1,5 @@ -{-# LANGUAGE CPP #-} {-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE CPP #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Bio.MAE @@ -36,7 +36,7 @@ import Data.Vector (Vector) import qualified Data.Vector as V (fromList) import Linear.V3 (V3 (..)) #if !MIN_VERSION_base(4,13,0) -import Control.Monad.Fail (MonadFail(..)) +import Control.Monad.Fail (MonadFail (..)) import Prelude hiding (fail) #endif @@ -118,7 +118,7 @@ instance StructureModels Mae where residues = V.fromList $ fmap groupToResidue groupedByResidues by :: Int -> (Int, Text) - by i = (unsafeGetFromContents "i_m_residue_number" i, unsafeGetFromContents "s_m_pdb_residue_name" i) + by i = (unsafeGetFromContents "i_m_residue_number" i, getFromContents defaultChainName "s_m_insertion_code" i) defaultChainName :: Text defaultChainName = "A" diff --git a/src/Bio/Structure.hs b/src/Bio/Structure.hs index 195a10d..e4e78c1 100644 --- a/src/Bio/Structure.hs +++ b/src/Bio/Structure.hs @@ -31,10 +31,10 @@ data SecondaryStructure = PiHelix -- ^ pi helix instance NFData SecondaryStructure newtype GlobalID = GlobalID { getGlobalID :: Int } - deriving (Eq, Show, Generic, NFData) + deriving (Eq, Show, Ord, Generic, NFData) newtype LocalID = LocalID { getLocalID :: Int } - deriving (Eq, Show, Generic, NFData) + deriving (Eq, Show, Ord, Generic, NFData) -- | Generic atom representation -- @@ -56,7 +56,15 @@ data Bond m = Bond { bondStart :: m -- ^ index of first incident atom , bondEnd :: m -- ^ index of second incident atom , bondOrder :: Int -- ^ the order of chemical bond } - deriving (Show, Eq, Generic) + deriving (Show, Eq, Functor, Generic) + +instance Ord (Bond LocalID) where + (Bond (LocalID x) (LocalID y) _) <= (Bond (LocalID x') (LocalID y') _) | x == x' = y <= y' + | otherwise = x <= x' + +instance Ord (Bond GlobalID) where + (Bond (GlobalID x) (GlobalID y) _) <= (Bond (GlobalID x') (GlobalID y') _) | x == x' = y <= y' + | otherwise = x <= x' instance NFData a => NFData (Bond a) diff --git a/src/Bio/Structure/Functions.hs b/src/Bio/Structure/Functions.hs new file mode 100644 index 0000000..bb17185 --- /dev/null +++ b/src/Bio/Structure/Functions.hs @@ -0,0 +1,59 @@ +module Bio.Structure.Functions + ( filterAtomsOfModel + ) where + +import Bio.Structure (Atom (..), Bond (..), Chain (..), + GlobalID (..), LocalID (..), Model (..), + Residue (..)) +import qualified Data.Map.Strict as M (fromList, (!)) +import Data.Set (Set) +import qualified Data.Set as S (fromList, notMember, unions) +import Data.Vector (Vector) +import qualified Data.Vector as V (filter, fromList, length, toList, unzip) + +-- | Takes predicate on 'Atom's of 'Model' and returns new 'Model' containing only atoms +-- satisfying given predicate. +-- +filterAtomsOfModel :: (Atom -> Bool) -> Model -> Model +filterAtomsOfModel p Model{..} = Model newChains newBonds + where + removePred = not . p + (newChains, indss) = V.unzip $ fmap (removeAtomsFromChain removePred) modelChains + + inds = S.unions $ V.toList indss + newBonds = V.filter (\(Bond l r _) -> l `S.notMember` inds && r `S.notMember` inds) modelBonds + +removeAtomsFromChain :: (Atom -> Bool) -> Chain -> (Chain, Set GlobalID) +removeAtomsFromChain p Chain{..} = (Chain chainName newResidues, S.unions $ V.toList indss) + where + (newResidues, indss) = V.unzip $ fmap (removeAtomsFromResidue p) chainResidues + +removeAtomsFromResidue :: (Atom -> Bool) -> Residue -> (Residue, Set GlobalID) +removeAtomsFromResidue p r'@Residue{..} = (res, S.fromList $ V.toList $ fmap atomId withAtom) + where + (withAtom, withoutAtom, indsToDelete) = partitionAndInds resAtoms + + oldIndsToNew = M.fromList $ fmap (\i -> (i, newInd i)) [0 .. V.length resAtoms - 1] + newBonds = fmap modifyBond $ V.filter leaveBond resBonds + + res = r' { resAtoms=withoutAtom, resBonds=newBonds } + + leaveBond :: Bond LocalID -> Bool + leaveBond (Bond (LocalID l) (LocalID r) _) = l `notElem` indsToDelete && r `notElem` indsToDelete + + modifyBond :: Bond LocalID -> Bond LocalID + modifyBond (Bond (LocalID l) (LocalID r) t) = Bond (LocalID $ oldIndsToNew M.! l) + (LocalID $ oldIndsToNew M.! r) + t + + newInd :: Int -> Int + newInd i = i - (length $ filter (< i) indsToDelete) + + partitionAndInds :: Vector Atom -> (Vector Atom, Vector Atom, [Int]) + partitionAndInds = go 0 ([], [], []) . V.toList + where + go :: Int -> ([Atom], [Atom], [Int]) -> [Atom] -> (Vector Atom, Vector Atom, [Int]) + go _ (sat, notSat, inds) [] = (V.fromList $ reverse sat, V.fromList $ reverse notSat, reverse inds) + go i (sat, notSat, inds) (x : xs) = go (i + 1) newState xs + where + newState = if p x then (x : sat, notSat, i : inds) else (sat, x : notSat, inds) diff --git a/test/MAESpec.hs b/test/MAESpec.hs index f4d5053..887dd46 100644 --- a/test/MAESpec.hs +++ b/test/MAESpec.hs @@ -88,11 +88,3 @@ maeSpec = describe "Mae spec." $ do toSet :: Ord a => Vector a -> Set a toSet = S.fromList . V.toList - -instance Ord (Bond LocalID) where - (Bond (LocalID x) (LocalID y) _) <= (Bond (LocalID x') (LocalID y') _) | x == x' = y <= y' - | otherwise = x <= x' - -instance Ord (Bond GlobalID) where - (Bond (GlobalID x) (GlobalID y) _) <= (Bond (GlobalID x') (GlobalID y') _) | x == x' = y <= y' - | otherwise = x <= x' diff --git a/test/Spec.hs b/test/Spec.hs index 7dc34a0..df6f0ed 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -7,11 +7,12 @@ import GBWriterSpec import MAEParserSpec import MAESpec import MMTFSpec +import PDBSpec import SequenceSpec +import StructureSpec import System.IO import Test.Hspec import UniprotSpec -import PDBSpec main :: IO () main = do @@ -52,3 +53,5 @@ main = do repeatedStringsSpecP emptyRemarkSpecP emptyModelSpecP + -- Structure + structureSpec diff --git a/test/StructureSpec.hs b/test/StructureSpec.hs new file mode 100644 index 0000000..cd626ed --- /dev/null +++ b/test/StructureSpec.hs @@ -0,0 +1,79 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module StructureSpec where + +import Bio.MAE (fromFile) +import Bio.Structure (Atom (..), Bond (..), Chain (..), + LocalID (..), Model (..), + Residue (..), StructureModels (..)) +import Bio.Structure.Functions (filterAtomsOfModel) +import Control.Monad (join) +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as M (fromList, (!)) +import Data.Maybe (fromJust, isJust) +import Data.Set (Set) +import qualified Data.Set as S (fromList, member) +import Data.Vector (Vector) +import qualified Data.Vector as V (all, filter, find, fromList, + toList, zip) +import Test.Hspec + +structureSpec :: Spec +structureSpec = describe "Structure spec." $ do + m <- runIO $ V.toList . modelsOf <$> fromFile "test/MAE/Capri.mae" >>= \[x] -> pure x + + it "atoms filtering works correctly. only N, CA, C" $ checkFiltering m $ (`elem` ["N", "CA", "C"]) . atomName + it "atoms filtering works correctly. only CA" $ checkFiltering m $ (== "CA") . atomName + it "atoms filtering works correctly. no atoms" $ checkFiltering m $ const False + it "atoms filtering works correctly. all atoms" $ checkFiltering m $ const True + where + checkFiltering :: Model -> (Atom -> Bool) -> Expectation + checkFiltering m p = do + checkAtoms + checkGlobalBonds + checkLocalBonds + where + m' = filterAtomsOfModel p m + + checkAtoms :: Expectation + checkAtoms = V.all (V.all (V.all p . resAtoms) . chainResidues) (modelChains m') `shouldBe` True + + checkGlobalBonds :: Expectation + checkGlobalBonds = all (\(Bond l r _) -> l `S.member` inds && r `S.member` inds) (modelBonds m') `shouldBe` True + where + filteredAtoms = join $ fmap (join . fmap (V.filter p . resAtoms) . chainResidues) $ modelChains m + inds = vecToSet $ fmap atomId filteredAtoms + + checkLocalBonds :: Expectation + checkLocalBonds = all checkResiduePair pairsOfResidues `shouldBe` True + where + pairsOfResidues = zip (V.toList (join $ chainResidues <$> modelChains m)) + (V.toList (join $ chainResidues <$> modelChains m')) + + checkResiduePair :: (Residue, Residue) -> Bool + checkResiduePair (r, r') = vecToSet mappedBonds == vecToSet (resBonds r') + where + atInds = atomsWithIndices r + atInds' = atomsWithIndices r' + + mappedBonds = fromJust <$> V.filter isJust (mapBond <$> resBonds r) + + mapBond :: Bond LocalID -> Maybe (Bond LocalID) + mapBond (Bond l k t) = Bond <$> localMapping M.! l + <*> localMapping M.! k + <*> pure t + + localMapping :: Map LocalID (Maybe LocalID) + localMapping = M.fromList $ V.toList forMap + where + forMap = fmap (\(a, i) -> (i, snd <$> V.find ((== atomId a) . atomId . fst) atInds')) atInds + + atomsWithIndices :: Residue -> Vector (Atom, LocalID) + atomsWithIndices Residue{..} = V.zip resAtoms $ fmap LocalID $ V.fromList [0 ..] + + vecToSet :: Ord a => Vector a -> Set a + vecToSet = S.fromList . V.toList +