Skip to content

Commit

Permalink
Version 0.1.2.9. filterAtomsOfModel function added (#28)
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexKaneRUS authored Mar 23, 2020
1 parent 94c652e commit 4b53aff
Show file tree
Hide file tree
Showing 8 changed files with 163 additions and 16 deletions.
6 changes: 6 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

## [Unreleased]

## [0.1.2.9] - 2020-03-12
### Added
- Function `filterAtomsOfModel` to filter atoms of model by the given predicate.
### Fixed
- Grouping by residues when converting `Mae` to `Model`.

## [0.1.2.8] - 2020-03-12
### Added
- `instance Traversable (Sequence mk w)`.
Expand Down
2 changes: 1 addition & 1 deletion package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: cobot-io
version: 0.1.2.8
version: 0.1.2.9
github: "less-wrong/cobot-io"
license: BSD3
category: Bio
Expand Down
6 changes: 3 additions & 3 deletions src/Bio/MAE.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

module Bio.MAE
Expand Down Expand Up @@ -36,7 +36,7 @@ import Data.Vector (Vector)
import qualified Data.Vector as V (fromList)
import Linear.V3 (V3 (..))
#if !MIN_VERSION_base(4,13,0)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Fail (MonadFail (..))
import Prelude hiding (fail)
#endif

Expand Down Expand Up @@ -118,7 +118,7 @@ instance StructureModels Mae where
residues = V.fromList $ fmap groupToResidue groupedByResidues

by :: Int -> (Int, Text)
by i = (unsafeGetFromContents "i_m_residue_number" i, unsafeGetFromContents "s_m_pdb_residue_name" i)
by i = (unsafeGetFromContents "i_m_residue_number" i, getFromContents defaultChainName "s_m_insertion_code" i)

defaultChainName :: Text
defaultChainName = "A"
Expand Down
14 changes: 11 additions & 3 deletions src/Bio/Structure.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ data SecondaryStructure = PiHelix -- ^ pi helix
instance NFData SecondaryStructure

newtype GlobalID = GlobalID { getGlobalID :: Int }
deriving (Eq, Show, Generic, NFData)
deriving (Eq, Show, Ord, Generic, NFData)

newtype LocalID = LocalID { getLocalID :: Int }
deriving (Eq, Show, Generic, NFData)
deriving (Eq, Show, Ord, Generic, NFData)

-- | Generic atom representation
--
Expand All @@ -56,7 +56,15 @@ data Bond m = Bond { bondStart :: m -- ^ index of first incident atom
, bondEnd :: m -- ^ index of second incident atom
, bondOrder :: Int -- ^ the order of chemical bond
}
deriving (Show, Eq, Generic)
deriving (Show, Eq, Functor, Generic)

instance Ord (Bond LocalID) where
(Bond (LocalID x) (LocalID y) _) <= (Bond (LocalID x') (LocalID y') _) | x == x' = y <= y'
| otherwise = x <= x'

instance Ord (Bond GlobalID) where
(Bond (GlobalID x) (GlobalID y) _) <= (Bond (GlobalID x') (GlobalID y') _) | x == x' = y <= y'
| otherwise = x <= x'

instance NFData a => NFData (Bond a)

Expand Down
59 changes: 59 additions & 0 deletions src/Bio/Structure/Functions.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
module Bio.Structure.Functions
( filterAtomsOfModel
) where

import Bio.Structure (Atom (..), Bond (..), Chain (..),
GlobalID (..), LocalID (..), Model (..),
Residue (..))
import qualified Data.Map.Strict as M (fromList, (!))
import Data.Set (Set)
import qualified Data.Set as S (fromList, notMember, unions)
import Data.Vector (Vector)
import qualified Data.Vector as V (filter, fromList, length, toList, unzip)

-- | Takes predicate on 'Atom's of 'Model' and returns new 'Model' containing only atoms
-- satisfying given predicate.
--
filterAtomsOfModel :: (Atom -> Bool) -> Model -> Model
filterAtomsOfModel p Model{..} = Model newChains newBonds
where
removePred = not . p
(newChains, indss) = V.unzip $ fmap (removeAtomsFromChain removePred) modelChains

inds = S.unions $ V.toList indss
newBonds = V.filter (\(Bond l r _) -> l `S.notMember` inds && r `S.notMember` inds) modelBonds

removeAtomsFromChain :: (Atom -> Bool) -> Chain -> (Chain, Set GlobalID)
removeAtomsFromChain p Chain{..} = (Chain chainName newResidues, S.unions $ V.toList indss)
where
(newResidues, indss) = V.unzip $ fmap (removeAtomsFromResidue p) chainResidues

removeAtomsFromResidue :: (Atom -> Bool) -> Residue -> (Residue, Set GlobalID)
removeAtomsFromResidue p r'@Residue{..} = (res, S.fromList $ V.toList $ fmap atomId withAtom)
where
(withAtom, withoutAtom, indsToDelete) = partitionAndInds resAtoms

oldIndsToNew = M.fromList $ fmap (\i -> (i, newInd i)) [0 .. V.length resAtoms - 1]
newBonds = fmap modifyBond $ V.filter leaveBond resBonds

res = r' { resAtoms=withoutAtom, resBonds=newBonds }

leaveBond :: Bond LocalID -> Bool
leaveBond (Bond (LocalID l) (LocalID r) _) = l `notElem` indsToDelete && r `notElem` indsToDelete

modifyBond :: Bond LocalID -> Bond LocalID
modifyBond (Bond (LocalID l) (LocalID r) t) = Bond (LocalID $ oldIndsToNew M.! l)
(LocalID $ oldIndsToNew M.! r)
t

newInd :: Int -> Int
newInd i = i - (length $ filter (< i) indsToDelete)

partitionAndInds :: Vector Atom -> (Vector Atom, Vector Atom, [Int])
partitionAndInds = go 0 ([], [], []) . V.toList
where
go :: Int -> ([Atom], [Atom], [Int]) -> [Atom] -> (Vector Atom, Vector Atom, [Int])
go _ (sat, notSat, inds) [] = (V.fromList $ reverse sat, V.fromList $ reverse notSat, reverse inds)
go i (sat, notSat, inds) (x : xs) = go (i + 1) newState xs
where
newState = if p x then (x : sat, notSat, i : inds) else (sat, x : notSat, inds)
8 changes: 0 additions & 8 deletions test/MAESpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,3 @@ maeSpec = describe "Mae spec." $ do

toSet :: Ord a => Vector a -> Set a
toSet = S.fromList . V.toList

instance Ord (Bond LocalID) where
(Bond (LocalID x) (LocalID y) _) <= (Bond (LocalID x') (LocalID y') _) | x == x' = y <= y'
| otherwise = x <= x'

instance Ord (Bond GlobalID) where
(Bond (GlobalID x) (GlobalID y) _) <= (Bond (GlobalID x') (GlobalID y') _) | x == x' = y <= y'
| otherwise = x <= x'
5 changes: 4 additions & 1 deletion test/Spec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ import GBWriterSpec
import MAEParserSpec
import MAESpec
import MMTFSpec
import PDBSpec
import SequenceSpec
import StructureSpec
import System.IO
import Test.Hspec
import UniprotSpec
import PDBSpec

main :: IO ()
main = do
Expand Down Expand Up @@ -52,3 +53,5 @@ main = do
repeatedStringsSpecP
emptyRemarkSpecP
emptyModelSpecP
-- Structure
structureSpec
79 changes: 79 additions & 0 deletions test/StructureSpec.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

module StructureSpec where

import Bio.MAE (fromFile)
import Bio.Structure (Atom (..), Bond (..), Chain (..),
LocalID (..), Model (..),
Residue (..), StructureModels (..))
import Bio.Structure.Functions (filterAtomsOfModel)
import Control.Monad (join)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M (fromList, (!))
import Data.Maybe (fromJust, isJust)
import Data.Set (Set)
import qualified Data.Set as S (fromList, member)
import Data.Vector (Vector)
import qualified Data.Vector as V (all, filter, find, fromList,
toList, zip)
import Test.Hspec

structureSpec :: Spec
structureSpec = describe "Structure spec." $ do
m <- runIO $ V.toList . modelsOf <$> fromFile "test/MAE/Capri.mae" >>= \[x] -> pure x

it "atoms filtering works correctly. only N, CA, C" $ checkFiltering m $ (`elem` ["N", "CA", "C"]) . atomName
it "atoms filtering works correctly. only CA" $ checkFiltering m $ (== "CA") . atomName
it "atoms filtering works correctly. no atoms" $ checkFiltering m $ const False
it "atoms filtering works correctly. all atoms" $ checkFiltering m $ const True
where
checkFiltering :: Model -> (Atom -> Bool) -> Expectation
checkFiltering m p = do
checkAtoms
checkGlobalBonds
checkLocalBonds
where
m' = filterAtomsOfModel p m

checkAtoms :: Expectation
checkAtoms = V.all (V.all (V.all p . resAtoms) . chainResidues) (modelChains m') `shouldBe` True

checkGlobalBonds :: Expectation
checkGlobalBonds = all (\(Bond l r _) -> l `S.member` inds && r `S.member` inds) (modelBonds m') `shouldBe` True
where
filteredAtoms = join $ fmap (join . fmap (V.filter p . resAtoms) . chainResidues) $ modelChains m
inds = vecToSet $ fmap atomId filteredAtoms

checkLocalBonds :: Expectation
checkLocalBonds = all checkResiduePair pairsOfResidues `shouldBe` True
where
pairsOfResidues = zip (V.toList (join $ chainResidues <$> modelChains m))
(V.toList (join $ chainResidues <$> modelChains m'))

checkResiduePair :: (Residue, Residue) -> Bool
checkResiduePair (r, r') = vecToSet mappedBonds == vecToSet (resBonds r')
where
atInds = atomsWithIndices r
atInds' = atomsWithIndices r'

mappedBonds = fromJust <$> V.filter isJust (mapBond <$> resBonds r)

mapBond :: Bond LocalID -> Maybe (Bond LocalID)
mapBond (Bond l k t) = Bond <$> localMapping M.! l
<*> localMapping M.! k
<*> pure t

localMapping :: Map LocalID (Maybe LocalID)
localMapping = M.fromList $ V.toList forMap
where
forMap = fmap (\(a, i) -> (i, snd <$> V.find ((== atomId a) . atomId . fst) atInds')) atInds

atomsWithIndices :: Residue -> Vector (Atom, LocalID)
atomsWithIndices Residue{..} = V.zip resAtoms $ fmap LocalID $ V.fromList [0 ..]

vecToSet :: Ord a => Vector a -> Set a
vecToSet = S.fromList . V.toList

0 comments on commit 4b53aff

Please sign in to comment.