From 6cf07c04aae4c265d8f8cac3b7baef2d4070db60 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Sun, 30 Jun 2024 23:22:08 +0800 Subject: [PATCH] refactor: add doc string for seq_missing and block_missing; --- pygrinder/block_missing/block_missing.py | 32 +++++++++++++++++++-- pygrinder/sequential_missing/seq_missing.py | 26 +++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/pygrinder/block_missing/block_missing.py b/pygrinder/block_missing/block_missing.py index cd59b4d..8b06014 100644 --- a/pygrinder/block_missing/block_missing.py +++ b/pygrinder/block_missing/block_missing.py @@ -112,12 +112,40 @@ def block_missing( feature_idx: list = None, step_idx: list = None, ) -> Union[np.ndarray, torch.Tensor]: + """Create block missing data. + + Parameters + ---------- + X : + Data vector. If X has any missing values, they should be numpy.nan. + + factor : + The actual missing rate of block_missing is hard to be strictly controlled. + Hence, we use ``factor`` to help adjust the final missing rate. + + block_len : + The length of the mask block. + + block_width : + The width of the mask block. + + feature_idx : + The indices of features for a missing block to start with. + + step_idx : + The indices of steps for a missing block to start with. + + Returns + ------- + corrupted_X : + Original X with artificial missing values. + Both originally-missing and artificially-missing values are left as NaN. 
+ + """ if isinstance(X, list): X = np.asarray(X) n_samples, n_steps, n_features = X.shape - # assert 0 < p <= 1, f"p must be in range (0, 1), but got {p}" - assert isinstance( block_len, int ), f"`block_len` must be type of int, but got {type(block_len)}" diff --git a/pygrinder/sequential_missing/seq_missing.py b/pygrinder/sequential_missing/seq_missing.py index a752cde..62d5367 100644 --- a/pygrinder/sequential_missing/seq_missing.py +++ b/pygrinder/sequential_missing/seq_missing.py @@ -109,6 +109,32 @@ def seq_missing( feature_idx: list = None, step_idx: list = None, ) -> Union[np.ndarray, torch.Tensor]: + """Create subsequence missing data. + + Parameters + ---------- + X : + Data vector. If X has any missing values, they should be numpy.nan. + + p : + The probability that values may be masked as missing completely at random. + + seq_len : + The length of a missing sequence. + + feature_idx : + The indices of features for missing sequences to be corrupted. + + step_idx : + The indices of steps for a missing sequence to start with. + + Returns + ------- + corrupted_X : + Original X with artificial missing values. + Both originally-missing and artificially-missing values are left as NaN. + + """ if isinstance(X, list): X = np.asarray(X) n_samples, n_steps, n_features = X.shape