From 22d3fcc641dcccae1aaeaf92cd7df763907c7c5d Mon Sep 17 00:00:00 2001 From: Hideaki Takahashi Date: Sun, 7 Apr 2024 13:53:44 +0900 Subject: [PATCH 1/2] Update Doc and Fix Typos (#176) --- README.md | 6 +- docs/source/contribution.rst | 151 +++++++++- src/aijack/attack/backdoor/dba.py | 11 +- .../attack/backdoor/modelreplacement.py | 11 +- src/aijack/attack/evasion/evasion_attack.py | 4 +- .../attack/inversion/generator_attack.py | 2 +- .../attack/inversion/gradientinversion.py | 16 +- src/aijack/attack/labelleakage/normattack.py | 4 +- src/aijack/attack/poison/poison_attack.py | 2 +- src/aijack/collaborative/fedavg/api.py | 15 +- src/aijack/collaborative/fedavg/client.py | 11 +- src/aijack/collaborative/fedgems/api.py | 2 +- src/aijack/collaborative/fedgems/client.py | 2 +- src/aijack/collaborative/fedprox/client.py | 11 +- src/aijack/collaborative/moon/client.py | 11 +- src/aijack/defense/__init__.py | 1 + src/aijack/defense/dp/core/rdp.cpp | 12 +- src/aijack/defense/dp/manager/__init__.py | 4 +- src/aijack/defense/dp/manager/accountant.py | 34 +-- src/aijack/defense/dp/manager/rdp.py | 24 +- src/aijack/defense/soteria/soteria_client.py | 13 +- src/main.cpp | 279 +++++++++--------- 22 files changed, 398 insertions(+), 228 deletions(-) diff --git a/README.md b/README.md index 5474c1f1..adf69e22 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ # What is AIJack? -AIJack is an easy-to-use open-source simulation tool for testing the security of your AI system against hijackers. It provides advanced security techniques like *Differential Privacy*, *Homomorphic Encryption*, *K-anonymity* and *Federated Learning* to guarantee protection for your AI. With AIJack, you can test and simulate defenses against various attacks such as *Poisoning*, *Model Inversion*, *Backdoor*, and *Free-Rider*. We support more than 30 state-of-the-art methods. 
For more information, check our [documentation](https://koukyosyumei.github.io/AIJack/) and start securing your AI today with AIJack. +AIJack is an easy-to-use open-source simulation tool for testing the security of your AI system against hijackers. It provides advanced security techniques like *Differential Privacy*, *Homomorphic Encryption*, *K-anonymity* and *Federated Learning* to guarantee protection for your AI. With AIJack, you can test and simulate defenses against various attacks such as *Poisoning*, *Model Inversion*, *Backdoor*, and *Free-Rider*. We support more than 30 state-of-the-art methods. For more information, check our [paper](https://arxiv.org/abs/2312.17667) and [documentation](https://koukyosyumei.github.io/AIJack/) and start securing your AI today with AIJack. # Installation @@ -237,6 +237,10 @@ Below you can find a list of papers and books that either use or extend AIJack. - Huang, Shiyuan. A General Framework for Model Adaptation to Meet Practical Constraints in Computer Vision. Diss. Columbia University, 2024. - Liu, Can, Jin Wang, and Dongyang Yu. "RAF-GI: Towards Robust, Accurate and Fast-Convergent Gradient Inversion Attack in Federated Learning." arXiv preprint arXiv:2403.08383 (2024). +# Contribution + +AIJack welcomes contributions of any kind. If you'd like to address a bug or propose a new feature, please refer to [our guide](docs/source/contribution.rst). + # Contact welcome2aijack[@]gmail.com diff --git a/docs/source/contribution.rst b/docs/source/contribution.rst index fcf0c28c..33240b54 100644 --- a/docs/source/contribution.rst +++ b/docs/source/contribution.rst @@ -3,4 +3,153 @@ .. _contribution: Contribution Guide -================== \ No newline at end of file +================== + +Welcome to AIJack's Contribution Guide! + +We're thrilled you're interested in contributing to AIJack. This guide outlines the process for submitting code changes and ensuring they adhere to our project's style and formatting conventions. 
+ +Getting Started +--------------- + +Fork the Repository +^^^^^^^^^^^^^^^^^^^ + +* Head over to the AIJack repository on GitHub (https://github.com/Koukyosyumei/AIJack). +* Click the "Fork" button to create your own copy of the repository. + +Clone Your Fork +^^^^^^^^^^^^^^^ + +* Open your terminal and navigate to your desired local directory. +* Use the git clone command to clone your forked repository: + +.. code-block:: bash + + git clone https://github.com/YOUR_USERNAME/AIJack.git + # Replace YOUR_USERNAME with your GitHub username (and AIJack with the actual project name if you renamed your fork). + +Set Up a Development Environment +-------------------------------- + +* Build and install AIJack from source code + +.. code-block:: bash + + cd AIJack + + # install the dependencies + apt install -y libboost-all-dev + pip install -U pip + pip install "pybind11[global]" + + # install the editable version + pip install -e . + + +Coding Style and Formatting +--------------------------- + +Google-Style Docstrings +^^^^^^^^^^^^^^^^^^^^^^^ + +We use Google-style docstrings to provide clear and consistent documentation for functions, classes, and modules. +Refer to the Google Python Style Guide (https://github.com/google/styleguide/blob/gh-pages/pyguide.md) for detailed formatting instructions. + +Black Code Formatter +^^^^^^^^^^^^^^^^^^^^ + +We utilize Black, a popular code formatter, to maintain consistent code style throughout the project. + +Ensure Black is installed (pip install black) in your virtual environment. + +To format your code before committing, run: + +.. code-block:: bash + + black . + + +Isort Import Organizer +^^^^^^^^^^^^^^^^^^^^^^ + +isort helps organize imports in a consistent manner. + +Install isort (pip install isort) in your virtual environment. + +To organize imports, run: + +.. code-block:: bash + + isort . + +Making Changes +-------------- + +Create a Branch +^^^^^^^^^^^^^^^ + +* Use git checkout -b BRANCH_NAME to create a new branch for your changes. Replace BRANCH_NAME with a descriptive name (e.g., fix-issue-123). 
+ +Implement Your Changes +^^^^^^^^^^^^^^^^^^^^^^ + +* Make your code modifications in the appropriate files. +* Adhere to the coding style and formatting conventions outlined above. + +Test Your Changes +^^^^^^^^^^^^^^^^^ + +* Write unit tests (if applicable) to verify your code's functionality and prevent regressions. + +* Run existing tests with pytest to ensure they still pass after your modifications. + +Commit Your Changes +^^^^^^^^^^^^^^^^^^^ + +* Stage your changes using + +.. code-block:: bash + + git add FILE1 FILE2 ... + +* Commit your staged changes with a descriptive message using + +.. code-block:: bash + + git commit -m "COMMIT_MESSAGE" + +Push Your Changes to Your Fork +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Push your branch to your forked repository on GitHub: + +.. code-block:: bash + + git push origin BRANCH_NAME + +Submitting a Pull Request +^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Navigate to Your Fork on GitHub: +* Go to your GitHub repository. +* Create a Pull Request: + + * Click on the "Pull requests" tab. + * Click on "New pull request" and select the branch containing your changes. + * Provide a clear and concise title and description for your pull request. + * Click on "Create pull request" to submit it for review. + * Code Review and Merging + +Project maintainers will review your pull request and provide feedback. +Address any comments or suggestions raised during the review process. +Once your pull request is approved, it will be merged into the main project repository. + +Additional Tips +--------------- + +* Consider running black . and isort . before committing your changes to ensure consistent formatting. +* Provide clear and concise commit messages that describe the purpose of your changes. +* If you're unsure about anything, feel free to ask for help! You can create an issue on the project's GitHub repository. + +Thank you for your contribution to AIJack! 
diff --git a/src/aijack/attack/backdoor/dba.py b/src/aijack/attack/backdoor/dba.py index 5906f1a9..051552c3 100644 --- a/src/aijack/attack/backdoor/dba.py +++ b/src/aijack/attack/backdoor/dba.py @@ -39,7 +39,9 @@ def upload_gradients(self): def local_train( self, local_epoch, criterion, trainloader, optimizer, communication_id=0 ): - for i in range(local_epoch): + loss_log = [] + + for _ in range(local_epoch): running_loss = 0.0 running_data_num = 0 for _, data in enumerate(trainloader, 0): @@ -64,10 +66,9 @@ def local_train( running_loss += loss.item() running_data_num += inputs.shape[0] - print( - f"communication {communication_id}, epoch {i}: client-{self.user_id+1}", - running_loss / running_data_num, - ) + loss_log.append(running_loss / running_data_num) + + return loss_log return DistributedBackdoorAttackClientWrapper diff --git a/src/aijack/attack/backdoor/modelreplacement.py b/src/aijack/attack/backdoor/modelreplacement.py index 423a09e2..29a59f2a 100644 --- a/src/aijack/attack/backdoor/modelreplacement.py +++ b/src/aijack/attack/backdoor/modelreplacement.py @@ -42,7 +42,9 @@ def upload_gradients(self): def local_train( self, local_epoch, criterion, trainloader, optimizer, communication_id=0 ): - for i in range(local_epoch): + loss_log = [] + + for _ in range(local_epoch): if reference_dataloader is not None: running_loss = 0.0 running_data_num = 0 @@ -80,10 +82,9 @@ def local_train( running_loss += loss.item() running_data_num += inputs.shape[0] - print( - f"communication {communication_id}, epoch {i}: client-{self.user_id+1}", - running_loss / running_data_num, - ) + loss_log.append(running_loss / running_data_num) + + return loss_log return ModelReplacementAttackClientWrapper diff --git a/src/aijack/attack/evasion/evasion_attack.py b/src/aijack/attack/evasion/evasion_attack.py index 5801509d..85ec2bbf 100644 --- a/src/aijack/attack/evasion/evasion_attack.py +++ b/src/aijack/attack/evasion/evasion_attack.py @@ -148,7 +148,7 @@ def delta_kernel(xm): return 
True def _get_delta_p(self, xm): - """Culculates deviation of the estimated density p(xm−1 |yc = −1) + """Calculates deviation of the estimated density p(xm−1 |yc = −1) Args: xm (np.array) : an adversarial example @@ -167,7 +167,7 @@ def _get_delta_p(self, xm): return delta_p def _get_grad_f(self, xm, norm="l1"): - """Culculates deviation of objective function F + """Calculates deviation of objective function F Args: xm (np.array) : an adversarial example diff --git a/src/aijack/attack/inversion/generator_attack.py b/src/aijack/attack/inversion/generator_attack.py index f9ab2e07..c7378faa 100644 --- a/src/aijack/attack/inversion/generator_attack.py +++ b/src/aijack/attack/inversion/generator_attack.py @@ -27,7 +27,7 @@ def __init__( target_model if type(target_model) == list else [target_model] ) - def culc_loss(self, dataloader, x_pos=0, y_pos=1, arbitrary_y=False): + def calc_loss(self, dataloader, x_pos=0, y_pos=1, arbitrary_y=False): running_loss = 0 for data in dataloader: x = data[x_pos] diff --git a/src/aijack/attack/inversion/gradientinversion.py b/src/aijack/attack/inversion/gradientinversion.py index 6c730def..6e8d003b 100644 --- a/src/aijack/attack/inversion/gradientinversion.py +++ b/src/aijack/attack/inversion/gradientinversion.py @@ -94,7 +94,7 @@ def __init__( x_shape: the input shape of target_model. y_shape: the output shape of target_model. optimize_label: If true, only optimize images (the label will be automatically estimated). - gradient_ignore_pos: a list of positions whihc will be ignored during the culculation of + gradient_ignore_pos: a list of positions whihc will be ignored during the calculation of the distance between gradients pos_of_final_fc_layer: position of gradients corresponding to the final FC layer within the gradients received from the client. 
@@ -175,7 +175,7 @@ def _setup_distancefunc(self, distancename): """Assigns a function to self.distancefunc according to distancename Args: - distancename: name of the function to culculat the distance between the gradients. + distancename: name of the function to calculate the distance between the gradients. currently support 'l2' or 'cossim'. Raises: @@ -225,10 +225,10 @@ def hook(model, inp, output): return hook - def _culc_regularization_term( + def _calc_regularization_term( self, fake_x, fake_pred, fake_label, group_fake_x, received_gradients ): - """Culculates the regularization term + """Calculates the regularization term Args: fake_x: reconstructed images @@ -238,7 +238,7 @@ def _culc_regularization_term( received_gradients: gradients received from the client Returns: - culculated regularization term + calculated regularization term """ reg_term = 0 if self.tv_reg_coef != 0: @@ -295,7 +295,7 @@ def closure(): distance = self.distancefunc( fake_gradients, received_gradients, self.gradient_ignore_pos ) - distance += self._culc_regularization_term( + distance += self._calc_regularization_term( fake_x, fake_pred, fake_label, @@ -343,7 +343,7 @@ def attack( a tuple of the best reconstructed images and corresponding labels Raises: - OverflowError: If the culculated distance become Nan + OverflowError: If the calculated distance become Nan """ fake_x, fake_label, optimizer = _setup_attack( self.x_shape, @@ -376,7 +376,7 @@ def attack( fake_x[:] = fake_x.clamp(self.clamp_range[0], self.clamp_range[1]) # if torch.sum(torch.isnan(distance)).item(): - # raise OverflowError("stop because the culculated distance is Nan") + # raise OverflowError("stop because the calculated distance is Nan") if best_distance > distance: best_fake_x = fake_x.detach().clone() diff --git a/src/aijack/attack/labelleakage/normattack.py b/src/aijack/attack/labelleakage/normattack.py index 8eadde0d..c65d3c30 100644 --- a/src/aijack/attack/labelleakage/normattack.py +++ 
b/src/aijack/attack/labelleakage/normattack.py @@ -19,14 +19,14 @@ def extract_intermidiate_gradient(self, outputs): return self.clients[self.target_client_index].grad_from_next_client def attack(self, dataloader): - """Culculate leak_auc on the given SplitNN model + """Calculates leak_auc on the given SplitNN model reference: https://arxiv.org/abs/2102.08504 Args: dataloader (torch dataloader): dataloader for evaluation criterion: loss function for training device: cpu or GPU Returns: - score: culculated leak auc + score: leak auc """ epoch_labels = [] epoch_g_norm = [] diff --git a/src/aijack/attack/poison/poison_attack.py b/src/aijack/attack/poison/poison_attack.py index eedd310c..721094c2 100644 --- a/src/aijack/attack/poison/poison_attack.py +++ b/src/aijack/attack/poison/poison_attack.py @@ -63,7 +63,7 @@ def _detect_type_of_classifier(self): return True def _delta_q(self, xi, xc, yi, yc): - """Culculate deviation of q + """Calculate deviation of q Q = yy.T * K denotes the label - annotated version of K, and α denotes the SVM’s dual variables corresponding to each training point. 
diff --git a/src/aijack/collaborative/fedavg/api.py b/src/aijack/collaborative/fedavg/api.py index b17117bc..00753db1 100644 --- a/src/aijack/collaborative/fedavg/api.py +++ b/src/aijack/collaborative/fedavg/api.py @@ -54,15 +54,20 @@ def __init__( for dataset_size in local_dataset_sizes ] + self.logging = {} + def local_train(self, i): + self.logging[i] = {} + for client_idx in range(self.client_num): - self.clients[client_idx].local_train( + loss_log = self.clients[client_idx].local_train( self.local_epoch, self.criterion, self.local_dataloaders[client_idx], self.local_optimizers[client_idx], communication_id=i, ) + self.logging[i][client_idx] = loss_log def run(self): self.server.force_send_model_state_dict = True @@ -106,13 +111,16 @@ def __init__( self.custom_action = custom_action self.device = device + self.logging = [] + def run(self): self.party.mpi_initialize() self.comm.Barrier() for i in range(self.num_communication): if not self.is_server: - self.local_train(i) + loss_logging = self.local_train(i) + self.logging.append(loss_logging) self.party.action() self.custom_action(self) @@ -123,10 +131,11 @@ def local_train(self, com_cnt): for param in self.party.model.parameters(): self.party.prev_parameters.append(copy.deepcopy(param)) - self.party.local_train( + loss_logging = self.party.local_train( self.local_epoch, self.criterion, self.local_dataloader, self.local_optimizer, communication_id=com_cnt, ) + return loss_logging diff --git a/src/aijack/collaborative/fedavg/client.py b/src/aijack/collaborative/fedavg/client.py index afba47c4..061b0119 100644 --- a/src/aijack/collaborative/fedavg/client.py +++ b/src/aijack/collaborative/fedavg/client.py @@ -108,7 +108,9 @@ def download(self, new_global_model): def local_train( self, local_epoch, criterion, trainloader, optimizer, communication_id=0 ): - for i in range(local_epoch): + loss_log = [] + + for _ in range(local_epoch): running_loss = 0.0 running_data_num = 0 for _, data in enumerate(trainloader, 0): @@ 
-128,10 +130,9 @@ def local_train( running_loss += loss.item() running_data_num += inputs.shape[0] - print( - f"communication {communication_id}, epoch {i}: client-{self.user_id+1}", - running_loss / running_data_num, - ) + loss_log.append(running_loss / running_data_num) + + return loss_log def attach_mpi_to_fedavgclient(cls): diff --git a/src/aijack/collaborative/fedgems/api.py b/src/aijack/collaborative/fedgems/api.py index 4d270406..7277ad91 100644 --- a/src/aijack/collaborative/fedgems/api.py +++ b/src/aijack/collaborative/fedgems/api.py @@ -89,7 +89,7 @@ def train_client_on_public_dataset(self): optimizer.zero_grad() y_preds = client(inputs) - loss = client.culc_loss_on_public_dataset(idx, y_preds, labels) + loss = client.calc_loss_on_public_dataset(idx, y_preds, labels) loss.backward() optimizer.step() diff --git a/src/aijack/collaborative/fedgems/client.py b/src/aijack/collaborative/fedgems/client.py index edc2cf26..57d70418 100644 --- a/src/aijack/collaborative/fedgems/client.py +++ b/src/aijack/collaborative/fedgems/client.py @@ -41,7 +41,7 @@ def local_train(self, local_epoch, criterion, trainloader, optimizer): self, local_epoch, criterion, trainloader, optimizer ) - def culc_loss_on_public_dataset(self, idx, y_pred, y): + def calc_loss_on_public_dataset(self, idx, y_pred, y): y_pred_server = self.predicted_values_of_server[idx] base_loss = self.epsilon * self.base_loss_func(y_pred, y.to(torch.int64)) kl_loss = (1 - self.epsilon) * self.kldiv_loss_func( diff --git a/src/aijack/collaborative/fedprox/client.py b/src/aijack/collaborative/fedprox/client.py index 3d52987e..12783878 100644 --- a/src/aijack/collaborative/fedprox/client.py +++ b/src/aijack/collaborative/fedprox/client.py @@ -13,7 +13,9 @@ def local_train( optimizer, communication_id=0, ): - for i in range(local_epoch): + loss_log = [] + + for _ in range(local_epoch): running_loss = 0.0 running_data_num = 0 for _, data in enumerate(trainloader, 0): @@ -44,7 +46,6 @@ def local_train( running_loss 
+= loss.item() running_data_num += inputs.shape[0] - print( - f"communication {communication_id}, epoch {i}: client-{self.user_id+1}", - running_loss / running_data_num, - ) + loss_log.append(running_loss / running_data_num) + + return loss_log diff --git a/src/aijack/collaborative/moon/client.py b/src/aijack/collaborative/moon/client.py index ee7e15e1..e3099c83 100644 --- a/src/aijack/collaborative/moon/client.py +++ b/src/aijack/collaborative/moon/client.py @@ -51,7 +51,9 @@ def local_train( if param is not None: param = prev_param - for i in range(local_epoch): + loss_log = [] + + for _ in range(local_epoch): running_loss = 0.0 running_data_num = 0 for _, data in enumerate(trainloader, 0): @@ -85,7 +87,6 @@ def local_train( running_loss += loss.item() running_data_num += inputs.shape[0] - print( - f"communication {communication_id}, epoch {i}: client-{self.user_id+1}", - running_loss / running_data_num, - ) + loss_log.append(running_loss / running_data_num) + + return loss_log diff --git a/src/aijack/defense/__init__.py b/src/aijack/defense/__init__.py index cc430ae9..c7bfa761 100644 --- a/src/aijack/defense/__init__.py +++ b/src/aijack/defense/__init__.py @@ -1,6 +1,7 @@ """Subpackage for defense algorithms for machine learning models. 
""" +from .crobustness import PixelDP # noqa: F401 from .dp import DPSGDManager, GeneralMomentAccountant # noqa: F401 from .mid import VIB, KL_between_normals, mib_loss # noqa:F401 from .paillier import PaillierGradientClientManager, PaillierKeyGenerator # noqa: F401 diff --git a/src/aijack/defense/dp/core/rdp.cpp b/src/aijack/defense/dp/core/rdp.cpp index 854a672c..35ad7664 100644 --- a/src/aijack/defense/dp/core/rdp.cpp +++ b/src/aijack/defense/dp/core/rdp.cpp @@ -69,7 +69,7 @@ double eps_randresp(double alpha, py::dict params) return (1 / (alpha - 1)) * logsumexp(terms, signs); } -double culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_int(int alpha, +double calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_int(int alpha, py::dict params, double sampling_rate) { @@ -90,7 +90,7 @@ double culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_int(int alpha, return log_a / (alpha - 1); } -double culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_frac(double alpha, +double calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_frac(double alpha, py::dict params, double sampling_rate) { @@ -153,24 +153,24 @@ double culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_frac(double alpha, return _log_add(log_a0, log_a1) / (alpha - 1); } -double culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism(double alpha, +double calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism(double alpha, py::dict params, double sampling_rate, const std::function &_eps) { if (fmod(alpha, 1) == 0.0) { - return culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_int( + return calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_int( (int)alpha, params, sampling_rate); } else { - return culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_frac( + return calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_frac( alpha, params, sampling_rate); } } -double culc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019(int alpha, +double 
calc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019(int alpha, py::dict params, double sampling_rate, const std::function &_eps) diff --git a/src/aijack/defense/dp/manager/__init__.py b/src/aijack/defense/dp/manager/__init__.py index aa1f1a54..f07e96b4 100644 --- a/src/aijack/defense/dp/manager/__init__.py +++ b/src/aijack/defense/dp/manager/__init__.py @@ -1,7 +1,7 @@ from .accountant import ( # noqa: F401 GeneralMomentAccountant, - culc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019, - culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism, + calc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019, + calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism, ) from .client import DPSGDClientManager # noqa: F401 from .dp_manager import DPSGDManager # noqa: F401 diff --git a/src/aijack/defense/dp/manager/accountant.py b/src/aijack/defense/dp/manager/accountant.py index fedea75b..f173b7eb 100644 --- a/src/aijack/defense/dp/manager/accountant.py +++ b/src/aijack/defense/dp/manager/accountant.py @@ -5,14 +5,14 @@ _greedy_search_frac, _ternary_search, _ternary_search_int, - culc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019, - culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism, + calc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019, + calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism, eps_gaussian, eps_laplace, ) from .rdp import ( - culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism as culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_py, + calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism as calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_py, ) @@ -33,7 +33,7 @@ def __init__( self.max_iterations = max_iterations self.steps_info = [] - self._culc_bound_of_rdp = None + self.calc_bound_of_rdp = None if search == "ternary": self.search = _ternary_search @@ -46,7 +46,7 @@ def __init__( self._cache = {} - def _culc_upperbound_of_rdp_onestep(self, alpha, noise_params, 
sampling_rate): + def calc_upperbound_of_rdp_onestep(self, alpha, noise_params, sampling_rate): key = hash( f"{alpha}_{list(noise_params.keys())[0]}_{list(noise_params.values())[0]}_{sampling_rate}" ) @@ -56,7 +56,7 @@ def _culc_upperbound_of_rdp_onestep(self, alpha, noise_params, sampling_rate): elif sampling_rate == 1: result = self.eps_func(alpha, noise_params) else: - result = self._culc_bound_of_rdp( + result = self.calc_bound_of_rdp( alpha, noise_params, sampling_rate, self.eps_func ) self._cache[key] = result @@ -64,10 +64,10 @@ def _culc_upperbound_of_rdp_onestep(self, alpha, noise_params, sampling_rate): else: return self._cache[key] - def _culc_upperbound_of_rdp(self, lam, steps_info): + def _calc_upperbound_of_rdp(self, lam, steps_info): rdp = 0.0 for noise_params, sampling_rate, num_steps in steps_info: - rdp += num_steps * self._culc_upperbound_of_rdp_onestep( + rdp += num_steps * self.calc_upperbound_of_rdp_onestep( lam, noise_params, sampling_rate ) return rdp @@ -135,7 +135,7 @@ def get_noise_multiplier( def get_delta(self, epsilon): optimal_lam = self.search( lambda order: (order - 1) - * (self._culc_upperbound_of_rdp(order - 1, self.steps_info) - epsilon), + * (self._calc_upperbound_of_rdp(order - 1, self.steps_info) - epsilon), self.order_min, self.order_max, self.orders, @@ -145,7 +145,7 @@ def get_delta(self, epsilon): min_delta = np.exp( (optimal_lam - 1) - * (self._culc_upperbound_of_rdp(optimal_lam - 1, self.steps_info) - epsilon) + * (self._calc_upperbound_of_rdp(optimal_lam - 1, self.steps_info) - epsilon) ) return min_delta @@ -155,7 +155,7 @@ def get_epsilon(self, delta): def estimate_eps(order): return ( - self._culc_upperbound_of_rdp(order, self.steps_info) + self._calc_upperbound_of_rdp(order, self.steps_info) - (np.log(order) + np.log(delta)) / (order - 1) + np.log((order - 1) / order) ) @@ -208,14 +208,14 @@ def _set_noise_type(self, noise_type): def _set_upperbound_func(self, backend, bound_type): if backend == "cpp" and bound_type 
== "rdp_upperbound_closedformula": - self._culc_bound_of_rdp = ( - culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism + self.calc_bound_of_rdp = ( + calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism ) elif backend == "python" and bound_type == "rdp_upperbound_closedformula": - self._culc_bound_of_rdp = ( - culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_py + self.calc_bound_of_rdp = ( + calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_py ) elif backend == "cpp" and bound_type == "rdp_tight_upperbound": - self._culc_bound_of_rdp = ( - culc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019 + self.calc_bound_of_rdp = ( + calc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019 ) diff --git a/src/aijack/defense/dp/manager/rdp.py b/src/aijack/defense/dp/manager/rdp.py index c26549ae..bc4207fa 100644 --- a/src/aijack/defense/dp/manager/rdp.py +++ b/src/aijack/defense/dp/manager/rdp.py @@ -46,21 +46,21 @@ def eps_randresp(alpha, params): ) -def culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism( +def calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism( alpha, params, sampling_rate, _eps ): """Compute log(A_alpha) for any positive finite alpha.""" if float(alpha).is_integer(): - return culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_int( + return calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_int( int(alpha), params, sampling_rate ) else: - return culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_float( + return calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_float( alpha, params, sampling_rate ) -def culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_int( +def calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_int( alpha, params, sampling_rate ): """Renyi Differential Privacy of the Sampled Gaussian Mechanism @@ -81,7 +81,7 @@ def culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_int( return float(log_a) / (alpha - 1) -def 
culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_float( +def calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_float( alpha, params, sampling_rate ): """Compute log(A_alpha) for fractional alpha. 0 < q < 1.""" @@ -120,7 +120,7 @@ def culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism_float( return _log_add(log_a0, log_a1) / (alpha - 1) -def culc_upperbound_of_rdp_with_theorem27_of_wang_2019( +def calc_upperbound_of_rdp_with_theorem27_of_wang_2019( alpha, params, sampling_rate, _eps ): def B(el): @@ -176,19 +176,19 @@ def logAbsB(el): return (1 / (alpha - 1)) * logsumexp(terms, b=signs) -def culc_first_term_of_general_upper_bound_of_rdp(alpha, sampling_rate): +def calc_first_term_of_general_upper_bound_of_rdp(alpha, sampling_rate): return ((alpha - 1) * np.log(1 - sampling_rate)) + np.log( alpha * sampling_rate - sampling_rate + 1 ) -def culc_general_upperbound_of_rdp_with_theorem5_of_zhu_2019( +def calc_general_upperbound_of_rdp_with_theorem5_of_zhu_2019( alpha, params, sampling_rate, _eps ): terms = [] signs = [] - first = culc_first_term_of_general_upper_bound_of_rdp(alpha, sampling_rate) + first = calc_first_term_of_general_upper_bound_of_rdp(alpha, sampling_rate) terms.append(first) signs.append(1) @@ -213,13 +213,13 @@ def culc_general_upperbound_of_rdp_with_theorem5_of_zhu_2019( return (1 / (alpha - 1)) * logsumexp(terms, b=signs) -def culc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019( +def calc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019( alpha, params, sampling_rate, _eps ): terms = [] signs = [] - first = culc_first_term_of_general_upper_bound_of_rdp(alpha, sampling_rate) + first = calc_first_term_of_general_upper_bound_of_rdp(alpha, sampling_rate) terms.append(first) signs.append(1) @@ -234,7 +234,7 @@ def culc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019( return (1 / (alpha - 1)) * logsumexp(terms, b=signs) -def 
culc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019_with_tau_estimation( +def calc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019_with_tau_estimation( alpha, params, sampling_rate, _eps, tau=10 ): terms = [] diff --git a/src/aijack/defense/soteria/soteria_client.py b/src/aijack/defense/soteria/soteria_client.py index 66873872..a58ea6e0 100644 --- a/src/aijack/defense/soteria/soteria_client.py +++ b/src/aijack/defense/soteria/soteria_client.py @@ -59,7 +59,7 @@ def action_before_lossbackward(self): mask[:, i] = 1 feature.backward( mask, retain_graph=True - ) # culc the derivative of feature_2 @ df_dtarget + ) # calc the derivative of feature_2 @ df_dtarget dfri_dx = input_data.grad.data r_dfr_dx_norm[:, i] = feature[:, i] / torch.norm( dfri_dx.view(dfri_dx.shape[0], -1), dim=1 @@ -95,7 +95,9 @@ def backward(self, loss): def local_train( self, local_epoch, criterion, trainloader, optimizer, communication_id=0 ): - for i in range(local_epoch): + loss_log = [] + + for _ in range(local_epoch): running_loss = 0.0 running_data_num = 0 for _, data in enumerate(trainloader, 0): @@ -116,10 +118,9 @@ def local_train( running_loss += loss.item() running_data_num += inputs.shape[0] - print( - f"communication {communication_id}, epoch {i}: client-{self.user_id+1}", - running_loss / running_data_num, - ) + loss_log.append(running_loss / running_data_num) + + return loss_log return SoteriaClientWrapper diff --git a/src/main.cpp b/src/main.cpp index 799f45de..b5442b99 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -25,150 +25,151 @@ namespace py = pybind11; template using overload_cast_ = pybind11::detail::overload_cast_impl; -PYBIND11_MODULE(aijack_cpp_core, m) { - m.doc() = R"pbdoc( +PYBIND11_MODULE(aijack_cpp_core, m) +{ + m.doc() = R"pbdoc( c++ backend for aijack )pbdoc"; - m.def("eps_gaussian", &eps_gaussian, R"pbdoc(eps_gaussian)pbdoc"); - - m.def("eps_laplace", &eps_laplace, R"pbdoc(eps_laplace)pbdoc"); - - m.def("eps_randresp", 
&eps_randresp, R"pbdoc(eps_randresp)pbdoc"); - - m.def( - "culc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019", - &culc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019, - R"pbdoc(culc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019)pbdoc"); - - m.def("culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism", - &culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism, - R"pbdoc(culc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism)pbdoc"); - - m.def("_ternary_search", &_ternary_search, R"pbdoc(_ternary_search)pbdoc"); - - m.def("_ternary_search_int", &_ternary_search_int, - R"pbdoc(_ternary_search_int)pbdoc"); - - m.def("_greedy_search", &_greedy_search, R"pbdoc(_greedy_search)pbdoc"); - - m.def("_greedy_search_frac", &_greedy_search_frac, - R"pbdoc(_greey_search_frac)pbdoc"); - - py::class_(m, "PaillierKeyGenerator") - .def(py::init()) - .def("generate_keypair", &PaillierKeyGenerator::generate_keypair); - - py::class_(m, "PaillierPublicKey") - .def("encrypt", &PaillierPublicKey::encrypt) - .def("encrypt", &PaillierPublicKey::encrypt) - .def("encrypt", &PaillierPublicKey::encrypt) - .def("encrypt", &PaillierPublicKey::encrypt) - .def("get_publickeyvalues", &PaillierPublicKey::get_publickeyvalues); - - py::class_(m, "PaillierCipherText") - .def("__add__", overload_cast_()(&PaillierCipherText::operator+)) - .def("__add__", overload_cast_()(&PaillierCipherText::operator+)) - .def("__add__", overload_cast_()(&PaillierCipherText::operator+)) - .def("__add__", overload_cast_()(&PaillierCipherText::operator+)) - .def("__add__", - overload_cast_()(&PaillierCipherText::operator+)) - .def("__mul__", overload_cast_()(&PaillierCipherText::operator*)) - .def("__mul__", overload_cast_()(&PaillierCipherText::operator*)) - .def("__mul__", overload_cast_()(&PaillierCipherText::operator*)) - .def("__mul__", overload_cast_()(&PaillierCipherText::operator*)) - .def("get_value", &PaillierCipherText::get_value); - - py::class_(m, 
"PaillierSecretKey") - .def("decrypt2int", &PaillierSecretKey::decrypt2int) - .def("decrypt2long", &PaillierSecretKey::decrypt2long) - .def("decrypt2float", &PaillierSecretKey::decrypt2float) - .def("decrypt2double", &PaillierSecretKey::decrypt2double) - .def("get_publickeyvalues", &PaillierSecretKey::get_publickeyvalues) - .def("get_secretkeyvalues", &PaillierSecretKey::get_secretkeyvalues); - - py::class_(m, "XGBoostParty") - .def(py::init>, int, vector, int, int, float, - int, bool, int>()) - .def("set_cost_constraint_map", &XGBoostParty::set_cost_constraint_map) - .def("get_lookup_table", &XGBoostParty::get_lookup_table); - - py::class_(m, "SecureBoostParty") - .def(py::init>, int, vector, int, int, float, - int, bool, int>()) - .def("get_lookup_table", &SecureBoostParty::get_lookup_table) - .def("set_publickey", &SecureBoostParty::set_publickey) - .def("set_secretkey", &SecureBoostParty::set_secretkey); - - py::class_(m, "XGBoostNode") - .def("get_idxs", &XGBoostNode::get_idxs) - .def("get_party_id", &XGBoostNode::get_party_id) - .def("get_record_id", &XGBoostNode::get_record_id) - .def("get_num_parties", &XGBoostNode::get_num_parties) - .def("get_val", &XGBoostNode::get_val) - .def("get_score", &XGBoostNode::get_score) - .def("get_left", &XGBoostNode::get_left) - .def("get_right", &XGBoostNode::get_right) - .def("is_leaf", &XGBoostNode::is_leaf); - - py::class_(m, "SecureBoostNode") - .def("get_idxs", &SecureBoostNode::get_idxs) - .def("get_party_id", &SecureBoostNode::get_party_id) - .def("get_record_id", &SecureBoostNode::get_record_id) - .def("get_val", &SecureBoostNode::get_val) - .def("get_score", &SecureBoostNode::get_score) - .def("get_left", &SecureBoostNode::get_left) - .def("get_right", &SecureBoostNode::get_right) - .def("is_leaf", &SecureBoostNode::is_leaf); - - py::class_(m, "XGBoostTree") - .def("get_root_xgboost_node", &XGBoostTree::get_root_xgboost_node) - .def("print", &XGBoostTree::print) - .def("predict", &XGBoostTree::predict); - - 
py::class_(m, "SecureBoostTree") - .def("print", &SecureBoostTree::print) - .def("predict", &SecureBoostTree::predict); - - py::class_(m, "XGBoostClassifier") - .def(py::init()) - .def("fit", &XGBoostClassifier::fit) - .def("get_init_pred", &XGBoostClassifier::get_init_pred) - .def("load_estimators", &XGBoostClassifier::load_estimators) - .def("get_estimators", &XGBoostClassifier::get_estimators) - .def("get_parties", &XGBoostClassifier::get_parties) - .def("predict_raw", &XGBoostClassifier::predict_raw) - .def("predict_proba", &XGBoostClassifier::predict_proba); - - py::class_(m, "SecureBoostClassifier") - .def(py::init()) - .def("fit", &SecureBoostClassifier::fit) - .def("get_init_pred", &SecureBoostClassifier::get_init_pred) - .def("load_estimators", &SecureBoostClassifier::load_estimators) - .def("get_estimators", &SecureBoostClassifier::get_estimators) - .def("get_parties", &SecureBoostClassifier::get_parties) - .def("predict_raw", &SecureBoostClassifier::predict_raw) - .def("predict_proba", &SecureBoostClassifier::predict_proba); - - py::class_(m, "DataFrame") - .def(py::init, map, int>()) - .def("insert_continuous", &DataFrame::insert_continuous) - .def("insert_categorical", &DataFrame::insert_categorical) - .def("insert_continuous_column", &DataFrame::insert_continuous_column) - .def("insert_categorical_column", &DataFrame::insert_categorical_column) - .def("get_data_continuous", &DataFrame::get_data_continuous) - .def("get_data_categorical", &DataFrame::get_data_categorical); - - py::class_(m, "Mondrian") - .def(py::init()) - .def("get_final_partitions", &Mondrian::get_final_partitions) - .def("anonymize", &Mondrian::anonymize); + m.def("eps_gaussian", &eps_gaussian, R"pbdoc(eps_gaussian)pbdoc"); + + m.def("eps_laplace", &eps_laplace, R"pbdoc(eps_laplace)pbdoc"); + + m.def("eps_randresp", &eps_randresp, R"pbdoc(eps_randresp)pbdoc"); + + m.def( + "calc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019", + 
&calc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019, + R"pbdoc(calc_tightupperbound_lowerbound_of_rdp_with_theorem6and8_of_zhu_2019)pbdoc"); + + m.def("calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism", + &calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism, + R"pbdoc(calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism)pbdoc"); + + m.def("_ternary_search", &_ternary_search, R"pbdoc(_ternary_search)pbdoc"); + + m.def("_ternary_search_int", &_ternary_search_int, + R"pbdoc(_ternary_search_int)pbdoc"); + + m.def("_greedy_search", &_greedy_search, R"pbdoc(_greedy_search)pbdoc"); + + m.def("_greedy_search_frac", &_greedy_search_frac, + R"pbdoc(_greey_search_frac)pbdoc"); + + py::class_(m, "PaillierKeyGenerator") + .def(py::init()) + .def("generate_keypair", &PaillierKeyGenerator::generate_keypair); + + py::class_(m, "PaillierPublicKey") + .def("encrypt", &PaillierPublicKey::encrypt) + .def("encrypt", &PaillierPublicKey::encrypt) + .def("encrypt", &PaillierPublicKey::encrypt) + .def("encrypt", &PaillierPublicKey::encrypt) + .def("get_publickeyvalues", &PaillierPublicKey::get_publickeyvalues); + + py::class_(m, "PaillierCipherText") + .def("__add__", overload_cast_()(&PaillierCipherText::operator+)) + .def("__add__", overload_cast_()(&PaillierCipherText::operator+)) + .def("__add__", overload_cast_()(&PaillierCipherText::operator+)) + .def("__add__", overload_cast_()(&PaillierCipherText::operator+)) + .def("__add__", + overload_cast_()(&PaillierCipherText::operator+)) + .def("__mul__", overload_cast_()(&PaillierCipherText::operator*)) + .def("__mul__", overload_cast_()(&PaillierCipherText::operator*)) + .def("__mul__", overload_cast_()(&PaillierCipherText::operator*)) + .def("__mul__", overload_cast_()(&PaillierCipherText::operator*)) + .def("get_value", &PaillierCipherText::get_value); + + py::class_(m, "PaillierSecretKey") + .def("decrypt2int", &PaillierSecretKey::decrypt2int) + .def("decrypt2long", &PaillierSecretKey::decrypt2long) + 
.def("decrypt2float", &PaillierSecretKey::decrypt2float) + .def("decrypt2double", &PaillierSecretKey::decrypt2double) + .def("get_publickeyvalues", &PaillierSecretKey::get_publickeyvalues) + .def("get_secretkeyvalues", &PaillierSecretKey::get_secretkeyvalues); + + py::class_(m, "XGBoostParty") + .def(py::init>, int, vector, int, int, float, + int, bool, int>()) + .def("set_cost_constraint_map", &XGBoostParty::set_cost_constraint_map) + .def("get_lookup_table", &XGBoostParty::get_lookup_table); + + py::class_(m, "SecureBoostParty") + .def(py::init>, int, vector, int, int, float, + int, bool, int>()) + .def("get_lookup_table", &SecureBoostParty::get_lookup_table) + .def("set_publickey", &SecureBoostParty::set_publickey) + .def("set_secretkey", &SecureBoostParty::set_secretkey); + + py::class_(m, "XGBoostNode") + .def("get_idxs", &XGBoostNode::get_idxs) + .def("get_party_id", &XGBoostNode::get_party_id) + .def("get_record_id", &XGBoostNode::get_record_id) + .def("get_num_parties", &XGBoostNode::get_num_parties) + .def("get_val", &XGBoostNode::get_val) + .def("get_score", &XGBoostNode::get_score) + .def("get_left", &XGBoostNode::get_left) + .def("get_right", &XGBoostNode::get_right) + .def("is_leaf", &XGBoostNode::is_leaf); + + py::class_(m, "SecureBoostNode") + .def("get_idxs", &SecureBoostNode::get_idxs) + .def("get_party_id", &SecureBoostNode::get_party_id) + .def("get_record_id", &SecureBoostNode::get_record_id) + .def("get_val", &SecureBoostNode::get_val) + .def("get_score", &SecureBoostNode::get_score) + .def("get_left", &SecureBoostNode::get_left) + .def("get_right", &SecureBoostNode::get_right) + .def("is_leaf", &SecureBoostNode::is_leaf); + + py::class_(m, "XGBoostTree") + .def("get_root_xgboost_node", &XGBoostTree::get_root_xgboost_node) + .def("print", &XGBoostTree::print) + .def("predict", &XGBoostTree::predict); + + py::class_(m, "SecureBoostTree") + .def("print", &SecureBoostTree::print) + .def("predict", &SecureBoostTree::predict); + + py::class_(m, 
"XGBoostClassifier") + .def(py::init()) + .def("fit", &XGBoostClassifier::fit) + .def("get_init_pred", &XGBoostClassifier::get_init_pred) + .def("load_estimators", &XGBoostClassifier::load_estimators) + .def("get_estimators", &XGBoostClassifier::get_estimators) + .def("get_parties", &XGBoostClassifier::get_parties) + .def("predict_raw", &XGBoostClassifier::predict_raw) + .def("predict_proba", &XGBoostClassifier::predict_proba); + + py::class_(m, "SecureBoostClassifier") + .def(py::init()) + .def("fit", &SecureBoostClassifier::fit) + .def("get_init_pred", &SecureBoostClassifier::get_init_pred) + .def("load_estimators", &SecureBoostClassifier::load_estimators) + .def("get_estimators", &SecureBoostClassifier::get_estimators) + .def("get_parties", &SecureBoostClassifier::get_parties) + .def("predict_raw", &SecureBoostClassifier::predict_raw) + .def("predict_proba", &SecureBoostClassifier::predict_proba); + + py::class_(m, "DataFrame") + .def(py::init, map, int>()) + .def("insert_continuous", &DataFrame::insert_continuous) + .def("insert_categorical", &DataFrame::insert_categorical) + .def("insert_continuous_column", &DataFrame::insert_continuous_column) + .def("insert_categorical_column", &DataFrame::insert_categorical_column) + .def("get_data_continuous", &DataFrame::get_data_continuous) + .def("get_data_categorical", &DataFrame::get_data_categorical); + + py::class_(m, "Mondrian") + .def(py::init()) + .def("get_final_partitions", &Mondrian::get_final_partitions) + .def("anonymize", &Mondrian::anonymize); #ifdef VERSION_INFO - m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); + m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); #else - m.attr("__version__") = "dev"; + m.attr("__version__") = "dev"; #endif } From f65f82bb062bf069b2bbd2fc4b7f0b8b20c71064 Mon Sep 17 00:00:00 2001 From: Hideaki Takahashi Date: Sun, 7 Apr 2024 17:11:57 +0900 Subject: [PATCH 2/2] Update doc (#177) * add the contribution guide * fix typos * update * solve callble issues * update 
docstrings --- docs/source/aijack.collaborative.fedexp.rst | 21 +++ docs/source/aijack.collaborative.moon.rst | 21 +++ docs/source/aijack.collaborative.rst | 2 + docs/source/conf.py | 2 +- src/aijack/attack/evasion/diva.py | 34 +++++ src/aijack/attack/evasion/evasion_attack.py | 22 +--- src/aijack/attack/evasion/fgsm.py | 31 +++++ src/aijack/attack/inversion/utils/distance.py | 20 +++ .../attack/inversion/utils/regularization.py | 35 +++++ src/aijack/attack/labelleakage/normattack.py | 12 ++ src/aijack/attack/poison/history.py | 10 ++ src/aijack/attack/poison/label_flip.py | 12 ++ src/aijack/attack/poison/mapf.py | 12 ++ src/aijack/defense/crobustness/pixeldp.py | 96 ++++++++++++++ src/aijack/defense/dp/manager/accountant.py | 124 ++++++++++++++++++ src/aijack/defense/dp/manager/adadps.py | 49 +++++++ src/aijack/defense/dp/manager/client.py | 24 ++++ src/aijack/defense/dp/manager/dp_manager.py | 52 ++++++++ src/aijack/defense/mid/nn.py | 53 ++++++++ src/aijack/defense/soteria/soteria_client.py | 14 ++ 20 files changed, 627 insertions(+), 19 deletions(-) create mode 100644 docs/source/aijack.collaborative.fedexp.rst create mode 100644 docs/source/aijack.collaborative.moon.rst diff --git a/docs/source/aijack.collaborative.fedexp.rst b/docs/source/aijack.collaborative.fedexp.rst new file mode 100644 index 00000000..4a709518 --- /dev/null +++ b/docs/source/aijack.collaborative.fedexp.rst @@ -0,0 +1,21 @@ +aijack.collaborative.fedexp package +=================================== + +Submodules +---------- + +aijack.collaborative.fedexp.server module +----------------------------------------- + +.. automodule:: aijack.collaborative.fedexp.server + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: aijack.collaborative.fedexp + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/aijack.collaborative.moon.rst b/docs/source/aijack.collaborative.moon.rst new file mode 100644 index 00000000..5109fb43 --- /dev/null +++ b/docs/source/aijack.collaborative.moon.rst @@ -0,0 +1,21 @@ +aijack.collaborative.moon package +================================= + +Submodules +---------- + +aijack.collaborative.moon.client module +--------------------------------------- + +.. automodule:: aijack.collaborative.moon.client + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: aijack.collaborative.moon + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/aijack.collaborative.rst b/docs/source/aijack.collaborative.rst index f94ccddd..ae229547 100644 --- a/docs/source/aijack.collaborative.rst +++ b/docs/source/aijack.collaborative.rst @@ -10,10 +10,12 @@ Subpackages aijack.collaborative.core aijack.collaborative.dsfl aijack.collaborative.fedavg + aijack.collaborative.fedexp aijack.collaborative.fedgems aijack.collaborative.fedkd aijack.collaborative.fedmd aijack.collaborative.fedprox + aijack.collaborative.moon aijack.collaborative.optimizer aijack.collaborative.splitnn aijack.collaborative.tree diff --git a/docs/source/conf.py b/docs/source/conf.py index b5317271..f57172ea 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -81,4 +81,4 @@ "style.css", ] -jupyter_execute_notebooks = "off" +nb_execution_mode = "off" diff --git a/src/aijack/attack/evasion/diva.py b/src/aijack/attack/evasion/diva.py index 0b4aab95..d741aba4 100644 --- a/src/aijack/attack/evasion/diva.py +++ b/src/aijack/attack/evasion/diva.py @@ -4,6 +4,30 @@ class DIVAWhiteBoxAttacker(BaseAttacker): + """Class implementing the DIVA white-box attack. + + This class provides functionality to perform the DIVA white-box attack on a target model. 
+ + Args: + target_model (torch.nn.Module): The target model to be attacked. + target_model_on_edge (torch.nn.Module): The target model deployed on the edge. + c (float, optional): The trade-off parameter between origin and edge predictions. Defaults to 1.0. + num_itr (int, optional): The number of iterations for the attack. Defaults to 1000. + eps (float, optional): The maximum perturbation allowed. Defaults to 0.1. + lam (float, optional): The step size for gradient updates. Defaults to 0.01. + device (str, optional): The device to perform computation on. Defaults to "cpu". + + Attributes: + target_model (torch.nn.Module): The target model to be attacked. + target_model_on_edge (torch.nn.Module): The target model deployed on the edge. + c (float): The trade-off parameter between origin and edge predictions. + num_itr (int): The number of iterations for the attack. + eps (float): The maximum perturbation allowed. + lam (float): The step size for gradient updates. + device (str): The device to perform computation on. + + """ + def __init__( self, target_model, @@ -23,6 +47,16 @@ def __init__( self.device = device def attack(self, data): + """Performs the DIVA white-box attack on input data. + + Args: + data (tuple): A tuple containing input data and corresponding labels. + + Returns: + tuple: A tuple containing the adversarial examples and attack logs. 
+ + """ + x, y = data x = x.to(self.device) y = y.to(self.device) diff --git a/src/aijack/attack/evasion/evasion_attack.py b/src/aijack/attack/evasion/evasion_attack.py index 85ec2bbf..0871c135 100644 --- a/src/aijack/attack/evasion/evasion_attack.py +++ b/src/aijack/attack/evasion/evasion_attack.py @@ -7,8 +7,7 @@ class Evasion_attack_sklearn(BaseAttacker): - """Creates an adversarial example against sklearn objects - reference https://arxiv.org/abs/1708.06131 + """Creates an adversarial example against sklearn objects based on https://arxiv.org/abs/1708.06131 Args: target_model (sklearn): sklearn classifier @@ -26,10 +25,8 @@ class Evasion_attack_sklearn(BaseAttacker): Attributes: target_model (sklearn): sklearn classifier - X_minus_1 (numpy.array): datasets that contains - only the class you want to misclasssify - dmax (float): max distance between the adversarial example - and initial one + X_minus_1 (numpy.array): dataset that contains only the class you want to misclassify + dmax (float): max distance between the adversarial example and initial one max_iter (int): maxium number of iterations gamma (float): parameter gamma of svm (used for only svm) lam (float): trade - off parameter @@ -38,22 +35,11 @@ class Evasion_attack_sklearn(BaseAttacker): distance (str): type of distance such as L2 or L1 kde_type (str): type of kernel density estimator n_minus_1 (int): number of rows of X_minus_1 - delta_g (func): deviation of he discriminant function of a - surrogate classifier f learnt on D + delta_g (func): deviation of the discriminant function of a surrogate classifier f learnt on D Raises: ValueError: if given distance is not supported. 
- Examples: - >>>X_minus_1 = X_train[np.where(y_train == "3")] - >>>attacker = Attack_sklearn(target_model = target_model, - X_minus_1 = X_minus_1, - dmax = (5000 / 255) * 2.5, - max_iter = 300, - gamma = 1 / (X_train.shape[1] * - np.var(X_train)), - lam = 10, t = 0.5, h = 10) - >>>xm, log = attacker.attack(x0) """ def __init__( diff --git a/src/aijack/attack/evasion/fgsm.py b/src/aijack/attack/evasion/fgsm.py index aadfa80c..90228e27 100644 --- a/src/aijack/attack/evasion/fgsm.py +++ b/src/aijack/attack/evasion/fgsm.py @@ -4,6 +4,29 @@ class FGSMAttacker(BaseAttacker): + """Class implementing the Fast Gradient Sign Method (FGSM) attack. + + This class provides functionality to perform the FGSM attack on a target model. + + Args: + target_model (torch.nn.Module): The target model to be attacked. + criterion: The criterion to compute the loss. + eps (float, optional): The epsilon value for the FGSM attack. Defaults to 0.3. + grad_lower_bound (float, optional): The lower bound for the gradient. Defaults to -0.1. + grad_upper_bound (float, optional): The upper bound for the gradient. Defaults to 0.1. + output_lower_bound (float, optional): The lower bound for the output values. Defaults to -1.0. + output_upper_bound (float, optional): The upper bound for the output values. Defaults to 1.0. + + Attributes: + target_model (torch.nn.Module): The target model to be attacked. + criterion: The criterion to compute the loss. + eps (float): The epsilon value for the FGSM attack. + grad_lower_bound (float): The lower bound for the gradient. + grad_upper_bound (float): The upper bound for the gradient. + output_lower_bound (float): The lower bound for the output values. + output_upper_bound (float): The upper bound for the output values. + """ + def __init__( self, target_model, @@ -24,6 +47,14 @@ def __init__( self.output_upper_bound = output_upper_bound def attack(self, data): + """Performs the FGSM attack on input seed data. 
+ + Args: + data (tuple): A tuple containing input seed data and corresponding labels. + + Returns: + torch.Tensor: The perturbed input data. + """ x, y = data x.requires_grad = True diff --git a/src/aijack/attack/inversion/utils/distance.py b/src/aijack/attack/inversion/utils/distance.py index f9ba77ce..61638bd2 100644 --- a/src/aijack/attack/inversion/utils/distance.py +++ b/src/aijack/attack/inversion/utils/distance.py @@ -1,4 +1,14 @@ def l2(fake_gradients, received_gradients, gradient_ignore_pos): + """Computes the L2 distance between fake and received gradients. + + Args: + fake_gradients (list of torch.Tensor): List of fake gradients. + received_gradients (list of torch.Tensor): List of received gradients. + gradient_ignore_pos (list of int): Positions to ignore while computing distance. + + Returns: + float: The L2 distance. + """ distance = 0 for i, (f_g, c_g) in enumerate(zip(fake_gradients, received_gradients)): if i not in gradient_ignore_pos: @@ -7,6 +17,16 @@ def l2(fake_gradients, received_gradients, gradient_ignore_pos): def cossim(fake_gradients, received_gradients, gradient_ignore_pos): + """Computes the cosine similarity distance between fake and received gradients. + + Args: + fake_gradients (list of torch.Tensor): List of fake gradients. + received_gradients (list of torch.Tensor): List of received gradients. + gradient_ignore_pos (list of int): Positions to ignore while computing distance. + + Returns: + float: The cosine similarity distance. + """ distance = 0 pnorm_0 = 0 pnorm_1 = 0 diff --git a/src/aijack/attack/inversion/utils/regularization.py b/src/aijack/attack/inversion/utils/regularization.py index 508aa711..c09bf7a1 100644 --- a/src/aijack/attack/inversion/utils/regularization.py +++ b/src/aijack/attack/inversion/utils/regularization.py @@ -2,23 +2,58 @@ def total_variance(x): + """Computes the total variation of an input tensor. + + Args: + x (torch.Tensor): The input tensor. + + Returns: + torch.Tensor: The total variation. 
+ """ dx = torch.mean(torch.abs(x[:, :, :, :-1] - x[:, :, :, 1:])) dy = torch.mean(torch.abs(x[:, :, :-1, :] - x[:, :, 1:, :])) return dx + dy def label_matching(pred, label): + """Computes the label matching loss between predicted and target labels. + + Args: + pred (torch.Tensor): Predicted labels. + label (torch.Tensor): Target labels. + + Returns: + torch.Tensor: The label matching loss. + """ onehot_label = torch.eye(pred.shape[-1])[label] onehot_label = onehot_label.to(pred.device) return torch.sqrt(torch.sum((pred - onehot_label) ** 2)) def group_consistency(x, group_x): + """Computes the group consistency loss between an input and a group of inputs. + + Args: + x (torch.Tensor): The input tensor. + group_x (list): List of tensors representing the group. + + Returns: + torch.Tensor: The group consistency loss. + """ mean_group_x = sum(group_x) / len(group_x) return torch.norm(x - mean_group_x, p=2) def bn_regularizer(feature_maps, bn_layers): + """Computes the batch normalization regularizer loss. + + Args: + feature_maps (list): List of feature maps. + bn_layers (list): List of batch normalization layers. + + Returns: + torch.Tensor: The batch normalization regularizer loss. + """ bn_reg = 0 for i, layer in enumerate(bn_layers): fm = feature_maps[i] diff --git a/src/aijack/attack/labelleakage/normattack.py b/src/aijack/attack/labelleakage/normattack.py index c65d3c30..b69d232a 100644 --- a/src/aijack/attack/labelleakage/normattack.py +++ b/src/aijack/attack/labelleakage/normattack.py @@ -7,6 +7,18 @@ def attach_normattack_to_splitnn( cls, attack_criterion, target_client_index=0, device="cpu" ): + """Attaches a norm attack (label leakage attack) to a SplitNN model. + + Args: + cls: The SplitNN model class. + attack_criterion: The criterion for the attack. + target_client_index (int, optional): Index of the target client. Defaults to 0. + device (str, optional): Device for computation. Defaults to "cpu". 
+ + Returns: + class: A wrapper class with attached norm attack. + """ + class NormAttackSplitNNWrapper(cls): def __init__(self, *args, **kwargs): super(NormAttackSplitNNWrapper, self).__init__(*args, **kwargs) diff --git a/src/aijack/attack/poison/history.py b/src/aijack/attack/poison/history.py index 64bcac04..ca1e1334 100644 --- a/src/aijack/attack/poison/history.py +++ b/src/aijack/attack/poison/history.py @@ -2,6 +2,16 @@ def attach_history_attack_to_client(cls, lam): + """Attaches a history attack to a client. + + Args: + cls: The client class. + lam (float): The lambda parameter for the attack. + + Returns: + class: A wrapper class with attached history attack. + """ + class HistoryAttackClientWrapper(cls): """Implementation of history attack proposed in https://arxiv.org/pdf/2203.08669.pdf""" diff --git a/src/aijack/attack/poison/label_flip.py b/src/aijack/attack/poison/label_flip.py index 6cffcebc..46f39f2d 100644 --- a/src/aijack/attack/poison/label_flip.py +++ b/src/aijack/attack/poison/label_flip.py @@ -8,6 +8,18 @@ def attach_label_flip_attack_to_client( cls, victim_label, target_label=None, class_num=None ): + """Attaches a label flip attack to a client. + + Args: + cls: The client class. + victim_label: The label to be replaced. + target_label: The label to replace the victim label with. If None, a random label will be chosen. + class_num: The number of classes. + + Returns: + class: A wrapper class with attached label flip attack. + """ + class LabelFlipAttackClientWrapper(cls): def __init__(self, *args, **kwargs): super(LabelFlipAttackClientWrapper, self).__init__(*args, **kwargs) diff --git a/src/aijack/attack/poison/mapf.py b/src/aijack/attack/poison/mapf.py index db3253fd..dcc04bb2 100644 --- a/src/aijack/attack/poison/mapf.py +++ b/src/aijack/attack/poison/mapf.py @@ -4,6 +4,18 @@ def attach_mapf_to_client(cls, lam, base_model_parameters=None): + """Attaches a MAPF attack to a client. + + Args: + cls: The client class. 
+ lam (float): The lambda parameter for the attack. + base_model_parameters (list, optional): Base model parameters for parameter flipping. + If None, random parameters will be generated. Defaults to None. + + Returns: + class: A wrapper class with attached MAPF attack. + """ + class MAPFClientWrapper(cls): """Implementation of MAPF proposed in https://arxiv.org/pdf/2203.08669.pdf""" diff --git a/src/aijack/defense/crobustness/pixeldp.py b/src/aijack/defense/crobustness/pixeldp.py index 6a1f5183..cc816320 100644 --- a/src/aijack/defense/crobustness/pixeldp.py +++ b/src/aijack/defense/crobustness/pixeldp.py @@ -9,14 +9,46 @@ def clopper_pearson_interval(num_success, num_total, alpha): + """ + Calculate the Clopper-Pearson confidence interval. + + Args: + num_success (int): Number of successes. + num_total (int): Total number of trials. + alpha (float): Significance level. + + Returns: + tuple: Lower and upper bounds of the confidence interval. + """ return proportion_confint(num_success, num_total, alpha=2 * alpha, method="beta") def gaus_delta_term(delta): + """ + Calculate the Gaussian delta term. + + Args: + delta (float): Delta value. + + Returns: + float: Gaussian delta term. + """ return sqrt(2 * (log1c25 - log(delta))) def get_maximum_L_laplace(lower_bound, upper_bound, L, dp_eps): + """ + Calculate the maximum L value for Laplace mechanism. + + Args: + lower_bound (float): Lower bound of the confidence interval. + upper_bound (float): Upper bound of the confidence interval. + L (float): Sensitivity parameter. + dp_eps (float): Epsilon value for differential privacy. + + Returns: + float: Maximum L value. + """ if lower_bound <= upper_bound: return 0.0 return L * log(lower_bound / upper_bound) / (2 * dp_eps) @@ -33,6 +65,23 @@ def get_maximum_L_gaussian( eps_max=1.0, tolerance=0.001, ): + """ + Calculate the maximum L value for Gaussian mechanism. + + Args: + p_max_lb (float): Lower bound of the maximum probability. 
+ p_sec_ub (float): Upper bound of the second maximum probability. + attack_size (float): Size of the attack. + dp_epsilon (float): Epsilon value for differential privacy. + dp_delta (float): Delta value for differential privacy. + delta_range (list, optional): Range of delta values. Defaults to None. + eps_min (float, optional): Minimum epsilon value. Defaults to 0.0. + eps_max (float, optional): Maximum epsilon value. Defaults to 1.0. + tolerance (float, optional): Tolerance for epsilon search. Defaults to 0.001. + + Returns: + float: Maximum L value. + """ # Based on the original implementation: # https://github.com/columbia/pixeldp/blob/master/models/utils/robustness.py if p_max_lb <= p_sec_ub: @@ -67,6 +116,20 @@ def get_maximum_L_gaussian( def get_certified_robustness_size_argmax(counts, eta, L, eps, delta, mode="gaussian"): + """ + Calculate the maximum certified robustness size. + + Args: + counts (torch.Tensor): Count of predictions. + eta (float): Eta value. + L (float): Sensitivity parameter. + eps (float): Epsilon value. + delta (float): Delta value. + mode (str, optional): Mode of calculation. Defaults to "gaussian". + + Returns: + float: Maximum certified robustness size. + """ total_counts = torch.sum(counts) sorted_counts, _ = torch.sort(counts) lb = clopper_pearson_interval(sorted_counts[-1], total_counts, eta)[0] @@ -98,6 +161,21 @@ def __init__( mode="laplace", sensitivity=1, ): + """ + Initialize the PixelDP module. + + Args: + model (torch.nn.Module): The model to be used. + num_classes (int): Number of classes. + L (float): Sensitivity parameter. + eps (float): Epsilon value. + delta (float): Delta value. + n_population_mc (int, optional): Number of samples for Monte Carlo. Defaults to 1000. + batch_size_mc (int, optional): Batch size for Monte Carlo. Defaults to 32. + eta (float, optional): Eta value. Defaults to 0.05. + mode (str, optional): Mode of operation. Defaults to "laplace". + sensitivity (float, optional): Sensitivity value. 
Defaults to 1. + """ super(PixelDP, self).__init__() self.model = model @@ -119,6 +197,15 @@ def __init__( raise ValueError(f"{mode} is not supported") def sample_noise(self, x): + """ + Sample noise for the given input. + + Args: + x (torch.Tensor): Input tensor. + + Returns: + torch.Tensor: Sampled noise. + """ if self.mode == "laplace": return self.dist.sample(x.shape).to(x.device) else: @@ -154,6 +241,15 @@ def forward_eval(self, x): return preds, counts def certify(self, counts): + """ + Certify the robustness of the model. + + Args: + counts (torch.Tensor): Count of predictions. + + Returns: + float: Certified robustness size. + """ return get_certified_robustness_size_argmax( counts, self.eta, self.L, self.eps, self.delta, self.mode ) diff --git a/src/aijack/defense/dp/manager/accountant.py b/src/aijack/defense/dp/manager/accountant.py index f173b7eb..38a51944 100644 --- a/src/aijack/defense/dp/manager/accountant.py +++ b/src/aijack/defense/dp/manager/accountant.py @@ -17,6 +17,10 @@ class BaseMomentAccountant: + """ + Base class for computing the privacy budget using the Moments Accountant technique. + """ + def __init__( self, search="ternary", @@ -26,6 +30,17 @@ def __init__( orders=[], max_iterations=10000, ): + """ + Initialize the BaseMomentAccountant. + + Args: + search (str, optional): The search strategy. Defaults to "ternary". + order_min (int, optional): Minimum order. Defaults to 2. + order_max (int, optional): Maximum order. Defaults to 64. + precision (float, optional): Precision of the search. Defaults to 0.5. + orders (list, optional): List of orders. Defaults to []. + max_iterations (int, optional): Maximum number of iterations. Defaults to 10000. + """ self.order_min = order_min self.order_max = order_max self.orders = orders @@ -47,6 +62,18 @@ def __init__( self._cache = {} def calc_upperbound_of_rdp_onestep(self, alpha, noise_params, sampling_rate): + """ + Calculate the upper bound of Renyi Differential Privacy (RDP) for one step. 
+ + Args: + alpha (float): Privacy parameter alpha. + noise_params (dict): Parameters of the noise distribution. + sampling_rate (float): Sampling rate. + + Returns: + float: Upper bound of RDP for one step. + """ + key = hash( f"{alpha}_{list(noise_params.keys())[0]}_{list(noise_params.values())[0]}_{sampling_rate}" ) @@ -65,6 +92,17 @@ def calc_upperbound_of_rdp_onestep(self, alpha, noise_params, sampling_rate): return self._cache[key] def _calc_upperbound_of_rdp(self, lam, steps_info): + """ + Calculate the upper bound of RDP. + + Args: + lam (float): Parameter lambda. + steps_info (list): Information about steps. + + Returns: + float: Upper bound of RDP. + """ + rdp = 0.0 for noise_params, sampling_rate, num_steps in steps_info: rdp += num_steps * self.calc_upperbound_of_rdp_onestep( @@ -73,12 +111,33 @@ def _calc_upperbound_of_rdp(self, lam, steps_info): return rdp def reset_step_info(self): + """Reset step information.""" self.steps_info = [] def add_step_info(self, noise_params, sampling_rate, num_steps): + """ + Add step information. + + Args: + noise_params (dict): Parameters of the noise distribution. + sampling_rate (float): Sampling rate. + num_steps (int): Number of steps. + """ self.steps_info.append((noise_params, sampling_rate, num_steps)) def step(self, noise_params, sampling_rate, num_steps): + """ + Decorator to add step information to a function. + + Args: + noise_params (dict): Parameters of the noise distribution. + sampling_rate (float): Sampling rate. + num_steps (int): Number of steps. + + Returns: + function: Decorated function. + """ + def _step(f): def _wrapper(*args, **keywords): result = f(*args, **keywords) @@ -100,6 +159,21 @@ def get_noise_multiplier( noise_multiplier_max=10, noise_multiplier_precision=0.01, ): + """Get noise multiplier. + + Args: + noise_multiplier_key (str): Key for noise multiplier. + target_epsilon (float): Target epsilon. + target_delta (float): Target delta. + sampling_rate (float): Sampling rate. 
+ num_iterations (int): Number of iterations. + noise_multiplier_min (float, optional): Minimum noise multiplier. Defaults to 0. + noise_multiplier_max (float, optional): Maximum noise multiplier. Defaults to 10. + noise_multiplier_precision (float, optional): Precision of noise multiplier. Defaults to 0.01. + + Returns: + float: Noise multiplier. + """ eps = float("inf") while eps > target_epsilon: noise_multiplier_max = 2 * noise_multiplier_max @@ -133,6 +207,15 @@ def get_noise_multiplier( return noise_multiplier def get_delta(self, epsilon): + """ + Get delta. + + Args: + epsilon (float): Epsilon value. + + Returns: + float: Delta value. + """ optimal_lam = self.search( lambda order: (order - 1) * (self._calc_upperbound_of_rdp(order - 1, self.steps_info) - epsilon), @@ -151,6 +234,15 @@ def get_delta(self, epsilon): return min_delta def get_epsilon(self, delta): + """ + Get epsilon. + + Args: + delta (float): Delta value. + + Returns: + float: Epsilon value. + """ # log_inv_delta = math.log(1 / delta) def estimate_eps(order): @@ -175,6 +267,10 @@ def estimate_eps(order): class GeneralMomentAccountant(BaseMomentAccountant): + """ + Generalized class for computing the privacy budget using the Moments Accountant technique. + """ + def __init__( self, name="SGM", @@ -188,6 +284,21 @@ def __init__( max_iterations=10000, backend="cpp", ): + """ + Initialize the GeneralMomentAccountant. + + Args: + name (str, optional): Name of the accountant. Defaults to "SGM". + search (str, optional): The search strategy. Defaults to "ternary". + order_min (int, optional): Minimum order. Defaults to 2. + order_max (int, optional): Maximum order. Defaults to 64. + precision (float, optional): Precision of the search. Defaults to 0.5. + orders (list, optional): List of orders. Defaults to []. + noise_type (str, optional): Type of noise. Defaults to "Gaussian". + bound_type (str, optional): Type of bound. Defaults to "rdp_upperbound_closedformula". 
+ max_iterations (int, optional): Maximum number of iterations. Defaults to 10000. + backend (str, optional): Backend for calculation. Defaults to "cpp". + """ super().__init__( search=search, order_min=order_min, @@ -201,12 +312,25 @@ def __init__( self._set_upperbound_func(backend, bound_type) def _set_noise_type(self, noise_type): + """ + Set the noise type. + + Args: + noise_type (str): Type of noise. + """ if noise_type == "Gaussian": self.eps_func = eps_gaussian elif noise_type == "Laplace": self.eps_func = eps_laplace def _set_upperbound_func(self, backend, bound_type): + """ + Set the upper bound function. + + Args: + backend (str): Backend for calculation. + bound_type (str): Type of bound. + """ if backend == "cpp" and bound_type == "rdp_upperbound_closedformula": self.calc_bound_of_rdp = ( calc_upperbound_of_rdp_with_Sampled_Gaussian_Mechanism diff --git a/src/aijack/defense/dp/manager/adadps.py b/src/aijack/defense/dp/manager/adadps.py index c3d2ab76..1cecb351 100644 --- a/src/aijack/defense/dp/manager/adadps.py +++ b/src/aijack/defense/dp/manager/adadps.py @@ -9,6 +9,12 @@ def _update_side_info_rmsprop(opt): + """ + Update side information for RMSprop optimizer. + + Args: + opt: Optimizer instance. + """ for group in opt.param_groups: for param, si in zip(group["params"], group["side_information"]): if param.requires_grad: @@ -16,6 +22,12 @@ def _update_side_info_rmsprop(opt): def _apply_side_infor_rmsprop(opt): + """ + Apply side information for RMSprop optimizer. + + Args: + opt: Optimizer instance. + """ for group in opt.param_groups: for param, si in zip(group["params"], group["side_information"]): if param.requires_grad: @@ -23,6 +35,12 @@ def _apply_side_infor_rmsprop(opt): def _update_side_info_adam(opt): + """ + Update side information for Adam optimizer. + + Args: + opt: Optimizer instance. 
+ """ for group in opt.param_groups: for param, si, pm in zip( group["params"], group["side_information"], group["potential_momentum"] @@ -33,6 +51,12 @@ def _update_side_info_adam(opt): def _apply_side_infor_adam(opt): + """ + Apply side information for Adam optimizer. + + Args: + opt: Optimizer instance. + """ for group in opt.param_groups: for param, si, pm in zip( group["params"], group["side_information"], group["potential_momentum"] @@ -42,6 +66,12 @@ def _apply_side_infor_adam(opt): def _precondition_grads_with_side_info(opt): + """ + Precondition gradients with side information. + + Args: + opt: Optimizer instance. + """ if opt.mode == "rmsprop": _apply_side_infor_rmsprop(opt) elif opt.mode == "adam": @@ -60,6 +90,25 @@ def attach_adadps( beta=0.9, eps_to_avoid_nan=1e-8, ): + """ + Attach the AdaDPS optimizer to the given class. + + Args: + cls: Class to which AdaDPS optimizer will be attached. + accountant: Privacy accountant. + l2_norm_clip (float): L2 norm clip value. + noise_multiplier (float): Noise multiplier value. + lot_size (int): Lot size. + batch_size (int): Batch size. + dataset_size (int): Size of the dataset. + mode (str, optional): Mode of optimization. Defaults to "rmsprop". + beta (float, optional): Beta value. Defaults to 0.9. + eps_to_avoid_nan (float, optional): Epsilon value to avoid NaN. Defaults to 1e-8. + + Returns: + class: Class with AdaDPS optimizer attached. + """ + class AdaDPSWrapper(cls): """Implementation of AdaDPS proposed in `Private Adaptive Optimization with Side information` diff --git a/src/aijack/defense/dp/manager/client.py b/src/aijack/defense/dp/manager/client.py index 2ce9caeb..8bcc64a6 100644 --- a/src/aijack/defense/dp/manager/client.py +++ b/src/aijack/defense/dp/manager/client.py @@ -5,6 +5,17 @@ def attach_dpsgd_to_client(cls, privacy_manager, sigma): + """ + Attaches DPSGD (Differentially Private Stochastic Gradient Descent) functionality to the client class. 
+ + Args: + cls: Client class to which DPSGD functionality will be attached. + privacy_manager: Privacy manager object providing DPSGD functionality. + sigma (float): Noise multiplier for privacy. + + Returns: + tuple: Tuple containing the DPSGDClientWrapper class and the privacy optimizer wrapper. + """ dpoptimizer_wrapper, lot_loader, batch_loader = privacy_manager.privatize(sigma) class DPSGDClientWrapper(cls): @@ -40,5 +51,18 @@ def local_train( class DPSGDClientManager(BaseManager): + """ + Manager class for attaching DPSGD to clients. + """ + def attach(self, cls): + """ + Attaches DPSGD to the client class. + + Args: + cls: Client class. + + Returns: + tuple: The return value of attach_dpsgd_to_client for cls (the DPSGDClientWrapper class and the privacy optimizer wrapper). + """ return attach_dpsgd_to_client(cls, *self.args, **self.kwargs) diff --git a/src/aijack/defense/dp/manager/dp_manager.py b/src/aijack/defense/dp/manager/dp_manager.py index dcaf6647..e7f29fe7 100644 --- a/src/aijack/defense/dp/manager/dp_manager.py +++ b/src/aijack/defense/dp/manager/dp_manager.py @@ -6,6 +6,22 @@ class DPSGDManager: + """ + Manager class for privatizing DPSGD (Differentially Private Stochastic Gradient Descent) optimization. + + Args: + accountant: Privacy accountant providing privacy guarantees. + optimizer_cls: Class of the optimizer to be privatized. + l2_norm_clip (float): L2 norm clip parameter for gradient clipping. + dataset: Dataset used for training. + lot_size (int): Size of the lot (local update). + batch_size (int): Size of the batch used for training. + iterations (int): Number of iterations. + smoothing (bool, optional): Whether to enable smoothing. Defaults to False. + smoothing_radius (float, optional): Smoothing radius. Defaults to 10.0. + + """ + def __init__( self, accountant, @@ -29,6 +45,16 @@ def __init__( self.smoothing_radius = smoothing_radius def privatize(self, noise_multiplier): + """ + Privatizes the optimizer. + + Args: + noise_multiplier (float): Noise multiplier for privacy.
+ + Returns: + tuple: Tuple containing the privatized optimizer class, lot loader function, and batch loader function. + """ + dpoptimizer_class = attach_dpoptimizer( self.optimizer_cls, self.accountant, @@ -57,6 +83,22 @@ def batch_loader(lot): class AdaDPSManager: + """ + Manager class for privatizing AdaDPS (Private Adaptive Optimization with Side information) optimization. + + Args: + accountant: Privacy accountant providing privacy guarantees. + optimizer_cls: Class of the optimizer to be privatized. + l2_norm_clip (float): L2 norm clip parameter for gradient clipping. + dataset: Dataset used for training. + lot_size (int): Size of the lot (local update). + batch_size (int): Size of the batch used for training. + iterations (int): Number of iterations. + mode (str, optional): Mode of optimization (rmsprop or adam). Defaults to "rmsprop". + beta (float, optional): Beta parameter for optimization. Defaults to 0.9. + eps_to_avoid_nan (float, optional): Epsilon parameter to avoid NaN during optimization. Defaults to 1e-8. + """ + def __init__( self, accountant, @@ -82,6 +124,16 @@ def __init__( self.eps_to_avoid_nan = eps_to_avoid_nan def privatize(self, noise_multiplier): + """ + Privatizes the optimizer. + + Args: + noise_multiplier (float): Noise multiplier for privacy. + + Returns: + tuple: Tuple containing the privatized optimizer class, lot loader function, and batch loader function. + """ + dpoptimizer_class = attach_adadps( self.optimizer_cls, self.accountant, diff --git a/src/aijack/defense/mid/nn.py b/src/aijack/defense/mid/nn.py index 077bff13..459a0df4 100644 --- a/src/aijack/defense/mid/nn.py +++ b/src/aijack/defense/mid/nn.py @@ -5,6 +5,17 @@ class VIB(nn.Module): + """ + Variational Information Bottleneck (VIB) module. + + Args: + encoder (torch.nn.Module): Encoder module. + decoder (torch.nn.Module): Decoder module. + dim_z (int, optional): Dimension of latent variable z. Defaults to 256. + num_samples (int, optional): Number of samples.
Defaults to 10. + beta (float, optional): Beta value. Defaults to 1e-3. + """ + def __init__(self, encoder, decoder, dim_z=256, num_samples=10, beta=1e-3): super(VIB, self).__init__() self.dim_z = dim_z @@ -14,6 +25,17 @@ def __init__(self, encoder, decoder, dim_z=256, num_samples=10, beta=1e-3): self.beta = beta def get_params_of_p_z_given_x(self, x): + """ + Compute parameters of p(z|x). + + Args: + x (torch.Tensor): Input tensor. + + Returns: + tuple: Tuple containing mean and standard deviation of p(z|x). + Raises: + ValueError: If the output dimension of encoder is not 2 * dim_z. + """ encoder_output = self.encoder(x) if encoder_output.shape[1] != self.dim_z * 2: raise ValueError("the output dimension of encoder must be 2 * dim_z") @@ -22,12 +44,33 @@ def get_params_of_p_z_given_x(self, x): return mu, sigma def sampling_from_encoder(self, mu, sigma, batch_size): + """ + Sample from encoder distribution. + + Args: + mu (torch.Tensor): Mean of the distribution. + sigma (torch.Tensor): Standard deviation of the distribution. + batch_size (int): Batch size. + + Returns: + torch.Tensor: Sampled tensor from encoder distribution. + """ return mu + sigma * torch.normal( torch.zeros(self.num_samples, batch_size, self.dim_z), torch.ones(self.num_samples, batch_size, self.dim_z), ) def forward(self, x): + """ + Forward pass of the VIB module. + + Args: + x (torch.Tensor): Input tensor. + + Returns: + torch.Tensor: Output tensor. + dict: Dictionary containing sampled outputs and parameters. + """ batch_size = x.size()[0] # encoder @@ -51,6 +94,16 @@ def forward(self, x): return outputs def loss(self, y, result_dict): + """ + Compute loss. + + Args: + y (torch.Tensor): Target tensor. + result_dict (dict): Dictionary containing sampled outputs and parameters. + + Returns: + torch.Tensor: Loss value. 
+ """ sampled_y_pred = result_dict["sampled_decoded_outputs"] p_z_given_x_mu = result_dict["p_z_given_x_mu"] p_z_given_x_sigma = result_dict["p_z_given_x_sigma"] diff --git a/src/aijack/defense/soteria/soteria_client.py b/src/aijack/defense/soteria/soteria_client.py index a58ea6e0..a6ad1a53 100644 --- a/src/aijack/defense/soteria/soteria_client.py +++ b/src/aijack/defense/soteria/soteria_client.py @@ -10,6 +10,20 @@ def attach_soteria_to_client( epsilon=0.2, target_layer_name=None, ): + """ + Attaches the Soteria wrapper to the client class. + + Args: + cls: The client class to which Soteria will be attached. + input_layer (str): Name of the input layer. + perturbed_layer (str): Name of the perturbed layer. + epsilon (float, optional): Privacy budget epsilon. Defaults to 0.2. + target_layer_name (str, optional): Name of the target layer. Defaults to None. + + Returns: + class: Client class with Soteria wrapper attached. + """ + class SoteriaClientWrapper(cls): """Implementation of https://arxiv.org/pdf/2012.06043.pdf"""