From 1082339e70c5922cb382e60bfa25a5849da82f9b Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Fri, 19 Apr 2024 10:24:20 +0100 Subject: [PATCH 01/19] fix keccak ref1 remove spill --- .../keccak1600/amd64/ref1/keccakf1600.jinc | 5 +--- .../kyber/kyber768/amd64/ref/indcpa.jinc | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/common/keccak/keccak1600/amd64/ref1/keccakf1600.jinc b/src/common/keccak/keccak1600/amd64/ref1/keccakf1600.jinc index 85bbfd40..e261b30b 100644 --- a/src/common/keccak/keccak1600/amd64/ref1/keccakf1600.jinc +++ b/src/common/keccak/keccak1600/amd64/ref1/keccakf1600.jinc @@ -130,23 +130,20 @@ inline fn __round_ref1(reg ptr u64[25] e a, reg u64 rc) -> reg ptr u64[25] inline fn __keccakf1600_ref1(reg ptr u64[25] a) -> reg ptr u64[25] { reg ptr u64[24] RC; - stack ptr u64[24] s_RC; stack u64[25] s_e; reg ptr u64[25] e; + reg u64 c rc; RC = KECCAK1600_RC; - s_RC = RC; e = s_e; c = 0; while (c < KECCAK_ROUNDS - 1) { - RC = s_RC; rc = RC[(int) c]; e = __round_ref1(e, a, rc); - RC = s_RC; rc = RC[(int) c + 1]; a = __round_ref1(a, e, rc); diff --git a/src/crypto_kem/kyber/kyber768/amd64/ref/indcpa.jinc b/src/crypto_kem/kyber/kyber768/amd64/ref/indcpa.jinc index 5e0ac756..34c8982f 100644 --- a/src/crypto_kem/kyber/kyber768/amd64/ref/indcpa.jinc +++ b/src/crypto_kem/kyber/kyber768/amd64/ref/indcpa.jinc @@ -98,6 +98,9 @@ fn __indcpa_enc(stack u64 sctp, reg ptr u8[32] msgp, reg u64 pkp, reg ptr u8[KYB reg u64 ctp; reg u16 t; reg u8 nonce; + stack ptr u8[KYBER_SYMBYTES] noiseseed_s; + + noiseseed_s = noiseseed; pkpv = __polyvec_frombytes(pkp); @@ -116,20 +119,31 @@ fn __indcpa_enc(stack u64 sctp, reg ptr u8[32] msgp, reg u64 pkp, reg ptr u8[KYB aat = __gen_matrix(publicseed, 1); + noiseseed = noiseseed_s; nonce = 0; sp[0:KYBER_N] = _poly_getnoise(sp[0:KYBER_N], noiseseed, nonce); + + noiseseed = noiseseed_s; nonce = 1; sp[KYBER_N:KYBER_N] = _poly_getnoise(sp[KYBER_N:KYBER_N], noiseseed, nonce); + + noiseseed = noiseseed_s; nonce = 2; sp[2*KYBER_N:KYBER_N] = _poly_getnoise(sp[2*KYBER_N:KYBER_N], noiseseed, nonce); + noiseseed = noiseseed_s; nonce = 3; ep[0:KYBER_N] = _poly_getnoise(ep[0:KYBER_N], noiseseed, nonce); + + noiseseed = noiseseed_s; nonce = 4; ep[KYBER_N:KYBER_N] = _poly_getnoise(ep[KYBER_N:KYBER_N], noiseseed, nonce); + + noiseseed = noiseseed_s; nonce = 5; ep[2*KYBER_N:KYBER_N] = _poly_getnoise(ep[2*KYBER_N:KYBER_N], noiseseed, nonce); + noiseseed = noiseseed_s; nonce = 6; epp = _poly_getnoise(epp, noiseseed, nonce); @@ -167,6 +181,9 @@ fn __iindcpa_enc(reg ptr u8[KYBER_CT_LEN] ctp, reg ptr u8[32] msgp, reg u64 pkp, reg u16 t; reg u8 nonce; stack ptr u8[KYBER_CT_LEN] sctp; + stack ptr u8[KYBER_SYMBYTES] noiseseed_s; + + noiseseed_s = noiseseed; sctp = ctp; @@ -187,20 +204,31 @@ fn __iindcpa_enc(reg ptr u8[KYBER_CT_LEN] ctp, reg ptr u8[32] msgp, reg u64 pkp, aat = __gen_matrix(publicseed, 1); + noiseseed = noiseseed_s; nonce = 0; sp[0:KYBER_N] = _poly_getnoise(sp[0:KYBER_N], noiseseed, nonce); + + noiseseed = noiseseed_s; nonce = 1; sp[KYBER_N:KYBER_N] = _poly_getnoise(sp[KYBER_N:KYBER_N], noiseseed, nonce); + + noiseseed = noiseseed_s; nonce = 2; sp[2*KYBER_N:KYBER_N] = _poly_getnoise(sp[2*KYBER_N:KYBER_N], noiseseed, nonce); + noiseseed = noiseseed_s; nonce = 3; ep[0:KYBER_N] = _poly_getnoise(ep[0:KYBER_N], noiseseed, nonce); + + noiseseed = noiseseed_s; nonce = 4; ep[KYBER_N:KYBER_N] = _poly_getnoise(ep[KYBER_N:KYBER_N], noiseseed, nonce); + + noiseseed = noiseseed_s; nonce = 5; ep[2*KYBER_N:KYBER_N] = _poly_getnoise(ep[2*KYBER_N:KYBER_N], noiseseed, nonce); + noiseseed = noiseseed_s; nonce = 6; epp = _poly_getnoise(epp, noiseseed, nonce); From 7e65815ece4c5a0dcc993dfdb054d0a72763bb77 Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Mon, 29 Apr 2024 08:53:55 +0100 Subject: [PATCH 02/19] libjade: update to dual license --- LICENSE | 122 +----------------------- LICENSES/Apache-2.0.txt | 202 ++++++++++++++++++++++++++++++++++++++++ LICENSES/CC0-1.0.txt | 121 ++++++++++++++++++++++++ 3 files changed, 324 insertions(+), 121 deletions(-) create mode 100644 LICENSES/Apache-2.0.txt create mode 100644 LICENSES/CC0-1.0.txt diff --git a/LICENSE b/LICENSE index 0e259d42..59ff3e16 100644 --- a/LICENSE +++ b/LICENSE @@ -1,121 +1 @@ -Creative Commons Legal Code - -CC0 1.0 Universal - - CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE - LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN - ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS - INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES - REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS - PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM - THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED - HEREUNDER. - -Statement of Purpose - -The laws of most jurisdictions throughout the world automatically confer -exclusive Copyright and Related Rights (defined below) upon the creator -and subsequent owner(s) (each and all, an "owner") of an original work of -authorship and/or a database (each, a "Work"). - -Certain owners wish to permanently relinquish those rights to a Work for -the purpose of contributing to a commons of creative, cultural and -scientific works ("Commons") that the public can reliably and without fear -of later claims of infringement build upon, modify, incorporate in other -works, reuse and redistribute as freely as possible in any form whatsoever -and for any purposes, including without limitation commercial purposes. -These owners may contribute to the Commons to promote the ideal of a free -culture and the further production of creative, cultural and scientific -works, or to gain reputation or greater distribution for their Work in -part through the use and efforts of others. - -For these and/or other purposes and motivations, and without any -expectation of additional consideration or compensation, the person -associating CC0 with a Work (the "Affirmer"), to the extent that he or she -is an owner of Copyright and Related Rights in the Work, voluntarily -elects to apply CC0 to the Work and publicly distribute the Work under its -terms, with knowledge of his or her Copyright and Related Rights in the -Work and the meaning and intended legal effect of CC0 on those rights. - -1. Copyright and Related Rights. A Work made available under CC0 may be -protected by copyright and related or neighboring rights ("Copyright and -Related Rights"). Copyright and Related Rights include, but are not -limited to, the following: - - i. the right to reproduce, adapt, distribute, perform, display, - communicate, and translate a Work; - ii. moral rights retained by the original author(s) and/or performer(s); -iii. publicity and privacy rights pertaining to a person's image or - likeness depicted in a Work; - iv. rights protecting against unfair competition in regards to a Work, - subject to the limitations in paragraph 4(a), below; - v. rights protecting the extraction, dissemination, use and reuse of data - in a Work; - vi. database rights (such as those arising under Directive 96/9/EC of the - European Parliament and of the Council of 11 March 1996 on the legal - protection of databases, and under any national implementation - thereof, including any amended or successor version of such - directive); and -vii. other similar, equivalent or corresponding rights throughout the - world based on applicable law or treaty, and any national - implementations thereof. - -2. Waiver. To the greatest extent permitted by, but not in contravention -of, applicable law, Affirmer hereby overtly, fully, permanently, -irrevocably and unconditionally waives, abandons, and surrenders all of -Affirmer's Copyright and Related Rights and associated claims and causes -of action, whether now known or unknown (including existing as well as -future claims and causes of action), in the Work (i) in all territories -worldwide, (ii) for the maximum duration provided by applicable law or -treaty (including future time extensions), (iii) in any current or future -medium and for any number of copies, and (iv) for any purpose whatsoever, -including without limitation commercial, advertising or promotional -purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each -member of the public at large and to the detriment of Affirmer's heirs and -successors, fully intending that such Waiver shall not be subject to -revocation, rescission, cancellation, termination, or any other legal or -equitable action to disrupt the quiet enjoyment of the Work by the public -as contemplated by Affirmer's express Statement of Purpose. - -3. Public License Fallback. Should any part of the Waiver for any reason -be judged legally invalid or ineffective under applicable law, then the -Waiver shall be preserved to the maximum extent permitted taking into -account Affirmer's express Statement of Purpose. In addition, to the -extent the Waiver is so judged Affirmer hereby grants to each affected -person a royalty-free, non transferable, non sublicensable, non exclusive, -irrevocable and unconditional license to exercise Affirmer's Copyright and -Related Rights in the Work (i) in all territories worldwide, (ii) for the -maximum duration provided by applicable law or treaty (including future -time extensions), (iii) in any current or future medium and for any number -of copies, and (iv) for any purpose whatsoever, including without -limitation commercial, advertising or promotional purposes (the -"License"). The License shall be deemed effective as of the date CC0 was -applied by Affirmer to the Work. Should any part of the License for any -reason be judged legally invalid or ineffective under applicable law, such -partial invalidity or ineffectiveness shall not invalidate the remainder -of the License, and in such case Affirmer hereby affirms that he or she -will not (i) exercise any of his or her remaining Copyright and Related -Rights in the Work or (ii) assert any associated claims and causes of -action with respect to the Work, in either case contrary to Affirmer's -express Statement of Purpose. - -4. Limitations and Disclaimers. - - a. No trademark or patent rights held by Affirmer are waived, abandoned, - surrendered, licensed or otherwise affected by this document. - b. Affirmer offers the Work as-is and makes no representations or - warranties of any kind concerning the Work, express, implied, - statutory or otherwise, including without limitation warranties of - title, merchantability, fitness for a particular purpose, non - infringement, or the absence of latent or other defects, accuracy, or - the present or absence of errors, whether or not discoverable, all to - the greatest extent permissible under applicable law. - c. Affirmer disclaims responsibility for clearing rights of other persons - that may apply to the Work or any use thereof, including without - limitation any person's Copyright and Related Rights in the Work. - Further, Affirmer disclaims responsibility for obtaining any necessary - consents, permissions or other rights required for any use of the - Work. - d. Affirmer understands and acknowledges that Creative Commons is not a - party to this document and has no duty or obligation with respect to - this CC0 or use of the Work. +SPDX-License-Identifier: CC0-1.0 OR Apache-2.0 diff --git a/LICENSES/Apache-2.0.txt b/LICENSES/Apache-2.0.txt new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/LICENSES/Apache-2.0.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/LICENSES/CC0-1.0.txt b/LICENSES/CC0-1.0.txt new file mode 100644 index 00000000..0e259d42 --- /dev/null +++ b/LICENSES/CC0-1.0.txt @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. From 70eb190a54e657d3c21f85afab0f57daf006ffee Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Tue, 25 Jun 2024 09:56:46 +0100 Subject: [PATCH 03/19] sct: towards jasmin-ct --- src/Makefile | 4 +++- src/Makefile.checksct | 9 +++++---- src/Makefile.common | 3 +++ 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Makefile b/src/Makefile index 1001829f..80e48e29 100644 --- a/src/Makefile +++ b/src/Makefile @@ -6,7 +6,9 @@ CC ?= clang CFLAGS ?= -O3 -Wall -Wextra -Wpedantic -Wvla -Werror -std=c99 \ -Wundef -Wshadow -Wcast-align -Wpointer-arith -Wmissing-prototypes \ -fstrict-aliasing -fno-common -pipe -JASMIN ?= jasminc + +JASMIN ?= jasminc +JASMIN_CT ?= jasmin-ct # -------------------------------------------------------------------- CI ?= 0 diff --git a/src/Makefile.checksct b/src/Makefile.checksct index 7a1bfc0c..d2a2ce09 100644 --- a/src/Makefile.checksct +++ b/src/Makefile.checksct @@ -5,10 +5,11 @@ ifneq ($(OP),) -SCT_FLAGS ?= +# TODO: remove --infer +SCT_FLAGS ?= --infer -CHECK_SCT_S = ($(JASMINC) -slice $* -checkSCT $(SCT_FLAGS) $< > $@ 2>&1) $(CIT) -CHECK_SCT = ($(JASMINC) -checkSCT $(SCT_FLAGS) $< > $@ 2>&1) $(CIT) +CHECK_SCT_SLICE = ($(JASMIN_CT) $(JINCLUDE) -slice $* --sct $(SCT_FLAGS) $< > $@ 2>&1) $(CIT) +CHECK_SCT = ($(JASMIN_CT) $(JINCLUDE) --sct $(SCT_FLAGS) $< > $@ 2>&1) $(CIT) SCT_TARGETS = $(addsuffix .sct, $(FUNCTIONS)) @@ -21,7 +22,7 @@ $(OP).sct : $(OP).jazz $(DEPS_DIR)/$(OP).sct.d | $(DEPS_DIR) $(CI_DIR) $(SCT_TARGETS): %.sct : $(OP).jazz $(DEPS_DIR)/%.sct.d | $(DEPS_DIR) $(CI_DIR) $(DEPS) - $(CHECK_SCT_S) + $(CHECK_SCT_SLICE) DEPFILES := \ $(DEPFILES) \ diff --git a/src/Makefile.common b/src/Makefile.common index ab28f62b..0606c32d 100644 --- a/src/Makefile.common +++ b/src/Makefile.common @@ -35,10 +35,13 @@ endif JEXT ?= jazz override JFLAGS += -noinsertarraycopy JINCLUDE = -I Jade:$(SRC) + JASMIN ?= jasminc JASMINC := $(JASMIN) $(JFLAGS) $(JINCLUDE) COMPILE = ($(JASMINC) -o $@ $<) $(CIT) +JASMIN_CT ?= jasmin-ct + # -------------------------------------------------------------------- include $(SRC)/$(OPERATION)/EcFlags.mk From 3a6c9b41a3248a2c6f2c1a6131c34de0d70b793b Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Wed, 26 Jun 2024 07:05:27 +0100 Subject: [PATCH 04/19] sct: towards jasmin-ct (2) --- src/Makefile | 7 +++---- src/Makefile.checksct | 24 +++++++++++++++++++----- src/Makefile.common | 2 -- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/Makefile b/src/Makefile index 80e48e29..27d7314d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -24,7 +24,7 @@ SRC := . FILTER ?= $(SRC)/crypto_% JAZZ ?= $(filter $(FILTER), $(filter-out $(addprefix ./,$(EXCLUDE)), $(sort $(dir $(shell find $(SRC) -name '*.jazz'))))) SAFETY ?= $(addsuffix safety, $(JAZZ)) -SCT ?= $(addsuffix sct, $(JAZZ)) +SCT ?= $(addsuffix check_sct, $(JAZZ)) SOURCES ?= $(filter-out ./, $(sort $(dir $(shell find $(SRC) -name 'Makefile')))) ASM := $(shell find $(SRC) -name '*.s') @@ -74,9 +74,8 @@ $(SAFETY): $(MAKE) -C $(@D) $(@F) || true # -------------------------------------------------------------------- - -.PHONY: sct -sct: $(SCT) +.PHONY: check_sct +check_sct: $(SCT) $(SCT): $(MAKE) -C $(@D) $(@F) || true diff --git a/src/Makefile.checksct b/src/Makefile.checksct index d2a2ce09..2168ccfb 100644 --- a/src/Makefile.checksct +++ b/src/Makefile.checksct @@ -3,27 +3,41 @@ # functions # - it is meant to be included by Makefile.common +# JASMIN_CT belongs here (and not Makefile.common): some options differ from jasminc +JASMIN_CT ?= jasmin-ct + ifneq ($(OP),) -# TODO: remove --infer +# TODO: remove --infer and annotate exported functions SCT_FLAGS ?= --infer -CHECK_SCT_SLICE = ($(JASMIN_CT) $(JINCLUDE) -slice $* --sct $(SCT_FLAGS) $< > $@ 2>&1) $(CIT) -CHECK_SCT = ($(JASMIN_CT) $(JINCLUDE) --sct $(SCT_FLAGS) $< > $@ 2>&1) $(CIT) +CHECK_SCT_SLICE = (JASMINPATH="Jade=$(SRC)" $(JASMIN_CT) --slice $* --sct $(SCT_FLAGS) $< > $@ 2>&1) $(CIT) +CHECK_SCT_SLICE_STDOUT = (JASMINPATH="Jade=$(SRC)" $(JASMIN_CT) --slice $* --sct $(SCT_FLAGS) $< ) $(CIT) + +CHECK_SCT = (JASMINPATH="Jade=$(SRC)" $(JASMIN_CT) --sct $(SCT_FLAGS) $< > $@ 2>&1) $(CIT) +CHECK_SCT_STDOUT = (JASMINPATH="Jade=$(SRC)" $(JASMIN_CT) --sct $(SCT_FLAGS) $< ) $(CIT) -SCT_TARGETS = $(addsuffix .sct, $(FUNCTIONS)) +SCT_TARGETS = $(addsuffix .sct, $(FUNCTIONS)) +SCT_TARGETS_STDOUT = $(addsuffix .stdout, $(SCT_TARGETS)) -sct: $(SCT_TARGETS) +check_sct: $(SCT_TARGETS) $(OP).sct : $(OP).jazz $(DEPS_DIR)/$(OP).sct.d | $(DEPS_DIR) $(CI_DIR) $(DEPS) $(CHECK_SCT) +$(OP).sct.stdout : $(OP).jazz | $(CI_DIR) + $(CHECK_SCT_STDOUT) + $(SCT_TARGETS): %.sct : $(OP).jazz $(DEPS_DIR)/%.sct.d | $(DEPS_DIR) $(CI_DIR) $(DEPS) $(CHECK_SCT_SLICE) +$(SCT_TARGETS_STDOUT): +%.sct.stdout : $(OP).jazz | $(CI_DIR) + $(CHECK_SCT_SLICE_STDOUT) + DEPFILES := \ $(DEPFILES) \ $(addprefix $(DEPS_DIR)/, $(addsuffix .sct.d, $(FUNCTIONS) $(OP))) diff --git a/src/Makefile.common b/src/Makefile.common index 0606c32d..9b1b272f 100644 --- a/src/Makefile.common +++ b/src/Makefile.common @@ -40,8 +40,6 @@ JASMIN ?= jasminc JASMINC := $(JASMIN) $(JFLAGS) $(JINCLUDE) COMPILE = ($(JASMINC) -o $@ $<) $(CIT) -JASMIN_CT ?= jasmin-ct - # -------------------------------------------------------------------- include $(SRC)/$(OPERATION)/EcFlags.mk From cf0920b16f51009e9a198b3874de0a50447890a3 Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Wed, 26 Jun 2024 07:43:26 +0100 Subject: [PATCH 05/19] sct: crypto_hash/sha256/amd64/ref --- src/crypto_hash/sha256/amd64/ref/hash.jazz | 3 +++ src/crypto_hash/sha256/amd64/ref/sha256.jinc | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/crypto_hash/sha256/amd64/ref/hash.jazz b/src/crypto_hash/sha256/amd64/ref/hash.jazz index bed68245..2a04a350 100644 --- a/src/crypto_hash/sha256/amd64/ref/hash.jazz +++ b/src/crypto_hash/sha256/amd64/ref/hash.jazz @@ -3,6 +3,9 @@ require "sha256.jinc" export fn jade_hash_sha256_amd64_ref(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha256_ref(hash, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_hash/sha256/amd64/ref/sha256.jinc b/src/crypto_hash/sha256/amd64/ref/sha256.jinc index fa7497e4..c5a83de4 100644 --- a/src/crypto_hash/sha256/amd64/ref/sha256.jinc +++ b/src/crypto_hash/sha256/amd64/ref/sha256.jinc @@ -192,7 +192,7 @@ fn _blocks_0_ref(reg ptr u32[8] _H, reg u64 in inlen) -> reg ptr u32[8], reg u64 stack ptr u32[8] Hp; reg ptr u32[8] H; reg u64 tr; - stack u64 in_s; + #mmx reg u64 in_s; Kp = SHA256_K; Hp = _H; @@ -275,9 +275,9 @@ fn _blocks_1_ref(reg ptr u32[8] _H, reg ptr u32[32] sblocks, reg u64 nblocks) -> reg ptr u32[64] Kp; stack ptr u32[8] Hp; reg ptr u32[8] H; - stack ptr u32[32] s_sblocks; + #mmx reg ptr u32[32] s_sblocks; reg u64 i oblocks tr; - stack u64 s_i; + #mmx reg u64 s_i; Kp = SHA256_K; Hp = _H; @@ -395,7 +395,7 @@ inline fn __lastblocks_ref(reg u64 in inlen bits) -> stack u32[32], reg u64 inline fn __sha256_ref(reg u64 out in inlen) { reg u64 bits nblocks; - stack u64 s_out s_bits; + #mmx reg u64 s_out s_bits; stack u32[8] H; reg ptr u32[8] Hp; stack u32[32] sblocks; From 36658f181bbfc51d08c2d30ca73af96b743eda62 Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Wed, 26 Jun 2024 07:45:17 +0100 Subject: [PATCH 06/19] sct: crypto_hash/sha512/amd64/ref --- src/crypto_hash/sha512/amd64/ref/hash.jazz | 3 +++ src/crypto_hash/sha512/amd64/ref/sha512.jinc | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/crypto_hash/sha512/amd64/ref/hash.jazz b/src/crypto_hash/sha512/amd64/ref/hash.jazz index 76212246..9990d2dd 100644 --- a/src/crypto_hash/sha512/amd64/ref/hash.jazz +++ b/src/crypto_hash/sha512/amd64/ref/hash.jazz @@ -3,6 +3,9 @@ require "sha512.jinc" export fn jade_hash_sha512_amd64_ref(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha512_ref(hash, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_hash/sha512/amd64/ref/sha512.jinc b/src/crypto_hash/sha512/amd64/ref/sha512.jinc index 184af39b..15c49814 100644 --- a/src/crypto_hash/sha512/amd64/ref/sha512.jinc +++ b/src/crypto_hash/sha512/amd64/ref/sha512.jinc @@ -192,7 +192,7 @@ fn _blocks_0_ref(reg ptr u64[8] _H, reg u64 in inlen) -> reg ptr u64[8], reg u64 stack ptr u64[8] Hp; reg ptr u64[8] H; reg u64 tr; - stack u64 in_s; + #mmx reg u64 in_s; Kp = SHA512_K; Hp = _H; @@ -275,9 +275,9 @@ fn _blocks_1_ref(reg ptr u64[8] _H, reg ptr u64[32] sblocks, reg u64 nblocks) -> reg ptr u64[80] Kp; stack ptr u64[8] Hp; reg ptr u64[8] H; - stack ptr u64[32] s_sblocks; + #mmx reg ptr u64[32] s_sblocks; reg u64 i oblocks tr; - stack u64 s_i; + #mmx reg u64 s_i; Kp = SHA512_K; Hp = _H; @@ -395,7 +395,7 @@ inline fn __lastblocks_ref(reg u64 in inlen bits) -> stack u64[32], reg u64 inline fn __sha512_ref(reg u64 out in inlen) { reg u64 bits nblocks; - stack u64 s_out s_bits; + #mmx reg u64 s_out s_bits; stack u64[8] H; reg ptr u64[8] Hp; stack u64[32] sblocks; From 9268d1ca24059bf0d41fb69401aa728d7bb80165 Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Wed, 26 Jun 2024 08:15:47 +0100 Subject: [PATCH 07/19] sct: crypto_hash sha3-* ref1 --- .../keccak/keccak1600/amd64/ref1/keccak1600.jinc | 12 ++++++------ src/crypto_hash/sha3-224/amd64/ref1/hash.jazz | 3 +++ src/crypto_hash/sha3-256/amd64/ref1/hash.jazz | 3 +++ src/crypto_hash/sha3-384/amd64/ref1/hash.jazz | 3 +++ src/crypto_hash/sha3-512/amd64/ref1/hash.jazz | 3 +++ 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/common/keccak/keccak1600/amd64/ref1/keccak1600.jinc b/src/common/keccak/keccak1600/amd64/ref1/keccak1600.jinc index c6dcf710..6d18b83e 100644 --- a/src/common/keccak/keccak1600/amd64/ref1/keccak1600.jinc +++ b/src/common/keccak/keccak1600/amd64/ref1/keccak1600.jinc @@ -87,7 +87,7 @@ inline fn __absorb_ref1( reg u64 rate // rate already in bytes -- it is returned bc of spills ) -> reg ptr u64[25], reg u64 { - stack u64 s_in s_inlen s_rate; + #mmx reg u64 s_in s_inlen s_rate; reg u8 trail_byte; // intermediate blocks @@ -173,13 +173,13 @@ inline fn __xtr_bytes_ref1( inline fn __squeeze_ref1( reg mut ptr u64[25] state, - stack u64 s_out, - reg u64 outlen, - reg u64 rate + #mmx reg u64 s_out, + reg u64 outlen, + reg u64 rate ) { reg u64 out; - stack u64 s_outlen s_rate; + #mmx reg u64 s_outlen s_rate; // intermediate blocks while ( outlen > rate ) @@ -212,7 +212,7 @@ inline fn __keccak1600_ref1(reg u64 out outlen in inlen, reg u8 trail_byte, reg { stack u64[25] _state; reg ptr u64[25] state; - stack u64 s_out s_outlen; + #mmx reg u64 s_out s_outlen; stack u8 s_trail_byte; s_out = out; diff --git a/src/crypto_hash/sha3-224/amd64/ref1/hash.jazz b/src/crypto_hash/sha3-224/amd64/ref1/hash.jazz index df9387c2..6411abba 100644 --- a/src/crypto_hash/sha3-224/amd64/ref1/hash.jazz +++ b/src/crypto_hash/sha3-224/amd64/ref1/hash.jazz @@ -3,6 +3,9 @@ require "sha3-224.jinc" export fn jade_hash_sha3_224_amd64_ref1(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha3_224_ref1(hash, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_hash/sha3-256/amd64/ref1/hash.jazz b/src/crypto_hash/sha3-256/amd64/ref1/hash.jazz index e8a10bf8..0538261b 100644 --- a/src/crypto_hash/sha3-256/amd64/ref1/hash.jazz +++ b/src/crypto_hash/sha3-256/amd64/ref1/hash.jazz @@ -3,6 +3,9 @@ require "sha3-256.jinc" export fn jade_hash_sha3_256_amd64_ref1(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha3_256_ref1(hash, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_hash/sha3-384/amd64/ref1/hash.jazz b/src/crypto_hash/sha3-384/amd64/ref1/hash.jazz index 166e9a76..53a0e0cd 100644 --- a/src/crypto_hash/sha3-384/amd64/ref1/hash.jazz +++ b/src/crypto_hash/sha3-384/amd64/ref1/hash.jazz @@ -3,6 +3,9 @@ require "sha3-384.jinc" export fn jade_hash_sha3_384_amd64_ref1(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha3_384_ref1(hash, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_hash/sha3-512/amd64/ref1/hash.jazz b/src/crypto_hash/sha3-512/amd64/ref1/hash.jazz index 453a96a8..26126522 100644 --- a/src/crypto_hash/sha3-512/amd64/ref1/hash.jazz +++ b/src/crypto_hash/sha3-512/amd64/ref1/hash.jazz @@ -3,6 +3,9 @@ require "sha3-512.jinc" export fn jade_hash_sha3_512_amd64_ref1(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha3_512_ref1(hash, input, input_length); ?{}, r = #set0(); return r; From b639b734557358cc5e5e81388c9b608a6c7ad926 Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Wed, 26 Jun 2024 08:20:38 +0100 Subject: [PATCH 08/19] sct: crypto_hash sha3-* avx2 --- .../keccak1600/amd64/avx2/keccak1600.jinc | 119 +++++++++++++----- .../keccak1600/amd64/avx2/keccakf1600.jinc | 8 +- src/crypto_hash/sha3-224/amd64/avx2/hash.jazz | 7 +- .../sha3-224/amd64/avx2/sha3-224.jinc | 4 +- src/crypto_hash/sha3-256/amd64/avx2/hash.jazz | 7 +- .../sha3-256/amd64/avx2/sha3-256.jinc | 4 +- src/crypto_hash/sha3-384/amd64/avx2/hash.jazz | 7 +- .../sha3-384/amd64/avx2/sha3-384.jinc | 4 +- src/crypto_hash/sha3-512/amd64/avx2/hash.jazz | 7 +- .../sha3-512/amd64/avx2/sha3-512.jinc | 4 +- 10 files changed, 122 insertions(+), 49 deletions(-) diff --git a/src/common/keccak/keccak1600/amd64/avx2/keccak1600.jinc b/src/common/keccak/keccak1600/amd64/avx2/keccak1600.jinc index 4403e5bb..3cbacb8a 100644 --- a/src/common/keccak/keccak1600/amd64/avx2/keccak1600.jinc +++ b/src/common/keccak/keccak1600/amd64/avx2/keccak1600.jinc @@ -33,23 +33,32 @@ inline fn __add_full_block_avx2( stack u64[28] s_state, reg ptr u64[25] a_jagged_p, reg u64 in inlen, - reg u64 rate -) -> reg u256[7], stack u64[28], reg u64, reg u64 + reg u64 rate, + #msf reg u64 ms +) -> reg u256[7], stack u64[28], reg u64, reg u64, #msf reg u64 { inline int i; reg u64 j l t rate8; + reg bool loop_condition; rate8 = rate; rate8 >>= 3; j = 0; - while ( j < rate8 ) + while { loop_condition = ( j < rate8 ); } ( loop_condition ) { + ms = #update_msf(loop_condition, ms); + t = [in + 8*j]; + l = a_jagged_p[(int) j]; + l = #protect(l, ms); + s_state[(int) l] = t; j += 1; + } + ms = #update_msf(!loop_condition, ms); //TODO: check & change to #VPBROADCAST_4u64 t = s_state[0]; @@ -63,7 +72,7 @@ inline fn __add_full_block_avx2( in += rate; inlen -= rate; - return state, s_state, in, inlen; + return state, s_state, in, inlen, ms; } @@ -74,42 +83,56 @@ inline fn __add_final_block_avx2( reg ptr u64[25] a_jagged_p, reg u64 in inlen, reg u8 trail_byte, - reg u64 rate -) -> reg u256[7] + reg u64 rate, + #msf reg u64 ms +) -> reg u256[7], #msf reg u64 { inline int i; reg u64 j l t inlen8; reg u8 c; + reg bool loop_condition; s_state = __init_s_state_avx2(); inlen8 = inlen; inlen8 >>= 3; j = 0; - while ( j < inlen8 ) + + while { loop_condition = (j < inlen8); } ( loop_condition ) { + ms = #update_msf(loop_condition, ms); + t = [in + 8*j]; l = a_jagged_p[(int) j]; + l = #protect(l, ms); + s_state[(int) l] = t; j += 1; } + ms = #update_msf(!loop_condition, ms); + l = a_jagged_p[(int) j]; + l = #protect(l, ms); + l <<= 3; j <<= 3; - while ( j < inlen ) + while { loop_condition = ( j < inlen ); } ( loop_condition ) { + ms = #update_msf(loop_condition, ms); c = (u8)[in + j]; s_state[u8 (int) l] = c; j += 1; l += 1; } + ms = #update_msf(!loop_condition, ms); s_state[u8 (int) l] = trail_byte; // j = (rate-1) >> 3; j = rate; j -= 1; j >>= 3; l = a_jagged_p[(int) j]; + l = #protect(l, ms); l <<= 3; // l += ((rate-1) & 0x7) j = rate; j -= 1; j &= 0x7; @@ -125,7 +148,7 @@ inline fn __add_final_block_avx2( for i = 0 to 7 { state[i] ^= s_state[u256 i]; } - return state; + return state, ms; } @@ -134,12 +157,14 @@ inline fn __xtr_full_block_avx2( reg u256[7] state, reg ptr u64[25] a_jagged_p, reg u64 out, - reg u64 len -) -> reg u64 + reg u64 len, + #msf reg u64 ms +) -> reg u64, #msf reg u64 { inline int i; stack u64[28] s_state; reg u64 j l t len8; + reg bool loop_condition; for i = 0 to 7 { s_state[u256 i] = state[i]; } @@ -147,17 +172,22 @@ inline fn __xtr_full_block_avx2( len8 = len; len8 >>= 3; j = 0; - while ( j < len8 ) + while { loop_condition = ( j < len8 ); } ( loop_condition ) { + ms = #update_msf(loop_condition, ms); + l = a_jagged_p[(int) j]; + l = #protect(l, ms); + t = s_state[(int) l]; [out + 8*j] = t; j += 1; } + ms = #update_msf(!loop_condition, ms); out += len; - return out; + return out, ms; } @@ -166,13 +196,15 @@ inline fn __xtr_bytes_avx2( reg u256[7] state, reg ptr u64[25] a_jagged_p, reg u64 out, - reg u64 len + reg u64 len, + #msf reg u64 ms ) -> reg u64 { inline int i; stack u64[28] s_state; reg u64 j l t len8; reg u8 c; + reg bool loop_condition; for i = 0 to 7 { s_state[u256 i] = state[i]; } @@ -180,13 +212,22 @@ inline fn __xtr_bytes_avx2( len8 = len; len8 >>= 3; j = 0; - while ( j < len8 ) - { l = a_jagged_p[(int) j]; + while { loop_condition = ( j < len8 ); } ( loop_condition ) + { + ms = #update_msf(loop_condition, ms); + + l = a_jagged_p[(int) j]; + l = #protect(l, ms); + t = s_state[(int) l]; [out + 8*j] = t; j += 1; } + ms = #update_msf(!loop_condition, ms); + l = a_jagged_p[(int)j]; + l = #protect(l, ms); + j <<= 3; l <<= 3; @@ -208,65 +249,75 @@ inline fn __absorb_avx2( reg u256[7] state, reg u64 in inlen, reg u8 trail_byte, - reg u64 rate -) -> reg u256[7] + reg u64 rate, + #msf reg u64 ms +) -> reg u256[7], #msf reg u64 { stack u64[28] s_state; reg ptr u64[25] a_jagged_p; + reg bool loop_condition; a_jagged_p = KECCAK_A_JAGGED; s_state = __init_s_state_avx2(); // intermediate blocks - while ( inlen >= rate ) + while { loop_condition = (inlen >= rate); } (loop_condition) { - state, s_state, in, inlen = __add_full_block_avx2(state, s_state, a_jagged_p, in, inlen, rate); - state = __keccakf1600_avx2(state); + ms = #update_msf(loop_condition, ms); + + state, s_state, in, inlen, ms = __add_full_block_avx2(state, s_state, a_jagged_p, in, inlen, rate, ms); + + state, ms = __keccakf1600_avx2(state, ms); } + ms = #update_msf(!loop_condition, ms); // final block - state = __add_final_block_avx2(state, s_state, a_jagged_p, in, inlen, trail_byte, rate); + state, ms = __add_final_block_avx2(state, s_state, a_jagged_p, in, inlen, trail_byte, rate, ms); - return state; + return state, ms; } -inline fn __squeeze_avx2(reg u256[7] state, reg u64 out outlen rate) +inline fn __squeeze_avx2(reg u256[7] state, reg u64 out outlen rate, #msf reg u64 ms) { reg ptr u64[25] a_jagged_p; + reg bool loop_condition; a_jagged_p = KECCAK_A_JAGGED; // intermediate blocks - while ( outlen > rate ) + while { loop_condition = (outlen > rate); } ( loop_condition ) { - state = __keccakf1600_avx2(state); - out = __xtr_full_block_avx2(state, a_jagged_p, out, rate); + ms = #update_msf(loop_condition, ms); + + state, ms = __keccakf1600_avx2(state, ms); + out, ms = __xtr_full_block_avx2(state, a_jagged_p, out, rate, ms); outlen -= rate; } + ms = #update_msf(!loop_condition, ms); - state = __keccakf1600_avx2(state); - out = __xtr_bytes_avx2(state, a_jagged_p, out, outlen); + state, ms = __keccakf1600_avx2(state, ms); + out = __xtr_bytes_avx2(state, a_jagged_p, out, outlen, ms); } -inline fn __keccak1600_avx2(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate) +inline fn __keccak1600_avx2(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate, #msf reg u64 ms) { reg u256[7] state; state = __keccak_init_avx2(); // absorb - state = __absorb_avx2(state, in, inlen, trail_byte, rate); + state, ms = __absorb_avx2(state, in, inlen, trail_byte, rate, ms); // squeeze - __squeeze_avx2(state, out, outlen, rate); + __squeeze_avx2(state, out, outlen, rate, ms); } -fn _keccak1600_avx2(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate) +fn _keccak1600_avx2(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate, #msf reg u64 ms) { - __keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate); + __keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate, ms); } diff --git a/src/common/keccak/keccak1600/amd64/avx2/keccakf1600.jinc b/src/common/keccak/keccak1600/amd64/avx2/keccakf1600.jinc index 6ca9dda6..907981ee 100644 --- a/src/common/keccak/keccak1600/amd64/avx2/keccakf1600.jinc +++ b/src/common/keccak/keccak1600/amd64/avx2/keccakf1600.jinc @@ -59,7 +59,7 @@ u64[25] KECCAK_A_JAGGED = }; -inline fn __keccakf1600_avx2(reg u256[7] state) -> reg u256[7] +inline fn __keccakf1600_avx2(reg u256[7] state, #msf reg u64 ms) -> reg u256[7], #msf reg u64 { reg u256[9] t; reg u256 c00 c14 d00 d14; @@ -194,9 +194,11 @@ inline fn __keccakf1600_avx2(reg u256[7] state) -> reg u256[7] iotas_o += 32; _,_,_,zf,r = #DEC_64(r); - }(!zf) + }(!zf) { ms = #update_msf(!zf, ms); } - return state; + ms = #update_msf(zf, ms); + + return state, ms; } diff --git a/src/crypto_hash/sha3-224/amd64/avx2/hash.jazz b/src/crypto_hash/sha3-224/amd64/avx2/hash.jazz index 77ae780a..97d4822a 100644 --- a/src/crypto_hash/sha3-224/amd64/avx2/hash.jazz +++ b/src/crypto_hash/sha3-224/amd64/avx2/hash.jazz @@ -3,7 +3,12 @@ require "sha3-224.jinc" export fn jade_hash_sha3_224_amd64_avx2(reg u64 hash input input_length) -> reg u64 { reg u64 r; - __sha3_224_avx2(hash, input, input_length); + #msf reg u64 ms; + + ms = #init_msf(); + + __sha3_224_avx2(hash, input, input_length, ms); + ?{}, r = #set0(); return r; } diff --git a/src/crypto_hash/sha3-224/amd64/avx2/sha3-224.jinc b/src/crypto_hash/sha3-224/amd64/avx2/sha3-224.jinc index 10f0d31b..42e20d46 100644 --- a/src/crypto_hash/sha3-224/amd64/avx2/sha3-224.jinc +++ b/src/crypto_hash/sha3-224/amd64/avx2/sha3-224.jinc @@ -1,6 +1,6 @@ from Jade require "common/keccak/keccak1600/amd64/avx2/keccak1600.jinc" -inline fn __sha3_224_avx2(reg u64 out in inlen) +inline fn __sha3_224_avx2(reg u64 out in inlen, #msf reg u64 ms) { reg u64 outlen rate; reg u8 trail_byte; @@ -9,7 +9,7 @@ inline fn __sha3_224_avx2(reg u64 out in inlen) trail_byte = 0x6; rate = (1152/8); - _keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate); + _keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate, ms); } diff --git a/src/crypto_hash/sha3-256/amd64/avx2/hash.jazz b/src/crypto_hash/sha3-256/amd64/avx2/hash.jazz index 462c1c0b..88f6b8ff 100644 --- a/src/crypto_hash/sha3-256/amd64/avx2/hash.jazz +++ b/src/crypto_hash/sha3-256/amd64/avx2/hash.jazz @@ -3,7 +3,12 @@ require "sha3-256.jinc" export fn jade_hash_sha3_256_amd64_avx2(reg u64 hash input input_length) -> reg u64 { reg u64 r; - __sha3_256_avx2(hash, input, input_length); + #msf reg u64 ms; + + ms = #init_msf(); + + __sha3_256_avx2(hash, input, input_length, ms); + ?{}, r = #set0(); return r; } diff --git a/src/crypto_hash/sha3-256/amd64/avx2/sha3-256.jinc b/src/crypto_hash/sha3-256/amd64/avx2/sha3-256.jinc index ee575bb5..6a808935 100644 --- a/src/crypto_hash/sha3-256/amd64/avx2/sha3-256.jinc +++ b/src/crypto_hash/sha3-256/amd64/avx2/sha3-256.jinc @@ -1,6 +1,6 @@ from Jade require "common/keccak/keccak1600/amd64/avx2/keccak1600.jinc" -inline fn __sha3_256_avx2(reg u64 out in inlen) +inline fn __sha3_256_avx2(reg u64 out in inlen, #msf reg u64 ms) { reg u64 outlen rate; reg u8 trail_byte; @@ -9,7 +9,7 @@ inline fn __sha3_256_avx2(reg u64 out in inlen) trail_byte = 0x6; rate = (1088/8); - _keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate); + _keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate, ms); } diff --git a/src/crypto_hash/sha3-384/amd64/avx2/hash.jazz b/src/crypto_hash/sha3-384/amd64/avx2/hash.jazz index 0be82db3..75e61f6c 100644 --- a/src/crypto_hash/sha3-384/amd64/avx2/hash.jazz +++ b/src/crypto_hash/sha3-384/amd64/avx2/hash.jazz @@ -3,7 +3,12 @@ require "sha3-384.jinc" export fn jade_hash_sha3_384_amd64_avx2(reg u64 hash input input_length) -> reg u64 { reg u64 r; - __sha3_384_avx2(hash, input, input_length); + #msf reg u64 ms; + + ms = #init_msf(); + + __sha3_384_avx2(hash, input, input_length, ms); + ?{}, r = #set0(); return r; } diff --git a/src/crypto_hash/sha3-384/amd64/avx2/sha3-384.jinc b/src/crypto_hash/sha3-384/amd64/avx2/sha3-384.jinc index db29845f..4737c251 100644 --- a/src/crypto_hash/sha3-384/amd64/avx2/sha3-384.jinc +++ b/src/crypto_hash/sha3-384/amd64/avx2/sha3-384.jinc @@ -1,6 +1,6 @@ from Jade require "common/keccak/keccak1600/amd64/avx2/keccak1600.jinc" -inline fn __sha3_384_avx2(reg u64 out in inlen) +inline fn __sha3_384_avx2(reg u64 out in inlen, #msf reg u64 ms) { reg u64 outlen rate; reg u8 trail_byte; @@ -9,7 +9,7 @@ inline fn __sha3_384_avx2(reg u64 out in inlen) trail_byte = 0x6; rate = (832/8); - _keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate); + _keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate, ms); } diff --git a/src/crypto_hash/sha3-512/amd64/avx2/hash.jazz b/src/crypto_hash/sha3-512/amd64/avx2/hash.jazz index 49335d0d..50070315 100644 --- a/src/crypto_hash/sha3-512/amd64/avx2/hash.jazz +++ b/src/crypto_hash/sha3-512/amd64/avx2/hash.jazz @@ -3,7 +3,12 @@ require "sha3-512.jinc" export fn jade_hash_sha3_512_amd64_avx2(reg u64 hash input input_length) -> reg u64 { reg u64 r; - __sha3_512_avx2(hash, input, input_length); + #msf reg u64 ms; + + ms = #init_msf(); + + __sha3_512_avx2(hash, input, input_length, ms); + ?{}, r = #set0(); return r; } diff --git a/src/crypto_hash/sha3-512/amd64/avx2/sha3-512.jinc b/src/crypto_hash/sha3-512/amd64/avx2/sha3-512.jinc index 17ce4c24..0a9da967 100644 --- a/src/crypto_hash/sha3-512/amd64/avx2/sha3-512.jinc +++ b/src/crypto_hash/sha3-512/amd64/avx2/sha3-512.jinc @@ -1,6 +1,6 @@ from Jade require "common/keccak/keccak1600/amd64/avx2/keccak1600.jinc" -inline fn __sha3_512_avx2(reg u64 out in inlen) +inline fn __sha3_512_avx2(reg u64 out in inlen, #msf reg u64 ms) { reg u64 outlen rate; reg u8 trail_byte; @@ -9,7 +9,7 @@ inline fn __sha3_512_avx2(reg u64 out in inlen) trail_byte = 0x6; rate = (576/8); - _keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate); + _keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate, ms); } From 43fd681a17c3bd28c40f5a24bdd78a47d025e241 Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Wed, 26 Jun 2024 08:34:56 +0100 Subject: [PATCH 09/19] sct: crypto_hash sha3-* ref and bmi1 --- .../keccak/keccak1600/amd64/bmi1/keccak1600.jinc | 12 ++++++------ .../keccak/keccak1600/amd64/bmi1/keccakf1600.jinc | 2 +- .../keccak/keccak1600/amd64/ref/keccak1600.jinc | 8 ++++---- src/crypto_hash/sha3-224/amd64/bmi1/hash.jazz | 3 +++ src/crypto_hash/sha3-224/amd64/ref/hash.jazz | 3 +++ src/crypto_hash/sha3-256/amd64/bmi1/hash.jazz | 3 +++ src/crypto_hash/sha3-256/amd64/ref/hash.jazz | 3 +++ src/crypto_hash/sha3-384/amd64/bmi1/hash.jazz | 3 +++ src/crypto_hash/sha3-384/amd64/ref/hash.jazz | 3 +++ src/crypto_hash/sha3-512/amd64/bmi1/hash.jazz | 3 +++ src/crypto_hash/sha3-512/amd64/ref/hash.jazz | 3 +++ 11 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/common/keccak/keccak1600/amd64/bmi1/keccak1600.jinc b/src/common/keccak/keccak1600/amd64/bmi1/keccak1600.jinc index fa81ca75..9e427d46 100644 --- a/src/common/keccak/keccak1600/amd64/bmi1/keccak1600.jinc +++ b/src/common/keccak/keccak1600/amd64/bmi1/keccak1600.jinc @@ -88,7 +88,7 @@ inline fn __absorb_bmi1( reg u64 rate // rate already in bytes -- it is returned bc of spills ) -> reg ptr u64[25], reg u64 { - stack u64 s_in s_inlen s_rate; + #mmx reg u64 s_in s_inlen s_rate; reg u8 trail_byte; // intermediate blocks @@ -174,13 +174,13 @@ inline fn __xtr_bytes_bmi1( inline fn __squeeze_bmi1( reg mut ptr u64[25] state, - stack u64 s_out, - reg u64 outlen, - reg u64 rate + #mmx reg u64 s_out, + reg u64 outlen, + reg u64 rate ) { reg u64 out; - stack u64 s_outlen s_rate; + #mmx reg u64 s_outlen s_rate; // intermediate blocks while ( outlen > rate ) @@ -213,7 +213,7 @@ inline fn __keccak1600_bmi1(reg u64 out outlen in inlen, reg u8 trail_byte, reg { stack u64[25] _state; reg ptr u64[25] state; - stack u64 s_out s_outlen; + #mmx reg u64 s_out s_outlen; stack u8 s_trail_byte; s_out = out; diff --git a/src/common/keccak/keccak1600/amd64/bmi1/keccakf1600.jinc b/src/common/keccak/keccak1600/amd64/bmi1/keccakf1600.jinc index 565c69ae..40754c55 100644 --- a/src/common/keccak/keccak1600/amd64/bmi1/keccakf1600.jinc +++ b/src/common/keccak/keccak1600/amd64/bmi1/keccakf1600.jinc @@ -129,7 +129,7 @@ inline fn __round_bmi1(reg ptr u64[25] e a, reg u64 rc) -> reg ptr u64[25] inline fn __keccakf1600_bmi1(reg ptr u64[25] a) -> reg ptr u64[25] { reg ptr u64[24] RC; - stack ptr u64[24] s_RC; + #mmx reg ptr u64[24] s_RC; stack u64[25] s_e; reg ptr u64[25] e; reg u64 c rc; diff --git a/src/common/keccak/keccak1600/amd64/ref/keccak1600.jinc b/src/common/keccak/keccak1600/amd64/ref/keccak1600.jinc index cd718735..f903e7cb 100644 --- a/src/common/keccak/keccak1600/amd64/ref/keccak1600.jinc +++ b/src/common/keccak/keccak1600/amd64/ref/keccak1600.jinc @@ -87,7 +87,7 @@ inline fn __absorb_ref( reg u64 rate // rate already in bytes -- it is returned bc of spills ) -> stack u64[25], reg u64 { - stack u64 s_in s_inlen s_rate; + #mmx reg u64 s_in s_inlen s_rate; reg u8 trail_byte; // intermediate blocks @@ -171,13 +171,13 @@ inline fn __xtr_bytes_ref( inline fn __squeeze_ref( stack u64[25] state, - stack u64 s_out, + #mmx reg u64 s_out, reg u64 outlen, reg u64 rate ) { reg u64 out; - stack u64 s_outlen s_rate; + #mmx reg u64 s_outlen s_rate; // intermediate blocks while ( outlen > rate ) @@ -205,7 +205,7 @@ inline fn __squeeze_ref( inline fn __keccak1600_ref(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate) { stack u64[25] state; - stack u64 s_out s_outlen; + #mmx reg u64 s_out s_outlen; stack u8 s_trail_byte; s_out = out; diff --git a/src/crypto_hash/sha3-224/amd64/bmi1/hash.jazz b/src/crypto_hash/sha3-224/amd64/bmi1/hash.jazz index 9703da0d..df52afb9 100644 --- a/src/crypto_hash/sha3-224/amd64/bmi1/hash.jazz +++ b/src/crypto_hash/sha3-224/amd64/bmi1/hash.jazz @@ -3,6 +3,9 @@ require "sha3-224.jinc" export fn jade_hash_sha3_224_amd64_bmi1(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha3_224_bmi1(hash, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_hash/sha3-224/amd64/ref/hash.jazz b/src/crypto_hash/sha3-224/amd64/ref/hash.jazz index 0bda7d05..9739444c 100644 --- a/src/crypto_hash/sha3-224/amd64/ref/hash.jazz +++ b/src/crypto_hash/sha3-224/amd64/ref/hash.jazz @@ -3,6 +3,9 @@ require "sha3-224.jinc" export fn jade_hash_sha3_224_amd64_ref(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha3_224_ref(hash, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_hash/sha3-256/amd64/bmi1/hash.jazz b/src/crypto_hash/sha3-256/amd64/bmi1/hash.jazz index f2e646cb..bba2e585 100644 --- a/src/crypto_hash/sha3-256/amd64/bmi1/hash.jazz +++ b/src/crypto_hash/sha3-256/amd64/bmi1/hash.jazz @@ -3,6 +3,9 @@ require "sha3-256.jinc" export fn jade_hash_sha3_256_amd64_bmi1(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha3_256_bmi1(hash, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_hash/sha3-256/amd64/ref/hash.jazz b/src/crypto_hash/sha3-256/amd64/ref/hash.jazz index 6c381cce..bfa36c72 100644 --- a/src/crypto_hash/sha3-256/amd64/ref/hash.jazz +++ b/src/crypto_hash/sha3-256/amd64/ref/hash.jazz @@ -3,6 +3,9 @@ require "sha3-256.jinc" export fn jade_hash_sha3_256_amd64_ref(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha3_256_ref(hash, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_hash/sha3-384/amd64/bmi1/hash.jazz b/src/crypto_hash/sha3-384/amd64/bmi1/hash.jazz index 6090b84e..48b124fb 100644 --- a/src/crypto_hash/sha3-384/amd64/bmi1/hash.jazz +++ b/src/crypto_hash/sha3-384/amd64/bmi1/hash.jazz @@ -3,6 +3,9 @@ require "sha3-384.jinc" export fn jade_hash_sha3_384_amd64_bmi1(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha3_384_bmi1(hash, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_hash/sha3-384/amd64/ref/hash.jazz b/src/crypto_hash/sha3-384/amd64/ref/hash.jazz index fb952862..65518d29 100644 --- a/src/crypto_hash/sha3-384/amd64/ref/hash.jazz +++ b/src/crypto_hash/sha3-384/amd64/ref/hash.jazz @@ -3,6 +3,9 @@ require "sha3-384.jinc" export fn jade_hash_sha3_384_amd64_ref(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha3_384_ref(hash, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_hash/sha3-512/amd64/bmi1/hash.jazz b/src/crypto_hash/sha3-512/amd64/bmi1/hash.jazz index 79a4f3ce..4ff72114 100644 --- a/src/crypto_hash/sha3-512/amd64/bmi1/hash.jazz +++ b/src/crypto_hash/sha3-512/amd64/bmi1/hash.jazz @@ -3,6 +3,9 @@ require "sha3-512.jinc" export fn jade_hash_sha3_512_amd64_bmi1(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha3_512_bmi1(hash, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_hash/sha3-512/amd64/ref/hash.jazz b/src/crypto_hash/sha3-512/amd64/ref/hash.jazz index aa265621..c127947c 100644 --- a/src/crypto_hash/sha3-512/amd64/ref/hash.jazz +++ b/src/crypto_hash/sha3-512/amd64/ref/hash.jazz @@ -3,6 +3,9 @@ require "sha3-512.jinc" export fn jade_hash_sha3_512_amd64_ref(reg u64 hash input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __sha3_512_ref(hash, input, input_length); ?{}, r = #set0(); return r; From a283669451be00372fffe73eccdb60e24b138216 Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Wed, 26 Jun 2024 08:50:25 +0100 Subject: [PATCH 10/19] sct: crypto_xof --- src/common/keccak/keccak1600/amd64/spec/keccak1600.jinc | 2 +- src/crypto_xof/shake128/amd64/avx2/shake128.jinc | 4 ++-- src/crypto_xof/shake128/amd64/avx2/xof.jazz | 6 +++++- src/crypto_xof/shake128/amd64/bmi1/xof.jazz | 3 +++ src/crypto_xof/shake128/amd64/ref/xof.jazz | 3 +++ src/crypto_xof/shake128/amd64/ref1/xof.jazz | 3 +++ src/crypto_xof/shake256/amd64/avx2/shake256.jinc | 4 ++-- src/crypto_xof/shake256/amd64/avx2/xof.jazz | 7 ++++++- src/crypto_xof/shake256/amd64/bmi1/xof.jazz | 3 +++ src/crypto_xof/shake256/amd64/ref/xof.jazz | 3 +++ src/crypto_xof/shake256/amd64/ref1/xof.jazz | 3 +++ src/crypto_xof/shake256/amd64/spec/xof.jazz | 3 +++ 12 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/common/keccak/keccak1600/amd64/spec/keccak1600.jinc b/src/common/keccak/keccak1600/amd64/spec/keccak1600.jinc index 7dd3b9e3..783813b2 100644 --- a/src/common/keccak/keccak1600/amd64/spec/keccak1600.jinc +++ b/src/common/keccak/keccak1600/amd64/spec/keccak1600.jinc @@ -136,7 +136,7 @@ inline fn __xtr_bytes_spec( inline fn __keccak1600_spec(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate) { stack u64[25] state; - stack u64 s_out s_outlen s_in s_inlen s_rate; + #mmx reg u64 s_out s_outlen s_in s_inlen s_rate; stack u8 s_trail_byte; s_out = out; diff --git a/src/crypto_xof/shake128/amd64/avx2/shake128.jinc b/src/crypto_xof/shake128/amd64/avx2/shake128.jinc index 187aac91..70875d0c 100644 --- a/src/crypto_xof/shake128/amd64/avx2/shake128.jinc +++ b/src/crypto_xof/shake128/amd64/avx2/shake128.jinc @@ -1,6 +1,6 @@ from Jade require "common/keccak/keccak1600/amd64/avx2/keccak1600.jinc" -inline fn __shake128_avx2(reg u64 out outlen in inlen) +inline fn __shake128_avx2(reg u64 out outlen in inlen, #msf reg u64 ms) { reg u64 rate; reg u8 trail_byte; @@ -8,7 +8,7 @@ inline fn __shake128_avx2(reg u64 out outlen in inlen) trail_byte = 0x1F; rate = (1344/8); - __keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate); + __keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate, ms); } diff --git a/src/crypto_xof/shake128/amd64/avx2/xof.jazz b/src/crypto_xof/shake128/amd64/avx2/xof.jazz index 23dd3b45..6b41b262 100644 --- a/src/crypto_xof/shake128/amd64/avx2/xof.jazz +++ b/src/crypto_xof/shake128/amd64/avx2/xof.jazz @@ -3,7 +3,11 @@ require "shake128.jinc" export fn jade_xof_shake128_amd64_avx2(reg u64 output output_length input input_length) -> reg u64 { reg u64 r; - __shake128_avx2(output, output_length, input, input_length); + #msf reg u64 ms; + + ms = #init_msf(); + + __shake128_avx2(output, output_length, input, input_length, ms); ?{}, r = #set0(); return r; } diff --git a/src/crypto_xof/shake128/amd64/bmi1/xof.jazz b/src/crypto_xof/shake128/amd64/bmi1/xof.jazz index 19921991..4c32db8d 100644 --- a/src/crypto_xof/shake128/amd64/bmi1/xof.jazz +++ b/src/crypto_xof/shake128/amd64/bmi1/xof.jazz @@ -3,6 +3,9 @@ require "shake128.jinc" export fn jade_xof_shake128_amd64_bmi1(reg u64 output output_length input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __shake128_bmi1(output, output_length, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_xof/shake128/amd64/ref/xof.jazz b/src/crypto_xof/shake128/amd64/ref/xof.jazz index ad386786..3cb07b30 100644 --- a/src/crypto_xof/shake128/amd64/ref/xof.jazz +++ b/src/crypto_xof/shake128/amd64/ref/xof.jazz @@ -3,6 +3,9 @@ require "shake128.jinc" export fn jade_xof_shake128_amd64_ref(reg u64 output output_length input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __shake128_ref(output, output_length, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_xof/shake128/amd64/ref1/xof.jazz b/src/crypto_xof/shake128/amd64/ref1/xof.jazz index 28e571ea..063f9637 100644 --- a/src/crypto_xof/shake128/amd64/ref1/xof.jazz +++ b/src/crypto_xof/shake128/amd64/ref1/xof.jazz @@ -3,6 +3,9 @@ require "shake128.jinc" export fn jade_xof_shake128_amd64_ref1(reg u64 output output_length input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __shake128_ref1(output, output_length, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_xof/shake256/amd64/avx2/shake256.jinc b/src/crypto_xof/shake256/amd64/avx2/shake256.jinc index 37c02fef..17a24c27 100644 --- a/src/crypto_xof/shake256/amd64/avx2/shake256.jinc +++ b/src/crypto_xof/shake256/amd64/avx2/shake256.jinc @@ -1,6 +1,6 @@ from Jade require "common/keccak/keccak1600/amd64/avx2/keccak1600.jinc" -inline fn __shake256_avx2(reg u64 out outlen in inlen) +inline fn __shake256_avx2(reg u64 out outlen in inlen, #msf reg u64 ms) { reg u64 rate; reg u8 trail_byte; @@ -8,7 +8,7 @@ inline fn __shake256_avx2(reg u64 out outlen in inlen) trail_byte = 0x1F; rate = (1088/8); - __keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate); + __keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate, ms); } diff --git a/src/crypto_xof/shake256/amd64/avx2/xof.jazz b/src/crypto_xof/shake256/amd64/avx2/xof.jazz index 169f7701..80d3ae69 100644 --- a/src/crypto_xof/shake256/amd64/avx2/xof.jazz +++ b/src/crypto_xof/shake256/amd64/avx2/xof.jazz @@ -3,7 +3,12 @@ require "shake256.jinc" export fn jade_xof_shake256_amd64_avx2(reg u64 output output_length input input_length) -> reg u64 { reg u64 r; - __shake256_avx2(output, output_length, input, input_length); + #msf reg u64 ms; + + ms = #init_msf(); + + __shake256_avx2(output, output_length, input, input_length, ms); + ?{}, r = #set0(); return r; } diff --git a/src/crypto_xof/shake256/amd64/bmi1/xof.jazz b/src/crypto_xof/shake256/amd64/bmi1/xof.jazz index f0988dd9..0a01874f 100644 --- a/src/crypto_xof/shake256/amd64/bmi1/xof.jazz +++ b/src/crypto_xof/shake256/amd64/bmi1/xof.jazz @@ -3,6 +3,9 @@ require "shake256.jinc" export fn jade_xof_shake256_amd64_bmi1(reg u64 output output_length input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __shake256_bmi1(output, output_length, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_xof/shake256/amd64/ref/xof.jazz b/src/crypto_xof/shake256/amd64/ref/xof.jazz index 8eb4e643..c876881e 100644 --- a/src/crypto_xof/shake256/amd64/ref/xof.jazz +++ b/src/crypto_xof/shake256/amd64/ref/xof.jazz @@ -3,6 +3,9 @@ require "shake256.jinc" export fn jade_xof_shake256_amd64_ref(reg u64 output output_length input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __shake256_ref(output, output_length, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_xof/shake256/amd64/ref1/xof.jazz b/src/crypto_xof/shake256/amd64/ref1/xof.jazz index 2051d26f..23d811bb 100644 --- a/src/crypto_xof/shake256/amd64/ref1/xof.jazz +++ b/src/crypto_xof/shake256/amd64/ref1/xof.jazz @@ -3,6 +3,9 @@ require "shake256.jinc" export fn jade_xof_shake256_amd64_ref1(reg u64 output output_length input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __shake256_ref1(output, output_length, input, input_length); ?{}, r = #set0(); return r; diff --git a/src/crypto_xof/shake256/amd64/spec/xof.jazz b/src/crypto_xof/shake256/amd64/spec/xof.jazz index f7045070..04b30887 100644 --- a/src/crypto_xof/shake256/amd64/spec/xof.jazz +++ b/src/crypto_xof/shake256/amd64/spec/xof.jazz @@ -3,6 +3,9 @@ require "shake256.jinc" export fn jade_xof_shake256_amd64_spec(reg u64 output output_length input input_length) -> reg u64 { reg u64 r; + + _ = #init_msf(); + __shake256_spec(output, output_length, input, input_length); ?{}, r = #set0(); return r; From 11385db258b2b42d259babc3eb49897b6ca49d73 Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Thu, 27 Jun 2024 12:42:32 +0100 Subject: [PATCH 11/19] sct: fix compilation of kyber* (no sct yet); there will be a separate PR; --- src/common/keccak/common/fips202_DIRTY.jinc | 6 +- .../amd64/avx2/keccak1600_nomsf.jinc | 272 ++++++++++++++++++ .../amd64/avx2/keccakf1600_nomsf.jinc | 202 +++++++++++++ 3 files changed, 476 insertions(+), 4 deletions(-) create mode 100644 src/common/keccak/keccak1600/amd64/avx2/keccak1600_nomsf.jinc create mode 100644 src/common/keccak/keccak1600/amd64/avx2/keccakf1600_nomsf.jinc diff --git a/src/common/keccak/common/fips202_DIRTY.jinc b/src/common/keccak/common/fips202_DIRTY.jinc index 92698c60..82f6c335 100644 --- a/src/common/keccak/common/fips202_DIRTY.jinc +++ b/src/common/keccak/common/fips202_DIRTY.jinc @@ -1,7 +1,5 @@ -param int KECCAK_ROUNDS=24; - -from Jade require "common/keccak/keccak1600/amd64/avx2/keccak1600.jinc" -from Jade require "common/keccak/keccak1600/amd64/avx2/keccakf1600.jinc" +from Jade require "common/keccak/keccak1600/amd64/avx2/keccak1600_nomsf.jinc" +from Jade require "common/keccak/keccak1600/amd64/avx2/keccakf1600_nomsf.jinc" require "fips202_params.jinc" #[returnaddress="stack"] diff --git a/src/common/keccak/keccak1600/amd64/avx2/keccak1600_nomsf.jinc b/src/common/keccak/keccak1600/amd64/avx2/keccak1600_nomsf.jinc new file mode 100644 index 00000000..0f6ace84 --- /dev/null +++ b/src/common/keccak/keccak1600/amd64/avx2/keccak1600_nomsf.jinc @@ -0,0 +1,272 @@ +param int KECCAK_ROUNDS=24; + +require "keccakf1600_nomsf.jinc" + +inline fn __keccak_init_avx2() -> reg u256[7] +{ + inline int i; + reg u256[7] state; + + for i=0 to 7 + { state[i] = #set0_256(); } + + return state; +} + + +inline fn __init_s_state_avx2() -> stack u64[28] +{ + inline int i; + stack u64[28] s_state; + reg u256 zero; + + zero = #set0_256(); + for i=0 to 7 + { s_state[u256 i] = zero; } + + return s_state; +} + + +inline fn __add_full_block_avx2( + reg u256[7] state, + stack u64[28] s_state, + reg ptr u64[25] a_jagged_p, + reg u64 in inlen, + reg u64 rate +) -> reg u256[7], stack u64[28], reg u64, reg u64 +{ + + inline int i; + reg u64 j l t rate8; + + rate8 = rate; + rate8 >>= 3; + j = 0; + while ( j < rate8 ) + { + t = [in + 8*j]; + l = a_jagged_p[(int) j]; + s_state[(int) l] = t; + j += 1; + } + + //TODO: check & change to #VPBROADCAST_4u64 + t = s_state[0]; + s_state[1] = t; + s_state[2] = t; + s_state[3] = t; + + for i = 0 to 7 + { state[i] ^= s_state[u256 i]; } + + in += rate; + inlen -= rate; + + return state, s_state, in, inlen; +} + + +// TODO: refactor when this feature is available: https://github.com/haslab/libjbn/wiki/Feature-request-%231#procedural-parameters +inline fn __add_final_block_avx2( + reg u256[7] state, + stack u64[28] s_state, + reg ptr u64[25] a_jagged_p, + reg u64 in inlen, + reg u8 trail_byte, + reg u64 rate +) -> reg u256[7] +{ + inline int i; + reg u64 j l t inlen8; + reg u8 c; + + s_state = __init_s_state_avx2(); + + inlen8 = inlen; + inlen8 >>= 3; + j = 0; + while ( j < inlen8 ) + { + t = [in + 8*j]; + l = a_jagged_p[(int) j]; + s_state[(int) l] = t; + j += 1; + } + l = a_jagged_p[(int) j]; + l <<= 3; + j <<= 3; + + while ( j < inlen ) + { + c = (u8)[in + j]; + s_state[u8 (int) l] = c; + j += 1; + l += 1; + } + + s_state[u8 (int) l] = trail_byte; + + // j = (rate-1) >> 3; + j = rate; j -= 1; j >>= 3; + l = a_jagged_p[(int) j]; + l <<= 3; + // l += ((rate-1) & 0x7) + j = rate; j -= 1; j &= 0x7; + l += j; + + s_state[u8 (int) l] ^= 0x80; + + t = s_state[0]; + s_state[1] = t; + s_state[2] = t; + s_state[3] = t; + + for i = 0 to 7 + { state[i] ^= s_state[u256 i]; } + + return state; +} + + +// obs: @pre: len <= rate_in_bytes +inline fn __xtr_full_block_avx2( + reg u256[7] state, + reg ptr u64[25] a_jagged_p, + reg u64 out, + reg u64 len +) -> reg u64 +{ + inline int i; + stack u64[28] s_state; + reg u64 j l t len8; + + for i = 0 to 7 + { s_state[u256 i] = state[i]; } + + len8 = len; + len8 >>= 3; + j = 0; + while ( j < len8 ) + { + l = a_jagged_p[(int) j]; + t = s_state[(int) l]; + [out + 8*j] = t; + j += 1; + } + + out += len; + + return out; +} + + +// obs: @pre: len <= rate_in_bytes +inline fn __xtr_bytes_avx2( + reg u256[7] state, + reg ptr u64[25] a_jagged_p, + reg u64 out, + reg u64 len +) -> reg u64 +{ + inline int i; + stack u64[28] s_state; + reg u64 j l t len8; + reg u8 c; + + for i = 0 to 7 + { s_state[u256 i] = state[i]; } + + len8 = len; + len8 >>= 3; + j = 0; + while ( j < len8 ) + { l = a_jagged_p[(int) j]; + t = s_state[(int) l]; + [out + 8*j] = t; + j += 1; + } + l = a_jagged_p[(int)j]; + j <<= 3; + l <<= 3; + + while ( j < len ) + { + c = s_state[u8 (int) l]; + (u8)[out + j] = c; + j += 1; + l += 1; + } + + out += len; + + return out; +} + + +inline fn __absorb_avx2( + reg u256[7] state, + reg u64 in inlen, + reg u8 trail_byte, + reg u64 rate +) -> reg u256[7] +{ + stack u64[28] s_state; + reg ptr u64[25] a_jagged_p; + + a_jagged_p = KECCAK_A_JAGGED; + s_state = __init_s_state_avx2(); + + // intermediate blocks + while ( inlen >= rate ) + { + state, s_state, in, inlen = __add_full_block_avx2(state, s_state, a_jagged_p, in, inlen, rate); + state = __keccakf1600_avx2(state); + } + + // final block + state = __add_final_block_avx2(state, s_state, a_jagged_p, in, inlen, trail_byte, rate); + + return state; +} + + +inline fn __squeeze_avx2(reg u256[7] state, reg u64 out outlen rate) +{ + reg ptr u64[25] a_jagged_p; + + a_jagged_p = KECCAK_A_JAGGED; + + // intermediate blocks + while ( outlen > rate ) + { + state = __keccakf1600_avx2(state); + out = __xtr_full_block_avx2(state, a_jagged_p, out, rate); + outlen -= rate; + } + + state = __keccakf1600_avx2(state); + out = __xtr_bytes_avx2(state, a_jagged_p, out, outlen); +} + + +inline fn __keccak1600_avx2(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate) +{ + reg u256[7] state; + + state = __keccak_init_avx2(); + + // absorb + state = __absorb_avx2(state, in, inlen, trail_byte, rate); + + // squeeze + __squeeze_avx2(state, out, outlen, rate); +} + + +fn _keccak1600_avx2(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate) +{ + __keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate); +} + + diff --git a/src/common/keccak/keccak1600/amd64/avx2/keccakf1600_nomsf.jinc b/src/common/keccak/keccak1600/amd64/avx2/keccakf1600_nomsf.jinc new file mode 100644 index 00000000..6ca9dda6 --- /dev/null +++ b/src/common/keccak/keccak1600/amd64/avx2/keccakf1600_nomsf.jinc @@ -0,0 +1,202 @@ + +u256[24] KECCAK_IOTAS = +{ (4u64)[0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001] + ,(4u64)[0x0000000000008082, 0x0000000000008082, 0x0000000000008082, 0x0000000000008082] + ,(4u64)[0x800000000000808a, 0x800000000000808a, 0x800000000000808a, 0x800000000000808a] + ,(4u64)[0x8000000080008000, 0x8000000080008000, 0x8000000080008000, 0x8000000080008000] + ,(4u64)[0x000000000000808b, 0x000000000000808b, 0x000000000000808b, 0x000000000000808b] + ,(4u64)[0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001] + ,(4u64)[0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081] + ,(4u64)[0x8000000000008009, 0x8000000000008009, 0x8000000000008009, 0x8000000000008009] + ,(4u64)[0x000000000000008a, 0x000000000000008a, 0x000000000000008a, 0x000000000000008a] + ,(4u64)[0x0000000000000088, 0x0000000000000088, 0x0000000000000088, 0x0000000000000088] + ,(4u64)[0x0000000080008009, 0x0000000080008009, 0x0000000080008009, 0x0000000080008009] + ,(4u64)[0x000000008000000a, 0x000000008000000a, 0x000000008000000a, 0x000000008000000a] + ,(4u64)[0x000000008000808b, 0x000000008000808b, 0x000000008000808b, 0x000000008000808b] + ,(4u64)[0x800000000000008b, 0x800000000000008b, 0x800000000000008b, 0x800000000000008b] + ,(4u64)[0x8000000000008089, 0x8000000000008089, 0x8000000000008089, 0x8000000000008089] + ,(4u64)[0x8000000000008003, 0x8000000000008003, 0x8000000000008003, 0x8000000000008003] + ,(4u64)[0x8000000000008002, 0x8000000000008002, 0x8000000000008002, 0x8000000000008002] + ,(4u64)[0x8000000000000080, 0x8000000000000080, 0x8000000000000080, 0x8000000000000080] + ,(4u64)[0x000000000000800a, 0x000000000000800a, 0x000000000000800a, 0x000000000000800a] + ,(4u64)[0x800000008000000a, 0x800000008000000a, 0x800000008000000a, 0x800000008000000a] + ,(4u64)[0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081] + ,(4u64)[0x8000000000008080, 0x8000000000008080, 0x8000000000008080, 0x8000000000008080] + ,(4u64)[0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001] + ,(4u64)[0x8000000080008008, 0x8000000080008008, 0x8000000080008008, 0x8000000080008008] +}; + + +u256[6] KECCAK_RHOTATES_LEFT = +{ + (4u64)[41, 36, 18, 3], + (4u64)[27, 28, 62, 1], + (4u64)[39, 56, 6, 45], + (4u64)[ 8, 55, 61, 10], + (4u64)[20, 25, 15, 2], + (4u64)[14, 21, 43, 44] +}; + + +u256[6] KECCAK_RHOTATES_RIGHT = +{ + (4u64)[64-41, 64-36, 64-18, 64- 3], + (4u64)[64-27, 64-28, 64-62, 64- 1], + (4u64)[64-39, 64-56, 64- 6, 64-45], + (4u64)[64- 8, 64-55, 64-61, 64-10], + (4u64)[64-20, 64-25, 64-15, 64- 2], + (4u64)[64-14, 64-21, 64-43, 64-44] +}; + + +u64[25] KECCAK_A_JAGGED = +{ + 0, 4, 5, 6, 7, + 10, 24, 13, 18, 23, + 8, 16, 25, 22, 15, + 11, 12, 21, 26, 19, + 9, 20, 17, 14, 27 +}; + + +inline fn __keccakf1600_avx2(reg u256[7] state) -> reg u256[7] +{ + reg u256[9] t; + reg u256 c00 c14 d00 d14; + + reg bool zf; + reg u64 r iotas_o; + + reg ptr u256[24] iotas_p; + reg ptr u256[6] rhotates_left_p; + reg ptr u256[6] rhotates_right_p; + + iotas_p = KECCAK_IOTAS; + iotas_o = 0; + rhotates_left_p = KECCAK_RHOTATES_LEFT; + rhotates_right_p = KECCAK_RHOTATES_RIGHT; + + r = KECCAK_ROUNDS; + while + { + //######################################## Theta + c00 = #VPSHUFD_256(state[2], (4u2)[1,0,3,2]); + c14 = state[5] ^ state[3]; + t[2] = state[4] ^ state[6]; + c14 = c14 ^ state[1]; + c14 = c14 ^ t[2]; + t[4] = #VPERMQ(c14, (4u2)[2,1,0,3]); + c00 = c00 ^ state[2]; + t[0] = #VPERMQ(c00, (4u2)[1,0,3,2]); + t[1] = c14 >>4u64 63; + t[2] = c14 +4u64 c14; + t[1] = t[1] | t[2]; + d14 = #VPERMQ(t[1], (4u2)[0,3,2,1]); + d00 = t[1] ^ t[4]; + d00 = #VPERMQ(d00, (4u2)[0,0,0,0]); + c00 = c00 ^ state[0]; + c00 = c00 ^ t[0]; + t[0] = c00 >>4u64 63; + t[1] = c00 +4u64 c00; + t[1] = t[1] | t[0]; + state[2] = state[2] ^ d00; + state[0] = state[0] ^ d00; + d14 = #VPBLEND_8u32(d14, t[1], (8u1)[1,1,0,0,0,0,0,0]); + t[4] = #VPBLEND_8u32(t[4], c00, (8u1)[0,0,0,0,0,0,1,1]); + d14 = d14 ^ t[4]; + + //######################################## Rho + Pi + pre-Chi shuffle + t[3] = #VPSLLV_4u64(state[2], rhotates_left_p[0] ); + state[2] = #VPSRLV_4u64(state[2], rhotates_right_p[0] ); + state[2] = state[2] | t[3]; + state[3] = state[3] ^ d14; + t[4] = #VPSLLV_4u64(state[3], rhotates_left_p[2] ); + state[3] = #VPSRLV_4u64(state[3], rhotates_right_p[2] ); + state[3] = state[3] | t[4]; + state[4] = state[4] ^ d14; + t[5] = #VPSLLV_4u64(state[4], rhotates_left_p[3] ); + state[4] = #VPSRLV_4u64(state[4], rhotates_right_p[3] ); + state[4] = state[4] | t[5]; + state[5] = state[5] ^ d14; + t[6] = #VPSLLV_4u64(state[5], rhotates_left_p[4] ); + state[5] = #VPSRLV_4u64(state[5], rhotates_right_p[4] ); + state[5] = state[5] | t[6]; + state[6] = state[6] ^ d14; + t[3] = #VPERMQ(state[2], (4u2)[2,0,3,1]); + t[4] = #VPERMQ(state[3], (4u2)[2,0,3,1]); + t[7] = #VPSLLV_4u64(state[6], rhotates_left_p[5] ); + t[1] = #VPSRLV_4u64(state[6], rhotates_right_p[5] ); + t[1] = t[1] | t[7]; + state[1] = state[1] ^ d14; + t[5] = #VPERMQ(state[4], (4u2)[0,1,2,3]); + t[6] = #VPERMQ(state[5], (4u2)[1,3,0,2]); + t[8] = #VPSLLV_4u64(state[1], rhotates_left_p[1] ); + t[2] = #VPSRLV_4u64(state[1], rhotates_right_p[1] ); + t[2] = t[2] | t[8]; + + //######################################## Chi + t[7] = #VPSRLDQ_256(t[1], 8); + t[0] = !t[1] & t[7]; + state[3] = #VPBLEND_8u32(t[2], t[6], (8u1)[0,0,0,0,1,1,0,0]); + t[8] = #VPBLEND_8u32(t[4], t[2], (8u1)[0,0,0,0,1,1,0,0]); + state[5] = #VPBLEND_8u32(t[3], t[4], (8u1)[0,0,0,0,1,1,0,0]); + t[7] = #VPBLEND_8u32(t[2], t[3], (8u1)[0,0,0,0,1,1,0,0]); + state[3] = #VPBLEND_8u32(state[3], t[4], (8u1)[0,0,1,1,0,0,0,0]); + t[8] = #VPBLEND_8u32(t[8], t[5], (8u1)[0,0,1,1,0,0,0,0]); + state[5] = #VPBLEND_8u32(state[5], t[2], (8u1)[0,0,1,1,0,0,0,0]); + t[7] = #VPBLEND_8u32(t[7], t[6], (8u1)[0,0,1,1,0,0,0,0]); + state[3] = #VPBLEND_8u32(state[3], t[5], (8u1)[1,1,0,0,0,0,0,0]); + t[8] = #VPBLEND_8u32(t[8], t[6], (8u1)[1,1,0,0,0,0,0,0]); + state[5] = #VPBLEND_8u32(state[5], t[6], (8u1)[1,1,0,0,0,0,0,0]); + t[7] = #VPBLEND_8u32(t[7], t[4], (8u1)[1,1,0,0,0,0,0,0]); + state[3] = !state[3] & t[8]; + state[5] = !state[5] & t[7]; + state[6] = #VPBLEND_8u32(t[5], t[2], (8u1)[0,0,0,0,1,1,0,0]); + t[8] = #VPBLEND_8u32(t[3], t[5], (8u1)[0,0,0,0,1,1,0,0]); + state[3] = state[3] ^ t[3]; + state[6] = #VPBLEND_8u32(state[6], t[3], (8u1)[0,0,1,1,0,0,0,0]); + t[8] = #VPBLEND_8u32(t[8], t[4], (8u1)[0,0,1,1,0,0,0,0]); + state[5] = state[5] ^ t[5]; + state[6] = #VPBLEND_8u32(state[6], t[4], (8u1)[1,1,0,0,0,0,0,0]); + t[8] = #VPBLEND_8u32(t[8], t[2], (8u1)[1,1,0,0,0,0,0,0]); + state[6] = !state[6] & t[8]; + state[6] = state[6] ^ t[6]; + state[4] = #VPERMQ(t[1], (4u2)[0,1,3,2]); + t[8] = #VPBLEND_8u32(state[4], state[0], (8u1)[0,0,1,1,0,0,0,0]); + state[1] = #VPERMQ(t[1], (4u2)[0,3,2,1]); + state[1] = #VPBLEND_8u32(state[1], state[0], (8u1)[1,1,0,0,0,0,0,0]); + state[1] = !state[1] & t[8]; + state[2] = #VPBLEND_8u32(t[4], t[5], (8u1)[0,0,0,0,1,1,0,0]); + t[7] = #VPBLEND_8u32(t[6], t[4], (8u1)[0,0,0,0,1,1,0,0]); + state[2] = #VPBLEND_8u32(state[2], t[6], (8u1)[0,0,1,1,0,0,0,0]); + t[7] = #VPBLEND_8u32(t[7], t[3], (8u1)[0,0,1,1,0,0,0,0]); + state[2] = #VPBLEND_8u32(state[2], t[3], (8u1)[1,1,0,0,0,0,0,0]); + t[7] = #VPBLEND_8u32(t[7], t[5], (8u1)[1,1,0,0,0,0,0,0]); + state[2] = !state[2] & t[7]; + state[2] = state[2] ^ t[2]; + t[0] = #VPERMQ(t[0], (4u2)[0,0,0,0]); + state[3] = #VPERMQ(state[3], (4u2)[0,1,2,3]); + state[5] = #VPERMQ(state[5], (4u2)[2,0,3,1]); + state[6] = #VPERMQ(state[6], (4u2)[1,3,0,2]); + state[4] = #VPBLEND_8u32(t[6], t[3], (8u1)[0,0,0,0,1,1,0,0]); + t[7] = #VPBLEND_8u32(t[5], t[6], (8u1)[0,0,0,0,1,1,0,0]); + state[4] = #VPBLEND_8u32(state[4], t[5], (8u1)[0,0,1,1,0,0,0,0]); + t[7] = #VPBLEND_8u32(t[7], t[2], (8u1)[0,0,1,1,0,0,0,0]); + state[4] = #VPBLEND_8u32(state[4], t[2], (8u1)[1,1,0,0,0,0,0,0]); + t[7] = #VPBLEND_8u32(t[7], t[3], (8u1)[1,1,0,0,0,0,0,0]); + state[4] = !state[4] & t[7]; + state[0] = state[0] ^ t[0]; + state[1] = state[1] ^ t[1]; + state[4] = state[4] ^ t[4]; + + //######################################## Iota + state[0] = state[0] ^ iotas_p.[(int) iotas_o]; + iotas_o += 32; + + _,_,_,zf,r = #DEC_64(r); + }(!zf) + + return state; +} + + From 1a0cf66b4ff6adf45bfae21f0aad6c0c11502ed8 Mon Sep 17 00:00:00 2001 From: Vincent Laporte Date: Thu, 27 Jun 2024 17:45:32 +0200 Subject: [PATCH 12/19] Remove declarations of unused variables --- src/common/keccak/common/fips202_DIRTY.jinc | 1 - .../kyber/common/amd64/avx2/poly.jinc | 17 +++-------------- .../kyber/common/amd64/avx2/polyvec.jinc | 2 -- src/crypto_kem/kyber/common/amd64/kem.jinc | 1 - src/crypto_kem/kyber/common/amd64/ref/poly.jinc | 3 --- .../kyber/common/amd64/ref/polyvec.jinc | 2 -- .../kyber/common/amd64/ref/verify.jinc | 2 +- .../kyber/kyber512/amd64/ref/indcpa.jinc | 2 +- .../kyber/kyber768/amd64/avx2/gen_matrix.jinc | 1 - .../kyber/kyber768/amd64/ref/indcpa.jinc | 10 ++++------ .../kyber/kyber768/amd64/ref/poly.jinc | 4 ---- .../kyber/kyber768/amd64/ref/polyvec.jinc | 2 -- .../kyber/kyber768/amd64/ref/verify.jinc | 2 +- .../curve25519/amd64/ref5/scalarmult.jazz | 2 -- .../dilithium/common/amd64/avx2/expandA.jinc | 1 - .../common/amd64/avx2/expandA_end.jinc | 4 ---- .../dilithium/common/amd64/avx2/expandMask.jinc | 4 ---- .../common/amd64/avx2/expandMask_end.jinc | 2 -- .../dilithium/common/amd64/avx2/ntt.jinc | 7 ++----- .../dilithium/common/amd64/expandS.jinc | 7 ++----- .../dilithium/common/amd64/fips202.jinc | 1 - .../dilithium/common/amd64/keygen_end.jinc | 5 ----- .../dilithium/common/amd64/packing.jinc | 5 +---- .../dilithium/common/amd64/poly.jinc | 5 +---- .../dilithium/common/amd64/verify_end.jinc | 13 ++++--------- .../dilithium/dilithium2/amd64/avx2/common.jinc | 5 +---- .../dilithium/dilithium3/amd64/avx2/common.jinc | 3 --- .../falcon/falcon512/amd64/avx2/test.jazz | 1 - 28 files changed, 21 insertions(+), 93 deletions(-) diff --git a/src/common/keccak/common/fips202_DIRTY.jinc b/src/common/keccak/common/fips202_DIRTY.jinc index 82f6c335..cc4c86af 100644 --- a/src/common/keccak/common/fips202_DIRTY.jinc +++ b/src/common/keccak/common/fips202_DIRTY.jinc @@ -336,7 +336,6 @@ fn _sha3_512_32(reg ptr u8[64] out, reg const ptr u8[32] in) -> stack u8[64] inline fn __shake128_absorb34(reg u256[7] state, reg const ptr u8[34] in) -> reg u256[7] { - reg u128 t128; stack u64[28] s_state; stack u64[25] a_jagged_p; reg u64 l t; diff --git a/src/crypto_kem/kyber/common/amd64/avx2/poly.jinc b/src/crypto_kem/kyber/common/amd64/avx2/poly.jinc index 1241817f..d739c8ed 100644 --- a/src/crypto_kem/kyber/common/amd64/avx2/poly.jinc +++ b/src/crypto_kem/kyber/common/amd64/avx2/poly.jinc @@ -208,11 +208,7 @@ fn _poly_compress(reg u64 rp, reg ptr u16[KYBER_N] a) -> reg ptr u16[KYBER_N] { inline int i; reg u256 f0 f1 f2 f3 v shift1 mask shift2 permidx; - reg u128 t0 t1 t3; reg ptr u16[16] x16p; - reg u64 t64; - reg u32 t32; - reg u16 t16; a = _poly_csubq(a); @@ -258,11 +254,7 @@ fn _poly_compress_1(reg ptr u8[KYBER_POLYCOMPRESSEDBYTES] rp, reg ptr u16[KYBER_ { inline int i; reg u256 f0 f1 f2 f3 v shift1 mask shift2 permidx; - reg u128 t0 t1 t3; reg ptr u16[16] x16p; - reg u64 t64; - reg u32 t32; - reg u16 t16; a = _poly_csubq(a); @@ -451,7 +443,7 @@ u8[16] pfm_idx_s = {0, 1, 4, 5, 8, 9, 12, 13, fn _poly_frommsg(reg ptr u16[KYBER_N] rp, reg u64 ap) -> stack u16[KYBER_N] { inline int i; - reg u256 f g0 g1 g2 g3 g4 h0 h1 h2 h3; + reg u256 f g0 g1 g2 g3 h0 h1 h2 h3; reg u256 shift idx hqs; reg ptr u16[16] x16p; @@ -499,7 +491,7 @@ fn _poly_frommsg(reg ptr u16[KYBER_N] rp, reg u64 ap) -> stack u16[KYBER_N] fn _poly_frommsg_1(reg ptr u16[KYBER_N] rp, reg ptr u8[KYBER_INDCPA_MSGBYTES] ap) -> stack u16[KYBER_N] { inline int i; - reg u256 f g0 g1 g2 g3 g4 h0 h1 h2 h3; + reg u256 f g0 g1 g2 g3 h0 h1 h2 h3; reg u256 shift idx hqs; reg ptr u16[16] x16p; @@ -879,7 +871,7 @@ inline fn __invntt___butterfly64x(reg u256 rl0 rl1 rl2 rl3 rh0 rh1 rh2 rh3 zl0 zl1 zh0 zh1 qx16) -> reg u256, reg u256, reg u256, reg u256, reg u256, reg u256, reg u256, reg u256 { - reg u256 t0 t1 t2 t3 t4 t5 t6 t7; + reg u256 t0 t1 t2 t3; t0 = #VPSUB_16u16(rl0, rh0); t1 = #VPSUB_16u16(rl1, rh1); @@ -921,7 +913,6 @@ fn _poly_invntt(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] { reg u256 zeta0 zeta1 zeta2 zeta3 r0 r1 r2 r3 r4 r5 r6 r7 qx16 vx16 flox16 fhix16; reg ptr u16[400] zetasp; - reg ptr u16[16] qx16p; inline int i; zetasp = jzetas_inv_exp; @@ -1119,8 +1110,6 @@ fn __butterfly64x(reg u256 rl0 rl1 rl2 rl3 rh0 rh1 rh2 rh3 zl0 zl1 zh0 zh1 qx16) fn _poly_ntt(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] { reg u256 zeta0 zeta1 zeta2 zeta3 r0 r1 r2 r3 r4 r5 r6 r7 qx16 vx16; - reg u32 t; - reg u16 w; reg ptr u16[400] zetasp; inline int i; diff --git a/src/crypto_kem/kyber/common/amd64/avx2/polyvec.jinc b/src/crypto_kem/kyber/common/amd64/avx2/polyvec.jinc index ee2d6ab8..e4f288fc 100644 --- a/src/crypto_kem/kyber/common/amd64/avx2/polyvec.jinc +++ b/src/crypto_kem/kyber/common/amd64/avx2/polyvec.jinc @@ -80,7 +80,6 @@ fn __polyvec_compress(reg u64 rp, stack u16[KYBER_VECN] a) reg u256 f0 f1 f2 v v8 off shift1 mask shift2 sllvdidx shufbidx; reg u128 t0 t1; reg ptr u16[16] x16p; - reg ptr u8[32] x8p; a = __polyvec_csubq(a); @@ -127,7 +126,6 @@ fn __polyvec_compress_1(reg ptr u8[KYBER_POLYVECCOMPRESSEDBYTES] rp, stack u16[K reg u256 f0 f1 f2 v v8 off shift1 mask shift2 sllvdidx shufbidx; reg u128 t0 t1; reg ptr u16[16] x16p; - reg ptr u8[32] x8p; a = __polyvec_csubq(a); diff --git a/src/crypto_kem/kyber/common/amd64/kem.jinc b/src/crypto_kem/kyber/common/amd64/kem.jinc index 62f1e4b8..ea240165 100644 --- a/src/crypto_kem/kyber/common/amd64/kem.jinc +++ b/src/crypto_kem/kyber/common/amd64/kem.jinc @@ -50,7 +50,6 @@ fn __crypto_kem_enc_derand_jazz(reg u64 ctp, reg u64 shkp, reg u64 pkp, reg ptr stack u8[KYBER_SYMBYTES * 2] buf kr; stack u64 s_pkp s_ctp s_shkp; reg u64 t64; - inline int i; s_pkp = pkp; s_ctp = ctp; diff --git a/src/crypto_kem/kyber/common/amd64/ref/poly.jinc b/src/crypto_kem/kyber/common/amd64/ref/poly.jinc index 3978eaec..5c40ec54 100644 --- a/src/crypto_kem/kyber/common/amd64/ref/poly.jinc +++ b/src/crypto_kem/kyber/common/amd64/ref/poly.jinc @@ -45,7 +45,6 @@ fn _poly_csubq(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] fn _poly_basemul(reg ptr u16[KYBER_N] rp, reg const ptr u16[KYBER_N] ap bp) -> reg ptr u16[KYBER_N] { - reg u64 offset; reg u16 zeta; reg u16 r0; reg u16 r1; @@ -296,7 +295,6 @@ fn _poly_frommsg(reg ptr u16[KYBER_N] rp, reg u64 ap) -> stack u16[KYBER_N] reg u8 c; reg u16 t; inline int i; - inline int j; for i = 0 to KYBER_INDCPA_MSGBYTES { @@ -359,7 +357,6 @@ fn _i_poly_frommsg(reg ptr u16[KYBER_N] rp, reg ptr u8[KYBER_INDCPA_MSGBYTES] ap reg u8 c; reg u16 t; inline int i; - inline int j; for i = 0 to KYBER_INDCPA_MSGBYTES { diff --git a/src/crypto_kem/kyber/common/amd64/ref/polyvec.jinc b/src/crypto_kem/kyber/common/amd64/ref/polyvec.jinc index e1aee308..ea7c31d8 100644 --- a/src/crypto_kem/kyber/common/amd64/ref/polyvec.jinc +++ b/src/crypto_kem/kyber/common/amd64/ref/polyvec.jinc @@ -31,7 +31,6 @@ fn __polyvec_compress(reg u64 rp, stack u16[KYBER_VECN] a) { stack u16[KYBER_VECN] aa; reg u16 c, b; - reg u16 d; reg u64[4] t; reg u64 i j; inline int k; @@ -95,7 +94,6 @@ fn __i_polyvec_compress(reg ptr u8[KYBER_POLYVECCOMPRESSEDBYTES] rp, stack u16[K { stack u16[KYBER_VECN] aa; reg u16 c, b; - reg u16 d; reg u64[4] t; reg u64 i j; inline int k; diff --git a/src/crypto_kem/kyber/common/amd64/ref/verify.jinc b/src/crypto_kem/kyber/common/amd64/ref/verify.jinc index ad521ce9..effce7b6 100644 --- a/src/crypto_kem/kyber/common/amd64/ref/verify.jinc +++ b/src/crypto_kem/kyber/common/amd64/ref/verify.jinc @@ -25,7 +25,7 @@ fn __verify(reg u64 ctp, reg ptr u8[KYBER_INDCPA_BYTES] ctpc) -> reg u64 inline fn __cmov(reg ptr u8[KYBER_SYMBYTES] dst, reg u64 src cnd) -> reg ptr u8[KYBER_SYMBYTES] { - reg u8 t1 t2 bcond; + reg u8 t1 t2; inline int i; cnd = -cnd; diff --git a/src/crypto_kem/kyber/kyber512/amd64/ref/indcpa.jinc b/src/crypto_kem/kyber/kyber512/amd64/ref/indcpa.jinc index c1bb634b..8c76ab6c 100644 --- a/src/crypto_kem/kyber/kyber512/amd64/ref/indcpa.jinc +++ b/src/crypto_kem/kyber/kyber512/amd64/ref/indcpa.jinc @@ -12,7 +12,7 @@ fn __indcpa_keypair_derand(reg u64 pkp, reg u64 skp, reg ptr u8[KYBER_SYMBYTES] stack u8[64] buf; stack u8[KYBER_SYMBYTES] publicseed noiseseed; reg u64 t64; - reg u8 nonce, c; + reg u8 nonce; inline int i; spkp = pkp; diff --git a/src/crypto_kem/kyber/kyber768/amd64/avx2/gen_matrix.jinc b/src/crypto_kem/kyber/kyber768/amd64/avx2/gen_matrix.jinc index 9ae8a167..afbe3819 100644 --- a/src/crypto_kem/kyber/kyber768/amd64/avx2/gen_matrix.jinc +++ b/src/crypto_kem/kyber/kyber768/amd64/avx2/gen_matrix.jinc @@ -575,7 +575,6 @@ fn __gen_matrix(stack u8[KYBER_SYMBYTES] seed, inline int transposed) -> stack u stack u256 fs; reg u256 f; reg u64 ctr0 ctr1 ctr2 ctr3 tmp; - stack u64 ctr0_s; reg u8 flg0 flg1 bflg; reg bool b; reg bool zf; diff --git a/src/crypto_kem/kyber/kyber768/amd64/ref/indcpa.jinc b/src/crypto_kem/kyber/kyber768/amd64/ref/indcpa.jinc index 34c8982f..b8581bd5 100644 --- a/src/crypto_kem/kyber/kyber768/amd64/ref/indcpa.jinc +++ b/src/crypto_kem/kyber/kyber768/amd64/ref/indcpa.jinc @@ -92,11 +92,10 @@ fn __indcpa_enc(stack u64 sctp, reg ptr u8[32] msgp, reg u64 pkp, reg ptr u8[KYB { stack u16[KYBER_VECN] pkpv sp ep bp; stack u16[KYBER_K*KYBER_VECN] aat; - stack u16[KYBER_N] k poly epp v poly0 poly1 poly2; + stack u16[KYBER_N] k epp v; stack u8[KYBER_SYMBYTES] publicseed; - reg u64 i j t64; + reg u64 i t64; reg u64 ctp; - reg u16 t; reg u8 nonce; stack ptr u8[KYBER_SYMBYTES] noiseseed_s; @@ -175,10 +174,9 @@ fn __iindcpa_enc(reg ptr u8[KYBER_CT_LEN] ctp, reg ptr u8[32] msgp, reg u64 pkp, { stack u16[KYBER_VECN] pkpv sp ep bp; stack u16[KYBER_K*KYBER_VECN] aat; - stack u16[KYBER_N] k poly epp v poly0 poly1 poly2; + stack u16[KYBER_N] k epp v; stack u8[KYBER_SYMBYTES] publicseed; - reg u64 i j t64; - reg u16 t; + reg u64 i t64; reg u8 nonce; stack ptr u8[KYBER_CT_LEN] sctp; stack ptr u8[KYBER_SYMBYTES] noiseseed_s; diff --git a/src/crypto_kem/kyber/kyber768/amd64/ref/poly.jinc b/src/crypto_kem/kyber/kyber768/amd64/ref/poly.jinc index b36b9033..8befa13d 100644 --- a/src/crypto_kem/kyber/kyber768/amd64/ref/poly.jinc +++ b/src/crypto_kem/kyber/kyber768/amd64/ref/poly.jinc @@ -59,7 +59,6 @@ fn _poly_csubq(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] fn _poly_basemul(reg ptr u16[KYBER_N] rp, reg const ptr u16[KYBER_N] ap bp) -> reg ptr u16[KYBER_N] { - reg u64 offset; reg u16 zeta; reg u16 r0; reg u16 r1; @@ -316,7 +315,6 @@ fn _poly_frommsg(reg ptr u16[KYBER_N] rp, reg u64 ap) -> stack u16[KYBER_N] reg u8 c; reg u16 t; inline int i; - inline int j; for i = 0 to 32 { @@ -379,7 +377,6 @@ fn _i_poly_frommsg(reg ptr u16[KYBER_N] rp, reg ptr u8[32] ap) -> stack u16[KYBE reg u8 c; reg u16 t; inline int i; - inline int j; for i = 0 to 32 { @@ -441,7 +438,6 @@ fn _poly_getnoise(reg ptr u16[KYBER_N] rp, reg ptr u8[KYBER_SYMBYTES] seed, reg { stack u8[33] extseed; /* 33 = KYBER_SYMBYTES +1 */ stack u8[128] buf; /* 128 = KYBER_ETA*KYBER_N/4 */ - reg u64 outlen; reg u8 c,a,b; reg u16 t; reg u64 i j; diff --git a/src/crypto_kem/kyber/kyber768/amd64/ref/polyvec.jinc b/src/crypto_kem/kyber/kyber768/amd64/ref/polyvec.jinc index a9b3fec9..5025f146 100644 --- a/src/crypto_kem/kyber/kyber768/amd64/ref/polyvec.jinc +++ b/src/crypto_kem/kyber/kyber768/amd64/ref/polyvec.jinc @@ -25,7 +25,6 @@ fn __polyvec_compress(reg u64 rp, stack u16[KYBER_VECN] a) { stack u16[KYBER_VECN] aa; reg u16 c, b; - reg u16 d; reg u64[4] t; reg u64 i j; inline int k; @@ -90,7 +89,6 @@ fn __i_polyvec_compress(reg ptr u8[KYBER_POLYVECCOMPRESSEDBYTES] rp, stack u16[K { stack u16[KYBER_VECN] aa; reg u16 c, b; - reg u16 d; reg u64[4] t; reg u64 i j; inline int k; diff --git a/src/crypto_kem/kyber/kyber768/amd64/ref/verify.jinc b/src/crypto_kem/kyber/kyber768/amd64/ref/verify.jinc index 986916c8..5c2746ee 100644 --- a/src/crypto_kem/kyber/kyber768/amd64/ref/verify.jinc +++ b/src/crypto_kem/kyber/kyber768/amd64/ref/verify.jinc @@ -30,7 +30,7 @@ fn __verify(reg u64 ctp, reg ptr u8[KYBER_CT_LEN] ctpc) -> reg u64 inline fn __cmov(reg ptr u8[KYBER_SYMBYTES] dst, reg u64 src cnd) -> reg ptr u8[KYBER_SYMBYTES] { - reg u8 t1 t2 bcond; + reg u8 t1 t2; inline int i; cnd = -cnd; diff --git a/src/crypto_scalarmult/curve25519/amd64/ref5/scalarmult.jazz b/src/crypto_scalarmult/curve25519/amd64/ref5/scalarmult.jazz index 50d2d533..41043499 100644 --- a/src/crypto_scalarmult/curve25519/amd64/ref5/scalarmult.jazz +++ b/src/crypto_scalarmult/curve25519/amd64/ref5/scalarmult.jazz @@ -4,7 +4,6 @@ require "curve25519.jinc" export fn jade_scalarmult_curve25519_amd64_ref5(#spill_to_mmx reg u64 qp np pp) -> reg u64 { reg u64 r; - stack u64 qps; reg u64[4] q n p; _ = #init_msf(); @@ -25,7 +24,6 @@ export fn jade_scalarmult_curve25519_amd64_ref5(#spill_to_mmx reg u64 qp np pp) export fn jade_scalarmult_curve25519_amd64_ref5_base(#spill_to_mmx reg u64 qp np) -> reg u64 { reg u64 r; - stack u64 qps; reg u64[4] q n; _ = #init_msf(); diff --git a/src/crypto_sign/dilithium/common/amd64/avx2/expandA.jinc b/src/crypto_sign/dilithium/common/amd64/avx2/expandA.jinc index 6d5d9af7..06f42bb1 100644 --- a/src/crypto_sign/dilithium/common/amd64/avx2/expandA.jinc +++ b/src/crypto_sign/dilithium/common/amd64/avx2/expandA.jinc @@ -109,7 +109,6 @@ fn expandA_chunk( stack u64[4] coeffs_filled; stack u64[4] xof_offset; - reg u64 lane; reg u256 v256 v256_zero; reg u64 v64; diff --git a/src/crypto_sign/dilithium/common/amd64/avx2/expandA_end.jinc b/src/crypto_sign/dilithium/common/amd64/avx2/expandA_end.jinc index 8c190c71..ee91634d 100644 --- a/src/crypto_sign/dilithium/common/amd64/avx2/expandA_end.jinc +++ b/src/crypto_sign/dilithium/common/amd64/avx2/expandA_end.jinc @@ -8,8 +8,6 @@ fn expandA_aligned(stack u8[32] rho) -> stack u32[Li2_k * Li2_l * Li2_polydeg] stack u32[Li2_k * Li2_l * Li2_polydeg] matrix; stack u64[4] nonces; - stack u64[4] coeffs_left; - stack u64[4] xof_bytes_left; inline int row col chunk idx lane; @@ -62,8 +60,6 @@ fn expandA_unaligned2(stack u8[32] rho) -> stack u32[Li2_k * Li2_l * Li2_polydeg stack u32[Li2_polydeg] scratch0 scratch1; stack u64[4] nonces; - stack u64[4] coeffs_left; - stack u64[4] xof_bytes_left; inline int row col chunk idx lane; diff --git a/src/crypto_sign/dilithium/common/amd64/avx2/expandMask.jinc b/src/crypto_sign/dilithium/common/amd64/avx2/expandMask.jinc index 7a53144e..177c067e 100644 --- a/src/crypto_sign/dilithium/common/amd64/avx2/expandMask.jinc +++ b/src/crypto_sign/dilithium/common/amd64/avx2/expandMask.jinc @@ -105,8 +105,6 @@ fn expandMask_poly_gamma1_217_4x( stack u64 y_packed_filled; stack u64 output_squeeze_counter; stack u256[25] xof; - stack u64[4] coeffs_filled; - stack u64[4] xof_offset; reg u64 addr v64; reg u64 i j; @@ -182,8 +180,6 @@ fn expandMask_poly_gamma1_219_4x( stack u64 y_packed_filled; stack u64 output_squeeze_counter; stack u256[25] xof; - stack u64[4] coeffs_filled; - stack u64[4] xof_offset; reg u64 addr v64; reg u64 i j; diff --git a/src/crypto_sign/dilithium/common/amd64/avx2/expandMask_end.jinc b/src/crypto_sign/dilithium/common/amd64/avx2/expandMask_end.jinc index 96ba1d4c..6ea6f10c 100644 --- a/src/crypto_sign/dilithium/common/amd64/avx2/expandMask_end.jinc +++ b/src/crypto_sign/dilithium/common/amd64/avx2/expandMask_end.jinc @@ -13,8 +13,6 @@ fn expandMask_buffered( reg u32 v32; - reg ptr u32[Li2_polydeg] poly; - ?{}, polys_generated = #set0_64(); while (polys_generated < Li2_l) { if buffer_offset >= 4 { diff --git a/src/crypto_sign/dilithium/common/amd64/avx2/ntt.jinc b/src/crypto_sign/dilithium/common/amd64/avx2/ntt.jinc index 07db8d35..1bbf7c80 100644 --- a/src/crypto_sign/dilithium/common/amd64/avx2/ntt.jinc +++ b/src/crypto_sign/dilithium/common/amd64/avx2/ntt.jinc @@ -217,9 +217,7 @@ fn ntt_levels0t1(reg ptr u32[256] poly_ptr, reg u256 q, inline int offset) // TODO: Interleave loads/stores with arithmetic ops reg u256 zeta_qinv zeta; - reg u256 poly0 poly1 poly2 poly3 poly4 poly5 poly6 poly7 poly8; - - inline int ii; + reg u256 poly0 poly1 poly2 poly3 poly4 poly5 poly6 poly7; poly0 = #VMOVDQU_256(poly_ptr.[u256 (32 * (0*4 + offset))]); poly1 = #VMOVDQU_256(poly_ptr.[u256 (32 * (1*4 + offset))]); @@ -267,8 +265,7 @@ fn ntt_levels2t7(reg ptr u32[256] poly_ptr, reg u256 q, inline int offset) // TODO: Interleave shuffles with butterflies reg u256 zeta_qinv0 zeta_qinv1 zeta0 zeta1; - reg u256[8] poly; - reg u256 poly0 poly1 poly2 poly3 poly4 poly5 poly6 poly7 poly8 polyx; + reg u256 poly0 poly1 poly2 poly3 poly4 poly5 poly6 poly7 polyx; poly0 = #VMOVDQU_256(poly_ptr.[u256 32 * (8*offset + 0)]); poly1 = #VMOVDQU_256(poly_ptr.[u256 32 * (8*offset + 1)]); diff --git a/src/crypto_sign/dilithium/common/amd64/expandS.jinc b/src/crypto_sign/dilithium/common/amd64/expandS.jinc index 2d9355f1..9667f528 100644 --- a/src/crypto_sign/dilithium/common/amd64/expandS.jinc +++ b/src/crypto_sign/dilithium/common/amd64/expandS.jinc @@ -12,7 +12,6 @@ fn expandSEta2_poly(stack u8[64] rho_prime, reg u16 elem_idx, reg ptr u32[Li2_po // temps reg u64 i; - reg u64 addr; reg u8 c; reg u32 c32 t0 t1; @@ -89,10 +88,8 @@ fn expandSEta4_poly(stack u8[64] rho_prime, reg u16 elem_idx, reg ptr u32[Li2_po // temps reg u64 i; - reg u8 c1 c2; - reg u32 u32_c1; + reg u8 c1; reg u32 v; - reg u64 addr; reg u8 c; reg u32 c32; @@ -144,4 +141,4 @@ fn expandSEta4_poly(stack u8[64] rho_prime, reg u16 elem_idx, reg ptr u32[Li2_po } } return poly; -} \ No newline at end of file +} diff --git a/src/crypto_sign/dilithium/common/amd64/fips202.jinc b/src/crypto_sign/dilithium/common/amd64/fips202.jinc index aeb015ad..3d41a8a5 100644 --- a/src/crypto_sign/dilithium/common/amd64/fips202.jinc +++ b/src/crypto_sign/dilithium/common/amd64/fips202.jinc @@ -126,7 +126,6 @@ u64[24] roundconstants = {0x0000000000000001, 0x0000000000008082, 0x800000000000 0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008}; fn __keccakf1600_ref(reg ptr u64[25] state) -> reg ptr u64[25] { - inline int round; reg ptr u64[24] constptr; reg u64 rctr; diff --git a/src/crypto_sign/dilithium/common/amd64/keygen_end.jinc b/src/crypto_sign/dilithium/common/amd64/keygen_end.jinc index ddf3d3dd..fad51941 100644 --- a/src/crypto_sign/dilithium/common/amd64/keygen_end.jinc +++ b/src/crypto_sign/dilithium/common/amd64/keygen_end.jinc @@ -14,7 +14,6 @@ fn keygen_inner(reg ptr u8[32] random_zeta) stack u8[32] rho; reg ptr u8[32] rho_rsp; stack u8[64] rho_prime; - reg ptr u8[64] rho_prime_rsp; stack u8[32] k; // FFTs stack u32[Li2_k * Li2_l * Li2_polydeg] fft_matA; @@ -22,9 +21,6 @@ fn keygen_inner(reg ptr u8[32] random_zeta) reg ptr u32[Li2_l * Li2_polydeg] s1_rsp; stack u32[Li2_k * Li2_polydeg] s2; - stack u8[SHAKE256_RATE] s256_out; - - stack u32[Li2_k * Li2_polydeg] s2; stack u32[Li2_k * Li2_polydeg] t t1 t0; reg ptr u8[Li2_pack_s1len] s1_in_sk; @@ -36,7 +32,6 @@ fn keygen_inner(reg ptr u8[32] random_zeta) //temp variables reg u64 i; inline int j; - reg u32 v32; reg u8 c; state = shake256_absorb32(random_zeta); diff --git a/src/crypto_sign/dilithium/common/amd64/packing.jinc b/src/crypto_sign/dilithium/common/amd64/packing.jinc index 07038863..59a78c9e 100644 --- a/src/crypto_sign/dilithium/common/amd64/packing.jinc +++ b/src/crypto_sign/dilithium/common/amd64/packing.jinc @@ -19,7 +19,6 @@ fn polyeta_2_pack(reg ptr u32[Li2_polydeg] s1, reg ptr u8[Li2_pack_eta_2_len] de inline int _eta; reg u32 dest0 dest1 dest2; reg u32 t1 t2 t3 t4 t5 t6 t7; - reg u32 coeff; reg u64 i addr; _eta = 2; @@ -81,7 +80,6 @@ fn polyeta_4_pack(reg ptr u32[Li2_polydeg] s1, reg ptr u8[Li2_pack_eta_4_len] de -> reg ptr u8[Li2_pack_eta_4_len] { inline int _eta; - reg u8 value; reg u32 lo hi; reg u32 coeff; @@ -117,7 +115,7 @@ fn polyeta_2_unpack(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_eta_2_len] a inline int _eta; reg u64 i; reg u32 a0 a1 a2; - reg u32 c32_0 c32_1; + reg u32 c32_0; reg u32 coeff; reg u64 dest_addr src_addr; @@ -715,7 +713,6 @@ fn polyt0_pack(reg ptr u32[Li2_polydeg] t0, reg ptr u8[Li2_pack_t0len] dest) reg u64 addr; reg u32 v32; - stack u32 v32s; reg u8 c c1; diff --git a/src/crypto_sign/dilithium/common/amd64/poly.jinc b/src/crypto_sign/dilithium/common/amd64/poly.jinc index f5c73db1..875bf6a3 100644 --- a/src/crypto_sign/dilithium/common/amd64/poly.jinc +++ b/src/crypto_sign/dilithium/common/amd64/poly.jinc @@ -38,9 +38,6 @@ fn poly_subtract(reg ptr u32[Li2_polydeg] f g difference) fn poly_accumulate(reg ptr u32[Li2_polydeg] f sum) -> reg ptr u32[Li2_polydeg] { - reg u32 temp; - stack u32 x1 x2 y; - reg u32 v32 result; reg u64 i; @@ -147,4 +144,4 @@ fn poly_checknorm(reg ptr u32[Li2_polydeg] f, inline int threshold) result_s = result; return result_s; -} \ No newline at end of file +} diff --git a/src/crypto_sign/dilithium/common/amd64/verify_end.jinc b/src/crypto_sign/dilithium/common/amd64/verify_end.jinc index 7bd0b87a..ebb3ff35 100644 --- a/src/crypto_sign/dilithium/common/amd64/verify_end.jinc +++ b/src/crypto_sign/dilithium/common/amd64/verify_end.jinc @@ -23,7 +23,7 @@ fn unpack_hints(reg ptr u8[Li2_omega + Li2_k] hints_buf, reg ptr u32[Li2_k * Li2 reg u64 i j; reg u8 done fail status; reg bool tmp; - reg u32 zero one; + reg u32 zero; reg u64 k hints_elem_offset idx idx1 idx2 idxtmp; reg u64 hints_cumpop; // cumulative popcount of hints @@ -119,10 +119,9 @@ fn verify_inner(stack ptr u8[Li2_SIGN_LEN] sig, reg u64 m, reg u64 m_len, stack reg u32 r_status; stack u32 status; stack u8 z_normcheck_fail c_tilde_result; - reg u64 i j; + reg u64 i; reg u8 byte; - reg u8 k l hints_popcount hints_popcount_fail done; - reg u64 hint_index; + reg u8 hints_popcount_fail; reg ptr u8[Li2_SIGN_LEN] sig_rsp; reg ptr u8[Li2_PK_LEN] pk_rsp; @@ -130,7 +129,7 @@ fn verify_inner(stack ptr u8[Li2_SIGN_LEN] sig, reg u64 m, reg u64 m_len, stack stack u64[25] keccak_state; reg ptr u64[25] keccak_state_rsp; stack u8[32] tr c_tilde c_tilde2; - reg ptr u8[32] tr_rsp c_tilde_rsp c_tilde2_rsp; + reg ptr u8[32] tr_rsp; stack u8[64] mu; stack u32[Li2_k * Li2_l * Li2_polydeg] fft_matA; @@ -144,8 +143,6 @@ fn verify_inner(stack ptr u8[Li2_SIGN_LEN] sig, reg u64 m, reg u64 m_len, stack reg ptr u8[Li2_pack_t1len] t1_buf; reg ptr u8[Li2_omega + Li2_k] hints_buf; - reg u32 coeff; - inline int ii; //status = 0xFF; @@ -230,10 +227,8 @@ inline fn verify(reg u64 ptr_sig, reg u64 ptr_m, reg u64 m_len, reg u64 ptr_pk) -> reg u32 { stack u8[Li2_PK_LEN] pk; reg ptr u8[Li2_PK_LEN] pk_rsp; - stack ptr u8[Li2_PK_LEN] pk_ssp; stack u8[Li2_SIGN_LEN] sig; reg ptr u8[Li2_SIGN_LEN] sig_rsp; - stack ptr u8[Li2_SIGN_LEN] sig_ssp; reg u8 byte; reg u64 i; diff --git a/src/crypto_sign/dilithium/dilithium2/amd64/avx2/common.jinc b/src/crypto_sign/dilithium/dilithium2/amd64/avx2/common.jinc index a3b6363c..d8f19529 100644 --- a/src/crypto_sign/dilithium/dilithium2/amd64/avx2/common.jinc +++ b/src/crypto_sign/dilithium/dilithium2/amd64/avx2/common.jinc @@ -28,9 +28,6 @@ fn use_hint(reg u32 a hint) inline fn expandMask_poly_4x(reg ptr u32[4 * Li2_polydeg] f_4x, reg ptr u8[64] rho_prime, stack u16 kappa) -> reg ptr u32[4 * Li2_polydeg] { - reg u64 i; - reg u32 v32; - f_4x = expandMask_poly_gamma1_217_4x(f_4x, rho_prime, kappa); return f_4x; } @@ -54,4 +51,4 @@ fn expandA(stack u8[32] rho) -> stack u32[Li2_k * Li2_l * Li2_polydeg] { stack u32[Li2_k * Li2_l * Li2_polydeg] matrix; matrix = expandA_aligned(rho); return matrix; -} \ No newline at end of file +} diff --git a/src/crypto_sign/dilithium/dilithium3/amd64/avx2/common.jinc b/src/crypto_sign/dilithium/dilithium3/amd64/avx2/common.jinc index e904742e..614e4a78 100644 --- a/src/crypto_sign/dilithium/dilithium3/amd64/avx2/common.jinc +++ b/src/crypto_sign/dilithium/dilithium3/amd64/avx2/common.jinc @@ -28,9 +28,6 @@ fn use_hint(reg u32 a hint) inline fn expandMask_poly_4x(reg ptr u32[4 * Li2_polydeg] f_4x, reg ptr u8[64] rho_prime, stack u16 kappa) -> reg ptr u32[4 * Li2_polydeg] { - reg u64 i; - reg u32 v32; - f_4x = expandMask_poly_gamma1_219_4x(f_4x, rho_prime, kappa); return f_4x; } diff --git a/src/crypto_sign/falcon/falcon512/amd64/avx2/test.jazz b/src/crypto_sign/falcon/falcon512/amd64/avx2/test.jazz index 7349e298..f1892f09 100644 --- a/src/crypto_sign/falcon/falcon512/amd64/avx2/test.jazz +++ b/src/crypto_sign/falcon/falcon512/amd64/avx2/test.jazz @@ -10,7 +10,6 @@ fn __decode_public_key_external(reg u64 h pk) -> reg u32 { reg u32 failed; stack u16[ARRAY_N] h_buff; reg u64 i; - reg u16 tmp16; h_buff, failed = __decode_public_key(h_buff, pk); From 65b2a51c18fe9c088895754951e73989380b98b1 Mon Sep 17 00:00:00 2001 From: "Thing-han, Lim" <15379156+potsrevennil@users.noreply.github.com> Date: Fri, 19 Apr 2024 19:16:55 +0800 Subject: [PATCH 13/19] remove redundant spill, pass sct check --- .../common/amd64/ref/matrix_mul_opt.jinc | 29 +++++-------------- .../frodo/common/amd64/ref/shake128_opt.jinc | 8 ++--- .../frodo/common/amd64/ref/shake256_opt.jinc | 16 +++++----- .../frodo/frodo640shake/amd64/ref/kem.jazz | 15 ++++++---- .../frodo/frodo640shake/amd64/ref/kem.jinc | 24 +++++++-------- 5 files changed, 41 insertions(+), 51 deletions(-) diff --git a/src/crypto_kem/frodo/common/amd64/ref/matrix_mul_opt.jinc b/src/crypto_kem/frodo/common/amd64/ref/matrix_mul_opt.jinc index deca2d84..9ecb91e7 100644 --- a/src/crypto_kem/frodo/common/amd64/ref/matrix_mul_opt.jinc +++ b/src/crypto_kem/frodo/common/amd64/ref/matrix_mul_opt.jinc @@ -1,7 +1,7 @@ // notes: "16" instead of BYTES_SEED_A on purpose // compilation should fail if BYTES_SEED_A changes -inline fn __pad_seedA(reg ptr u8[16] seedA) -> reg ptr u8[2 + 16 + 6] { +inline fn __pad_seedA(reg ptr u8[16] seedA) -> stack u8[2 + 16 + 6] { reg u64 i j; stack u8[2+16+ 6] s_index_seed_padding; reg u8 v; @@ -31,10 +31,9 @@ fn __AS_plus_E_opt( -> reg ptr u16[NNBAR] { - reg ptr u8[2+16+ 6] index_seed_padding; + stack u8[2+16+ 6] index_seed_padding; - stack u16[N * 8] s_A; - reg ptr u16[N * 8] A; + stack u16[N * 8] A; reg u64 A_offset B_offset S_offset; inline int p; @@ -50,14 +49,12 @@ fn __AS_plus_E_opt( i += 1; } - () = #spill(E, index_seed_padding); + () = #spill(E); - A = s_A; i = 0; B_offset = 0; while( i < N ) { () = #spill(B, S); - () = #unspill(index_seed_padding); A_offset = 0; j = 0; @@ -75,7 +72,6 @@ fn __AS_plus_E_opt( } () = #unspill(B, S); - () = #spill(index_seed_padding); j = 0; jN = 0; @@ -128,10 +124,9 @@ fn __SA_plus_E_opt( -> reg ptr u16[NNBAR] { - reg ptr u8[2+16+ 6] index_seed_padding; + stack u8[2+16+ 6] index_seed_padding; - stack u16[N * 8] s_A; - reg ptr u16[N * 8] A; + stack u16[N * 8] A; reg u64 A_offset B_offset S_offset; inline int p; @@ -141,23 +136,16 @@ fn __SA_plus_E_opt( index_seed_padding = __pad_seedA(seedA); - () = #spill(index_seed_padding); - - A = s_A; i = 0; while( i < N ) { - - () = #spill(B, S); - () = #unspill(index_seed_padding); - - // A_offset = 0; j = 0; + () = #spill(B, S); + while( j < 8 ) { ij = #LEA(i + j); - () = #spill(i, j); index_seed_padding[u16 0] = (16u) ij; @@ -168,7 +156,6 @@ fn __SA_plus_E_opt( } () = #unspill(B, S); - () = #spill(index_seed_padding); // j = 0; diff --git a/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc b/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc index 320c29af..d00a2f52 100644 --- a/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc +++ b/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc @@ -233,11 +233,11 @@ fn __shake128_pkh_opt( j += 1; } - () = #spill(i, j); + () = #spill(i, j, in); state = __keccakf1600_ref1(state); - () = #unspill(i, j); + () = #unspill(i, j, in); i += SHAKE128_RATE/8; } @@ -438,11 +438,11 @@ fn __shake128_ss_opt( j += 1; } - () = #spill(i, j); + () = #spill(i, j, in); state = __keccakf1600_ref1(state); - () = #unspill(i, j); + () = #unspill(i, j, in); i += SHAKE128_RATE/8; } diff --git a/src/crypto_kem/frodo/common/amd64/ref/shake256_opt.jinc b/src/crypto_kem/frodo/common/amd64/ref/shake256_opt.jinc index 177ee356..f89b82eb 100644 --- a/src/crypto_kem/frodo/common/amd64/ref/shake256_opt.jinc +++ b/src/crypto_kem/frodo/common/amd64/ref/shake256_opt.jinc @@ -89,11 +89,11 @@ fn __shake256_r_opt( i = 0; while (i < OUTRND * SHAKE256_RATE/8) { - () = #spill(i, j, out); + () = #spill(i, out); state = __keccakf1600_ref1(state); - () = #unspill(i, j, out); + () = #unspill(i, out); j = 0; while (j < SHAKE256_RATE/8) { @@ -107,11 +107,11 @@ fn __shake256_r_opt( i += SHAKE256_RATE/8; } - () = #spill(i, j, out); + () = #spill(i, out); state = __keccakf1600_ref1(state); - () = #unspill(i, j, out); + () = #unspill(i, out); i = 0; while (i < (OUTLEN % SHAKE256_RATE) / 8) { @@ -160,11 +160,11 @@ fn __shake256_pkh_opt( j += 1; } - () = #spill(i, j); + () = #spill(i, j, in); state = __keccakf1600_ref1(state); - () = #unspill(i, j); + () = #unspill(i, j, in); i += SHAKE256_RATE/8; } @@ -365,11 +365,11 @@ fn __shake256_ss_opt( j += 1; } - () = #spill(i, j); + () = #spill(i, j, in); state = __keccakf1600_ref1(state); - () = #unspill(i, j); + () = #unspill(i, j, in); i += SHAKE256_RATE/8; } diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jazz b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jazz index b6dd27f1..82fd4de5 100644 --- a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jazz +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jazz @@ -1,36 +1,41 @@ from Jade require "crypto_kem/frodo/common/frodo640_params.jinc" from Jade require "crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc" -export fn jade_kem_frodo_frodo640shake_amd64_ref_keypair_derand(#public reg u64 pkp skp coinsp) -> #public reg u64 { +export fn jade_kem_frodo_frodo640shake_amd64_ref_keypair_derand(reg u64 pkp skp coinsp) -> reg u64 { reg u64 r; + _ = #init_msf(); _frodo_amd64_ref_keypair_derand(pkp, skp, coinsp); ?{}, r = #set0(); return r; } -export fn jade_kem_frodo_frodo640shake_amd64_ref_keypair(#public reg u64 pkp skp) -> #public reg u64 { +export fn jade_kem_frodo_frodo640shake_amd64_ref_keypair(reg u64 pkp skp) -> reg u64 { reg u64 r; + _ = #init_msf(); _frodo_amd64_ref_keypair(pkp, skp); ?{}, r = #set0(); return r; } -export fn jade_kem_frodo_frodo640shake_amd64_ref_enc_derand(#public reg u64 ctp ssp pkp coinsp) -> #public reg u64 { +export fn jade_kem_frodo_frodo640shake_amd64_ref_enc_derand(reg u64 ctp ssp pkp coinsp) -> reg u64 { reg u64 r; + _ = #init_msf(); _frodo_amd64_ref_enc_derand(ctp, ssp, pkp, coinsp); ?{}, r = #set0(); return r; } -export fn jade_kem_frodo_frodo640shake_amd64_ref_enc(#public reg u64 ctp ssp pkp) -> #public reg u64 { +export fn jade_kem_frodo_frodo640shake_amd64_ref_enc(reg u64 ctp ssp pkp) -> reg u64 { reg u64 r; + _ = #init_msf(); _frodo_amd64_ref_enc(ctp, ssp, pkp); ?{}, r = #set0(); return r; } -export fn jade_kem_frodo_frodo640shake_amd64_ref_dec(#public reg u64 ssp ctp skp) -> #public reg u64 { +export fn jade_kem_frodo_frodo640shake_amd64_ref_dec(reg u64 ssp ctp skp) -> reg u64 { reg u64 r; + _ = #init_msf(); _frodo_amd64_ref_dec(ssp, ctp, skp); ?{}, r = #set0(); return r; diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc index 922c5a50..6568a3a4 100644 --- a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc @@ -21,28 +21,27 @@ fn __frodo_amd64_ref_keypair_derand( // s || seedA || b || S_T || pkh stack u8[BYTES_SK] sk; - () = #spill(i, j, pkp, skp); + () = #spill(pkp, skp); for k = 0 to BYTES_SEC/8 { sk[u64 k] = coins[u64 k]; } // gen seedA + () = #spill(coins); // stack_coins = coins pk[0:BYTES_SEED_A] = __shake128_seed_A_opt(pk[0:BYTES_SEED_A], coins[BYTES_SEC + BYTES_SEED_SE:BYTES_SEED_A]); // gen S || E + () = #unspill(coins); // coins = stack_coins SE = __shake128_r_opt(SE, coins[BYTES_SEC:BYTES_SEED_SE]); SE = __sample_2NNBAR(SE); - () = #spill(coins); - // B = A*S+E B = __AS_plus_E_opt(B, pk[0:BYTES_SEED_A], SE[0:NNBAR], SE[NNBAR:NNBAR]); // pack pk[BYTES_SEED_A:D * N] = __pack_B(pk[BYTES_SEED_A:D * N], B); - () = #unspill(i); i = 0; while (i < BYTES_PK/8) { sk[u64 BYTES_SEC/8 + i] = pk[u64 i]; @@ -54,11 +53,11 @@ fn __frodo_amd64_ref_keypair_derand( sk[u64 BYTES_SEC/8 + BYTES_PK/8 + i] = SE[u64 i]; i += 1; } - () = #spill(i); sk[BYTES_SEC + BYTES_PK + 2 * NNBAR : BYTES_SEC] = __shake128_pkh_opt(sk[BYTES_SEC + BYTES_PK + 2 * NNBAR:BYTES_SEC], pk); - () = #unspill(i, j, pkp, skp); + () = #unspill(pkp, skp); + _ = #init_msf(); i = 0; j = 0; while (i < BYTES_PK/8) { [pkp + j] = pk[u64 i]; @@ -102,9 +101,6 @@ fn __frodo_amd64_ref_enc_derand( reg ptr u16[NBAR * NBAR] V; stack u8[BYTES_SEC] ss; - pkp = pkp; - () = #spill(ctp, ssp, i, j); - // gen u || salt for k = 0 to (BYTES_SEC + BYTES_SALT)/8 { pkh_u_salt[u64 BYTES_SEC/8 + k] = coins[u64 k]; @@ -114,7 +110,6 @@ fn __frodo_amd64_ref_enc_derand( ct_k[u64 (D * N + D * NBAR)/8 + k] = pkh_u_salt[u64 (BYTES_SEC * 2)/8 + k]; } - () = #unspill(i, j); // read pk i = 0; j = 0; while (i < BYTES_PK/8) { @@ -122,7 +117,8 @@ fn __frodo_amd64_ref_enc_derand( i += 1; j += 8; } - () = #spill(i, j); + + () = #spill(ctp, ssp, i, j); // pkh pkh_u_salt[0:BYTES_SEC] = __shake128_pkh_opt(pkh_u_salt[0:BYTES_SEC], pk); @@ -145,6 +141,7 @@ fn __frodo_amd64_ref_enc_derand( // B' = S'A + E'' Bp = SEE[NNBAR:NNBAR]; + Bp = __SA_plus_E_opt(Bp, pk[0:BYTES_SEED_A], SEE[0:NNBAR]); // c1 <- Pack(B') @@ -169,6 +166,7 @@ fn __frodo_amd64_ref_enc_derand( () = #unspill(i, j, ctp, ssp); i = 0; j = 0; + _ = #init_msf(); while (i < BYTES_CT/8) { [ctp + j] = ct_k[u64 i]; i += 1; @@ -296,6 +294,7 @@ fn _frodo_amd64_ref_dec(reg u64 ssp ctp skp) { ss = __shake128_ss_opt(ss, ct_k); + _ = #init_msf(); ssp = s_ssp; for k = 0 to BYTES_SEC/8 { [ssp + 8*k] = ss[u64 k]; @@ -315,7 +314,7 @@ fn _frodo_amd64_ref_keypair(reg u64 pkp skp) { fn _frodo_amd64_ref_keypair_derand(reg u64 pkp skp coinsp) { #public stack u8[BYTES_SEED_A + BYTES_SEED_SE + BYTES_SEC] coins; - reg u64 i; stack u64 s_i; + reg u64 i; pkp = pkp; skp = skp; @@ -326,7 +325,6 @@ fn _frodo_amd64_ref_keypair_derand(reg u64 pkp skp coinsp) { i += 1; } - s_i = i; __frodo_amd64_ref_keypair_derand(pkp, skp, coins); } From 11523744d7d782e58442156487e95164d2f39d68 Mon Sep 17 00:00:00 2001 From: "Thing-han, Lim" <15379156+potsrevennil@users.noreply.github.com> Date: Mon, 29 Apr 2024 16:34:01 +0800 Subject: [PATCH 14/19] indcpa keypair --- .../frodo/frodo640shake/amd64/ref/indcpa.jinc | 38 +++++++++++ .../frodo/frodo640shake/amd64/ref/kem.jinc | 66 +++++++++---------- 2 files changed, 68 insertions(+), 36 deletions(-) create mode 100644 src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc b/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc new file mode 100644 index 00000000..60649cab --- /dev/null +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc @@ -0,0 +1,38 @@ + +inline +fn __indcpa_keypair_derand( + #spill_to_mmx reg ptr u8[BYTES_SEED_A + BYTES_SEED_SE] coins +) -> stack u8[BYTES_PK], stack u8[2*NNBAR] { + stack u8[BYTES_PK] pk; // seedA || b + stack u8[2*NNBAR] sk; // S_T + stack u16[2 * NNBAR] SE; + stack u16[NNBAR] B; + + reg u64 i; + + i = 0; + while (i < BYTES_SEED_A/8) { + pk[u64 i] = coins[u64 i]; + i += 1; + } + + () = #spill(coins); + // gen S || E + SE = __shake128_r_opt(SE, coins[BYTES_SEED_A:BYTES_SEED_SE]); + + SE = __sample_2NNBAR(SE); + + // B = A*S+E + B = __AS_plus_E_opt(B, pk[0:BYTES_SEED_A], SE[0:NNBAR], SE[NNBAR:NNBAR]); + + // pack + pk[BYTES_SEED_A:D * N] = __pack_B(pk[BYTES_SEED_A:D * N], B); + + i = 0; + while (i < 2 * NNBAR / 8) { + sk[u64 i] = SE[u64 i]; + i += 1; + } + + return pk, sk; +} diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc index 6568a3a4..63172e6a 100644 --- a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc @@ -4,72 +4,66 @@ from Jade require "crypto_kem/frodo/common/amd64/ref/noise.jinc" from Jade require "crypto_kem/frodo/common/amd64/ref/matrix.jinc" from Jade require "crypto_kem/frodo/common/amd64/ref/matrix_mul_opt.jinc" from Jade require "crypto_kem/frodo/common/amd64/ref/pack.jinc" +require "./indcpa.jinc" // coins = s || seed SE || z +inline fn __frodo_amd64_ref_keypair_derand( reg u64 pkp skp, - #spill_to_mmx reg ptr u8[BYTES_SEED_A + BYTES_SEED_SE + BYTES_SEC] coins) { - stack u16[2 * NNBAR] SE; - stack u16[NNBAR] B; - - inline int k; - reg u64 i j; + #spill_to_mmx reg ptr u8[BYTES_SEC + BYTES_SEED_SE + BYTES_SEED_A] coins) { + reg u64 i; // seedA || b stack u8[BYTES_PK] pk; + stack u8[BYTES_SEED_A + BYTES_SEED_SE] indcoins; + stack u8[BYTES_SEC] pkh; - // s || seedA || b || S_T || pkh - stack u8[BYTES_SK] sk; - - () = #spill(pkp, skp); + // S_T + stack u8[2*NNBAR] sk; - for k = 0 to BYTES_SEC/8 { - sk[u64 k] = coins[u64 k]; + i = 0; + while (i < BYTES_SEC/8) { + [skp + i*8] = coins[u64 i]; + i += 1; } - // gen seedA - () = #spill(coins); // stack_coins = coins - pk[0:BYTES_SEED_A] = __shake128_seed_A_opt(pk[0:BYTES_SEED_A], coins[BYTES_SEC + BYTES_SEED_SE:BYTES_SEED_A]); + // copy seedSE + i = 0; + while (i < BYTES_SEED_SE/8) { + indcoins[u64 BYTES_SEED_A/8 + i] = coins[u64 BYTES_SEC/8 + i]; + i += 1; + } - // gen S || E - () = #unspill(coins); // coins = stack_coins - SE = __shake128_r_opt(SE, coins[BYTES_SEC:BYTES_SEED_SE]); - SE = __sample_2NNBAR(SE); + () = #spill(pkp, skp, coins); + indcoins[0:BYTES_SEED_A] = __shake128_seed_A_opt(indcoins[0:BYTES_SEED_A], coins[BYTES_SEC + BYTES_SEED_SE:BYTES_SEED_A]); - // B = A*S+E - B = __AS_plus_E_opt(B, pk[0:BYTES_SEED_A], SE[0:NNBAR], SE[NNBAR:NNBAR]); + pk, sk = __indcpa_keypair_derand(indcoins); + pkh = __shake128_pkh_opt(pkh, pk); - // pack - pk[BYTES_SEED_A:D * N] = __pack_B(pk[BYTES_SEED_A:D * N], B); + () = #unspill(pkp, skp); i = 0; while (i < BYTES_PK/8) { - sk[u64 BYTES_SEC/8 + i] = pk[u64 i]; + [skp + BYTES_SEC + i*8] = pk[u64 i]; i += 1; } i = 0; while (i < 2 * NNBAR / 8) { - sk[u64 BYTES_SEC/8 + BYTES_PK/8 + i] = SE[u64 i]; + [skp + BYTES_SEC + BYTES_PK + i*8] = sk[u64 i]; i += 1; } - sk[BYTES_SEC + BYTES_PK + 2 * NNBAR : BYTES_SEC] = __shake128_pkh_opt(sk[BYTES_SEC + BYTES_PK + 2 * NNBAR:BYTES_SEC], pk); - - () = #unspill(pkp, skp); - _ = #init_msf(); - i = 0; j = 0; + i = 0; while (i < BYTES_PK/8) { - [pkp + j] = pk[u64 i]; + [pkp + i*8] = pk[u64 i]; i += 1; - j += 8; } - i = 0; j = 0; - while (i < BYTES_SK/8) { - [skp + j] = sk[u64 i]; + i = 0; + while (i < BYTES_SEC/8) { + [skp + BYTES_SK - BYTES_SEC + i*8] = pkh[u64 i]; i += 1; - j += 8; } } From 97cc255d3bdd62be9d0e8c7751f1886e4c83b7f7 Mon Sep 17 00:00:00 2001 From: "Thing-han, Lim" <15379156+potsrevennil@users.noreply.github.com> Date: Mon, 29 Apr 2024 18:56:49 +0800 Subject: [PATCH 15/19] incpa enc --- .../frodo/common/amd64/ref/shake128_opt.jinc | 51 ++++++++ .../frodo/frodo640shake/amd64/ref/indcpa.jinc | 73 +++++++++++- .../frodo/frodo640shake/amd64/ref/kem.jinc | 110 +++++++----------- 3 files changed, 166 insertions(+), 68 deletions(-) diff --git a/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc b/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc index d00a2f52..a7d481f7 100644 --- a/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc +++ b/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc @@ -268,6 +268,57 @@ fn __shake128_pkh_opt( return out; } +fn __shake128_SE_k_opt2( + #spill_to_mmx reg ptr u8[BYTES_SEED_SE + BYTES_SEC] out, + #spill_to_mmx reg const ptr u8[2 * BYTES_SEC + BYTES_SALT] in) +-> reg ptr u8[BYTES_SEED_SE + BYTES_SEC] { + #spill_to_mmx reg u64 i; + + stack u64[25] s_state; + reg ptr u64[25] state; + reg u64 t0 zero; + inline int INLEN OUTLEN; + + INLEN = 2 * BYTES_SEC + BYTES_SALT; + OUTLEN = BYTES_SEED_SE + BYTES_SEC; + + state = s_state; + + i = 0; + while (i < INLEN/8) { + t0 = in[u64 i]; + state[i] = t0; + + i += 1; + } + ?{}, zero = #set0(); + + i = INLEN/8; + while (i < 25) { + state[i] = zero; + i += 1; + } + + state[u8 INLEN] = 0x1f; + state[u8 SHAKE128_RATE-1] = 0x80; + + () = #spill(out); + + state = __keccakf1600_ref1(state); + + () = #unspill(out); + + i = 0; + while (i < OUTLEN/8) { + t0 = state[u64 i]; + out[u64 i] = t0; + i += 1; + } + + return out; +} + + fn __shake128_SE_k_opt( #spill_to_mmx reg ptr u8[1 + BYTES_SEED_SE + BYTES_SEC] out, #spill_to_mmx reg const ptr u8[2 * BYTES_SEC + BYTES_SALT] in) diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc b/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc index 60649cab..7bc509bd 100644 --- a/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc @@ -8,11 +8,12 @@ fn __indcpa_keypair_derand( stack u16[2 * NNBAR] SE; stack u16[NNBAR] B; - reg u64 i; + reg u64 i t; i = 0; while (i < BYTES_SEED_A/8) { - pk[u64 i] = coins[u64 i]; + t = coins[u64 i]; + pk[u64 i] = t; i += 1; } @@ -30,9 +31,75 @@ fn __indcpa_keypair_derand( i = 0; while (i < 2 * NNBAR / 8) { - sk[u64 i] = SE[u64 i]; + t = SE[u64 i]; + sk[u64 i] = t; i += 1; } return pk, sk; } + +inline +fn __indcpa_enc_derand( + #spill_to_mmx reg ptr u8[BYTES_CT - BYTES_SALT] ct, + #spill_to_mmx reg ptr u8[BYTES_SEC] u, + #spill_to_mmx reg ptr u8[BYTES_PK] pk, + #spill_to_mmx reg ptr u8[BYTES_SEED_SE] coins +) -> reg ptr u8[BYTES_CT - BYTES_SALT] { + reg u64 i t; + + // 0x96 || seed_SE + stack u8[1 + BYTES_SEED_SE] seedSE; + seedSE[0] = 0x96; + + // S' || E' || E'' + stack u16[2 * NNBAR + NBAR * NBAR] SEE; + stack u16[NNBAR] B; + reg ptr u16[NNBAR] Bp; + stack u16[NBAR * NBAR] C; + reg ptr u16[NBAR * NBAR] V; + + // stack u8[BYTES_CT - BYTES_SALT] ct; + + i = 0; + while (i < BYTES_SEED_SE/8) { + t = coins[u64 i]; + seedSE.[u64 1 + 8*i] = t; + i += 1; + } + + // B <- Unpack(b) + B = __unpack_B(B, pk[BYTES_SEED_A:D * N]); + C = __encode(C, u); + + () = #spill(ct, u, pk, coins); + + // gen input bit string for sampling S and E + SEE = __shake128_encap_r_opt(SEE, seedSE); + + // S' || E' + SEE[0:2 * NNBAR] = __sample_2NNBAR(SEE[0:2 * NNBAR]); + // E'' + SEE[NNBAR * 2:NBAR * NBAR] = __sample_NBAR2(SEE[NNBAR * 2:NBAR * NBAR]); + + // B' = S'A + E'' + Bp = SEE[NNBAR:NNBAR]; + + () = #unspill(pk); + Bp = __SA_plus_E_opt(Bp, pk[0:BYTES_SEED_A], SEE[0:NNBAR]); + + // V = S'B + E'' + V = SEE[NNBAR*2:NBAR*NBAR]; + V = __SB_plus_E_opt(V, SEE[0:NNBAR], B); + + // C = V + Encode(u) + C = __matrix_add(C, V); + + // c1 <- Pack(B') + () = #unspill(ct); + ct[0:D * N] = __pack_B(ct[0:D * N], Bp); + // c2 <- Pack(C) + ct[D * N: D * NBAR] = __pack_C(ct[D * N: D * NBAR], C); + + return ct; +} diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc index 63172e6a..b7540e59 100644 --- a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc @@ -11,7 +11,7 @@ inline fn __frodo_amd64_ref_keypair_derand( reg u64 pkp skp, #spill_to_mmx reg ptr u8[BYTES_SEC + BYTES_SEED_SE + BYTES_SEED_A] coins) { - reg u64 i; + reg u64 i t; // seedA || b stack u8[BYTES_PK] pk; @@ -23,14 +23,16 @@ fn __frodo_amd64_ref_keypair_derand( i = 0; while (i < BYTES_SEC/8) { - [skp + i*8] = coins[u64 i]; + t = coins[u64 i]; + [skp + i*8] = t; i += 1; } // copy seedSE i = 0; while (i < BYTES_SEED_SE/8) { - indcoins[u64 BYTES_SEED_A/8 + i] = coins[u64 BYTES_SEC/8 + i]; + t = coins[u64 BYTES_SEC/8 + i]; + indcoins[u64 BYTES_SEED_A/8 + i] = t; i += 1; } @@ -44,34 +46,38 @@ fn __frodo_amd64_ref_keypair_derand( i = 0; while (i < BYTES_PK/8) { - [skp + BYTES_SEC + i*8] = pk[u64 i]; + t = pk[u64 i]; + [skp + BYTES_SEC + i*8] = t; i += 1; } i = 0; while (i < 2 * NNBAR / 8) { - [skp + BYTES_SEC + BYTES_PK + i*8] = sk[u64 i]; + t = sk[u64 i]; + [skp + BYTES_SEC + BYTES_PK + i*8] = t; i += 1; } i = 0; while (i < BYTES_PK/8) { - [pkp + i*8] = pk[u64 i]; + t = pk[u64 i]; + [pkp + i*8] = t; i += 1; } i = 0; while (i < BYTES_SEC/8) { - [skp + BYTES_SK - BYTES_SEC + i*8] = pkh[u64 i]; + t = pkh[u64 i]; + [skp + BYTES_SK - BYTES_SEC + i*8] = t; i += 1; } } -#[returnaddress="stack"] +inline fn __frodo_amd64_ref_enc_derand( reg u64 ctp ssp pkp, #spill_to_mmx reg ptr u8[BYTES_SEC + BYTES_SALT] coins) { - reg u64 i j; + reg u64 i t; inline int k; // seedA || b @@ -82,93 +88,67 @@ fn __frodo_amd64_ref_enc_derand( // pkh || u || salt stack u8[BYTES_SEC * 2 + BYTES_SALT] pkh_u_salt; - // 0x96 || seedSE || k - stack u8[1 + BYTES_SEED_SE + BYTES_SEC] seedSE_k; - seedSE_k[0] = 0x96; - - // S' || E' || E'' - stack u16[2 * NNBAR + NBAR * NBAR] SEE; + // seedSE || k + stack u8[BYTES_SEED_SE + BYTES_SEC] seedSE_k; - stack u16[NNBAR] B; - reg ptr u16[NNBAR] Bp; - stack u16[NBAR * NBAR] C; - reg ptr u16[NBAR * NBAR] V; stack u8[BYTES_SEC] ss; // gen u || salt - for k = 0 to (BYTES_SEC + BYTES_SALT)/8 { - pkh_u_salt[u64 BYTES_SEC/8 + k] = coins[u64 k]; + i = 0; + while (i < (BYTES_SEC + BYTES_SALT)/8) { + t = coins[u64 i]; + pkh_u_salt[u64 BYTES_SEC/8 + i] = t; + i += 1; } - for k = 0 to BYTES_SALT/8 { - ct_k[u64 (D * N + D * NBAR)/8 + k] = pkh_u_salt[u64 (BYTES_SEC * 2)/8 + k]; + i = 0; + while (i < BYTES_SALT/8) { + t = coins[u64 BYTES_SEC/8 + i]; + ct_k[u64 (BYTES_CT - BYTES_SALT)/8 + i] = t; + i += 1; } // read pk - i = 0; j = 0; + i = 0; while (i < BYTES_PK/8) { - #declassify pk[u64 i] = [pkp + j]; + #declassify pk[u64 i] = [pkp + i*8]; i += 1; - j += 8; } - () = #spill(ctp, ssp, i, j); + () = #spill(ctp, ssp, coins); // pkh pkh_u_salt[0:BYTES_SEC] = __shake128_pkh_opt(pkh_u_salt[0:BYTES_SEC], pk); // seedSE || k - seedSE_k = __shake128_SE_k_opt(seedSE_k, pkh_u_salt); + seedSE_k = __shake128_SE_k_opt2(seedSE_k, pkh_u_salt); + + () = #unspill(coins); + ct_k[0:BYTES_CT - BYTES_SALT] = __indcpa_enc_derand(ct_k[0:BYTES_CT - BYTES_SALT], coins[0:BYTES_SEC], pk, seedSE_k[0:BYTES_SEED_SE]); // copy k - for k = 0 to BYTES_SEC/8 { - ct_k[u64 BYTES_CT/8 + k] = seedSE_k.[u64 1 + BYTES_SEED_SE + 8*k]; + i = 0; + while (i < BYTES_SEC/8) { + t = seedSE_k[u64 BYTES_SEED_SE/8 + i]; + ct_k[u64 BYTES_CT/8 + i] = t; + i += 1; } - // gen input bit string for sampling S and E - SEE = __shake128_encap_r_opt(SEE, seedSE_k[0 : 1 + BYTES_SEED_SE]); - - // S' || E' - SEE[0:2 * NNBAR] = __sample_2NNBAR(SEE[0:2 * NNBAR]); - // E'' - SEE[NNBAR * 2:NBAR * NBAR] = __sample_NBAR2(SEE[NNBAR * 2:NBAR * NBAR]); - - // B' = S'A + E'' - Bp = SEE[NNBAR:NNBAR]; - - Bp = __SA_plus_E_opt(Bp, pk[0:BYTES_SEED_A], SEE[0:NNBAR]); - - // c1 <- Pack(B') - ct_k[0:D * N] = __pack_B(ct_k[0:D * N], Bp); - - // B <- Unpack(b) - B = __unpack_B(B, pk[BYTES_SEED_A:D * N]); - - // V = S'B + E'' - V = SEE[NNBAR*2:NBAR*NBAR]; - V = __SB_plus_E_opt(V, SEE[0:NNBAR], B); - - // C = V + Encode(u) - C = __encode(C, pkh_u_salt[BYTES_SEC:BYTES_SEC]); - C = __matrix_add(C, V); - - // c2 <- Pack(C) - ct_k[D * N: D * NBAR] = __pack_C(ct_k[D * N: D * NBAR], C); - // ss <- shake(c1 || c2 || salt || k) ss = __shake128_ss_opt(ss, ct_k); - () = #unspill(i, j, ctp, ssp); - i = 0; j = 0; + () = #unspill(ctp, ssp); + i = 0; _ = #init_msf(); while (i < BYTES_CT/8) { - [ctp + j] = ct_k[u64 i]; + t = ct_k[u64 i]; + [ctp + i*8] = t; i += 1; - j += 8; } for k = 0 to BYTES_SEC/8 { - [ssp + 8*k] = ss[u64 k]; + t = ss[u64 k]; + [ssp + 8*k] = t; } } From 8bb5828c447e8785833bc6b90b03556b2ef3d6c7 Mon Sep 17 00:00:00 2001 From: "Thing-han, Lim" <15379156+potsrevennil@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:50:25 +0800 Subject: [PATCH 16/19] indcpa dec --- .../frodo/common/amd64/ref/matrix.jinc | 24 ++++ .../frodo/frodo640shake/amd64/ref/indcpa.jinc | 22 ++++ .../frodo/frodo640shake/amd64/ref/kem.jinc | 115 ++++++------------ 3 files changed, 84 insertions(+), 77 deletions(-) diff --git a/src/crypto_kem/frodo/common/amd64/ref/matrix.jinc b/src/crypto_kem/frodo/common/amd64/ref/matrix.jinc index 535c338a..9ae4155b 100644 --- a/src/crypto_kem/frodo/common/amd64/ref/matrix.jinc +++ b/src/crypto_kem/frodo/common/amd64/ref/matrix.jinc @@ -82,6 +82,30 @@ fn __ct_verify_NBAR2(reg ptr u16[NBAR * NBAR] a b) -> stack u8 { return r; } +fn __ct_verify(reg ptr u8[BYTES_CT - BYTES_SALT] a b) -> stack u8 { + reg u64 i; + reg u16 ac tmp; + reg u8 r; + + i = 0; + ac = 0; + while (i < (BYTES_CT - BYTES_SALT)/2) { + tmp = a[u16 i]; + tmp ^= b[u16 i]; + ac |= tmp; + i += 1; + } + + tmp = ac * -1; + ac |= tmp; + ac >>= 15; + ac *= (-1); + + r = (8u) ac; + + return r; +} + #[returnaddress="stack"] fn __ct_select(reg ptr u8[BYTES_SEC] out a b, reg u8 selector) -> stack u8[BYTES_SEC] { reg u64 i; diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc b/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc index 7bc509bd..9971e509 100644 --- a/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc @@ -103,3 +103,25 @@ fn __indcpa_enc_derand( return ct; } + +inline +fn __indcpa_dec( + #spill_to_mmx reg ptr u8[BYTES_SEC] pt, + #spill_to_mmx reg ptr u8[BYTES_CT - BYTES_SALT] ct, + #spill_to_mmx reg ptr u8[2*NNBAR] sk +) -> reg ptr u8[BYTES_SEC] { + stack u16[NNBAR] Bp; + stack u16[NBAR * NBAR] M C; + + // B' <- Unpack(c1) + Bp = __unpack_B(Bp, ct[0:D * N]); + // C <- Unpack(c2) + C = __unpack_C(C, ct[D * N:D * NBAR]); + + // M = C - B'S + M = __mul_BS_opt(M, Bp, sk); + M = __matrix_sub(M, C); + pt = __decode(pt, M); + + return pt; +} diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc index b7540e59..8c5f5562 100644 --- a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc @@ -152,126 +152,87 @@ fn __frodo_amd64_ref_enc_derand( } } -#[returnaddress="stack"] fn _frodo_amd64_ref_dec(reg u64 ssp ctp skp) { #public stack u8[BYTES_PK] pk; stack u8[2 * NNBAR] ST; stack u8[BYTES_SEC] s; stack u8[BYTES_CT + BYTES_SEC] ct_k; - stack u16[NNBAR] B Bp; - reg ptr u16[NNBAR] Bpp; - stack u16[NBAR * NBAR] M C Cp; - reg ptr u16[NBAR * NBAR] V; + stack u8[BYTES_CT - BYTES_SALT] ct2; stack u8[BYTES_SEC * 2 + BYTES_SALT] pkh_u_salt; - stack u8[1 + BYTES_SEED_SE + BYTES_SEC] seedSE_k; + stack u8[BYTES_SEED_SE + BYTES_SEC] seedSE_k; stack u8[BYTES_SEC] ss; - // S' || E' || E'' - stack u16[2 * NNBAR + NBAR * NBAR] SEE; - - reg u8 s1 s2; - reg u64 i j t; - stack u64 s_ssp s_skp; - inline int k; + reg u8 s1; + reg u64 i t; ctp = ctp; skp = skp; - s_ssp = ssp; + ssp = ssp; // copy pkh - for k = 0 to BYTES_SEC/8 { - pkh_u_salt[u64 k] = [skp + BYTES_SK - BYTES_SEC + 8*k]; + i = 0; + while (i < BYTES_SEC/8) { + t = [skp + BYTES_SK - BYTES_SEC + i*8]; + pkh_u_salt[u64 i] = t; + i += 1; } - s_skp = skp; // read ct - i = 0; j = 0; + i = 0; while (i < BYTES_CT/8) { - t = [ctp + j]; + t = [ctp + i*8]; ct_k[u64 i] = t; i += 1; - j += 8; } - for k = 0 to BYTES_SEC/8 { - s[u64 k] = [skp + 8*k]; + i = 0; + while (i < BYTES_SEC/8) { + t = [skp + i*8]; + s[u64 i] = t; + i += 1; } - i = 0; j = 0; + i = 0; while (i < BYTES_PK/8) { - #declassify pk[u64 i] = [skp + BYTES_SEC + j]; + t = [skp + BYTES_SEC + i*8]; + #declassify pk[u64 i] = t; i += 1; - j += 8; } - i = 0; j = 0; + i = 0; while (i < 2 * NNBAR/8) { - ST[u64 i] = [skp + BYTES_SEC + BYTES_PK + j]; + t = [skp + BYTES_SEC + BYTES_PK + i*8]; + ST[u64 i] = t; i += 1; - j += 8; } - () = #spill(i); - // copy salt - for k = 0 to BYTES_SALT/8 { - pkh_u_salt[u64 (BYTES_SEC * 2)/8 + k] = ct_k[u64 (BYTES_CT - BYTES_SALT)/8 + k]; - } - - // B' <- Unpack(c1) - Bp = __unpack_B(Bp, ct_k[0:D * N]); - // C <- Unpack(c2) - C = __unpack_C(C, ct_k[D * N:D * NBAR]); - - // M = C - B'S - M = __mul_BS_opt(M, Bp, ST); - M = __matrix_sub(M, C); - - pkh_u_salt[BYTES_SEC:BYTES_SEC] = __decode(pkh_u_salt[BYTES_SEC:BYTES_SEC], M); - - seedSE_k[0] = 0x96; - seedSE_k = __shake128_SE_k_opt(seedSE_k, pkh_u_salt); - - SEE = __shake128_encap_r_opt(SEE, seedSE_k[0: 1 + BYTES_SEED_SE]); - - // S' || E' - SEE[0:2 * NNBAR] = __sample_2NNBAR(SEE[0:2 * NNBAR]); - // E'' - SEE[NNBAR * 2:NBAR * NBAR] = __sample_NBAR2(SEE[NNBAR * 2:NBAR * NBAR]); - - // B'' = S'A + E' - Bpp = SEE[NNBAR:NNBAR]; - Bpp = __SA_plus_E_opt(Bpp, pk[0:BYTES_SEED_A], SEE[0:NNBAR]); - - // B'' (mod q) - () = #unspill(i); i = 0; - while (i < NNBAR) { - Bpp[i] &= (1 << D) - 1; + while (i < BYTES_SALT/8) { + t = ct_k[u64 (BYTES_CT - BYTES_SALT)/8 + i]; + pkh_u_salt[u64 (BYTES_SEC * 2)/8 + i] = t; i += 1; } - // - B = __unpack_B(B, pk[BYTES_SEED_A:BYTES_PK - BYTES_SEED_A]); + pkh_u_salt[BYTES_SEC:BYTES_SEC] = __indcpa_dec(pkh_u_salt[BYTES_SEC:BYTES_SEC], ct_k[0:BYTES_CT - BYTES_SALT], ST); - V = SEE[NNBAR*2:NBAR*NBAR]; - V = __SB_plus_E_opt(V, SEE[0:NNBAR], B); - - Cp = __encode(Cp, pkh_u_salt[BYTES_SEC:BYTES_SEC]); - Cp = __matrix_add(Cp, V); + () = #spill(ssp); + seedSE_k = __shake128_SE_k_opt2(seedSE_k, pkh_u_salt); + ct2 = __indcpa_enc_derand(ct2, pkh_u_salt[BYTES_SEC:BYTES_SEC], pk, seedSE_k[0:BYTES_SEED_SE]); - s1 = __ct_verify_NNBAR(Bp, Bpp); - s2 = __ct_verify_NBAR2(C, Cp); - s1 |= s2; + s1 = __ct_verify(ct_k[0:BYTES_CT - BYTES_SALT], ct2); - ct_k[BYTES_CT:BYTES_SEC] = __ct_select(ct_k[BYTES_CT:BYTES_SEC], seedSE_k[1+BYTES_SEED_SE:BYTES_SEC], s, s1); + ct_k[BYTES_CT:BYTES_SEC] = __ct_select(ct_k[BYTES_CT:BYTES_SEC], seedSE_k[BYTES_SEED_SE:BYTES_SEC], s, s1); ss = __shake128_ss_opt(ss, ct_k); _ = #init_msf(); - ssp = s_ssp; - for k = 0 to BYTES_SEC/8 { - [ssp + 8*k] = ss[u64 k]; + () = #unspill(ssp); + i = 0; + while (i < BYTES_SEC/8) { + t = ss[u64 i]; + [ssp + i*8] = t; + i += 1; } } From c688898b8e190b3ac874a0589971eba6a5c371a6 Mon Sep 17 00:00:00 2001 From: "Thing-han, Lim" <15379156+potsrevennil@users.noreply.github.com> Date: Mon, 24 Jun 2024 16:32:27 +0800 Subject: [PATCH 17/19] remove returnaddress="stack" --- src/crypto_kem/frodo/common/amd64/ref/matrix.jinc | 4 ---- src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc | 4 ---- src/crypto_kem/frodo/common/amd64/ref/shake128.jinc | 7 ------- src/crypto_kem/frodo/common/amd64/ref/shake256.jinc | 6 ------ 4 files changed, 21 deletions(-) diff --git a/src/crypto_kem/frodo/common/amd64/ref/matrix.jinc b/src/crypto_kem/frodo/common/amd64/ref/matrix.jinc index 9ae4155b..323e89cb 100644 --- a/src/crypto_kem/frodo/common/amd64/ref/matrix.jinc +++ b/src/crypto_kem/frodo/common/amd64/ref/matrix.jinc @@ -14,7 +14,6 @@ fn __matrix_add(reg ptr u16[NBAR * NBAR] a b) -> stack u16[NBAR * NBAR] { return a; } -#[returnaddress="stack"] // a = b - a fn __matrix_sub(reg ptr u16[NBAR * NBAR] a b) -> stack u16[NBAR * NBAR] { reg u64 i; @@ -32,7 +31,6 @@ fn __matrix_sub(reg ptr u16[NBAR * NBAR] a b) -> stack u16[NBAR * NBAR] { return a; } -#[returnaddress="stack"] fn __ct_verify_NNBAR(reg ptr u16[NNBAR] a b) -> stack u8 { reg u64 i; reg u16 ac tmp; @@ -57,7 +55,6 @@ fn __ct_verify_NNBAR(reg ptr u16[NNBAR] a b) -> stack u8 { return r; } -#[returnaddress="stack"] fn __ct_verify_NBAR2(reg ptr u16[NBAR * NBAR] a b) -> stack u8 { reg u64 i; reg u16 ac tmp; @@ -106,7 +103,6 @@ fn __ct_verify(reg ptr u8[BYTES_CT - BYTES_SALT] a b) -> stack u8 { return r; } -#[returnaddress="stack"] fn __ct_select(reg ptr u8[BYTES_SEC] out a b, reg u8 selector) -> stack u8[BYTES_SEC] { reg u64 i; diff --git a/src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc b/src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc index 0008277a..4deee180 100644 --- a/src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc +++ b/src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc @@ -1,6 +1,5 @@ from Jade require "crypto_kem/frodo/common/amd64/ref/shake128.jinc" -#[returnaddress="stack"] fn __AS_plus_E(reg ptr u16[NNBAR] B, reg ptr u8[BYTES_SEED_A]seedA, reg ptr u16[NNBAR] S E) -> stack u16[NNBAR] { stack ptr u16[NNBAR] s_B; stack u16[N] A_row; @@ -64,7 +63,6 @@ fn __AS_plus_E(reg ptr u16[NNBAR] B, reg ptr u8[BYTES_SEED_A]seedA, reg ptr u16[ return B; } -#[returnaddress="stack"] fn __SA_plus_E(reg ptr u16[NNBAR] B, reg ptr u8[BYTES_SEED_A]seedA, reg ptr u16[NNBAR] S E) -> stack u16[NNBAR] { stack ptr u16[NNBAR] s_B; stack u16[N] A_row; @@ -123,7 +121,6 @@ fn __SA_plus_E(reg ptr u16[NNBAR] B, reg ptr u8[BYTES_SEED_A]seedA, reg ptr u16[ return B; } -#[returnaddress="stack"] fn __SB_plus_E(reg ptr u16[NBAR * NBAR] V, reg ptr u16[NNBAR] S B, reg ptr u16[NBAR * NBAR] E) -> stack u16[NBAR * NBAR] { reg u64 k tj; reg u16 tmp ac; @@ -160,7 +157,6 @@ fn __SB_plus_E(reg ptr u16[NBAR * NBAR] V, reg ptr u16[NNBAR] S B, reg ptr u16[N return V; } -#[returnaddress="stack"] fn __mul_BS(reg ptr u16[NBAR * NBAR] M, reg ptr u16[NNBAR]B S) -> stack u16[NBAR * NBAR] { reg u64 k tj; reg u16 tmp; diff --git a/src/crypto_kem/frodo/common/amd64/ref/shake128.jinc b/src/crypto_kem/frodo/common/amd64/ref/shake128.jinc index d287adb3..98cbb016 100644 --- a/src/crypto_kem/frodo/common/amd64/ref/shake128.jinc +++ b/src/crypto_kem/frodo/common/amd64/ref/shake128.jinc @@ -2,7 +2,6 @@ from Jade require "common/keccak/keccak1600/amd64/ref1/keccak1600.jinc" param int SHAKE128_RATE = 168; -#[returnaddress="stack"] fn __shake128_gen_A(reg ptr u8[2 * N] out, reg const ptr u8[2 + BYTES_SEED_A] in) -> stack u8[2 * N] { stack ptr u8[2 * N] s_out; @@ -59,7 +58,6 @@ fn __shake128_gen_A(reg ptr u8[2 * N] out, reg const ptr u8[2 + BYTES_SEED_A] in return out; } -#[returnaddress="stack"] fn __shake128_seed_A(reg ptr u8[BYTES_SEED_A] out, reg const ptr u8[BYTES_SEED_A] in) -> stack u8[BYTES_SEED_A] { stack ptr u8[BYTES_SEED_A] s_out; @@ -93,7 +91,6 @@ fn __shake128_seed_A(reg ptr u8[BYTES_SEED_A] out, reg const ptr u8[BYTES_SEED_A return out; } -#[returnaddress="stack"] fn __shake128_r(reg ptr u8[4 * NNBAR] out, reg const ptr u8[1 + BYTES_SEED_SE] in) -> stack u8[4 * NNBAR] { stack ptr u8[4 * NNBAR] s_out; @@ -156,7 +153,6 @@ fn __shake128_r(reg ptr u8[4 * NNBAR] out, reg const ptr u8[1 + BYTES_SEED_SE] i return out; } -#[returnaddress="stack"] fn __shake128_pkh(reg ptr u8[BYTES_SEC] out, reg const ptr u8[BYTES_PK] in) -> stack u8[BYTES_SEC] { stack ptr u8[BYTES_SEC] s_out; @@ -220,7 +216,6 @@ fn __shake128_pkh(reg ptr u8[BYTES_SEC] out, reg const ptr u8[BYTES_PK] in) -> s return out; } -#[returnaddress="stack"] fn __shake128_SE_k(reg ptr u8[BYTES_SEED_SE + BYTES_SEC] out, reg const ptr u8[2 * BYTES_SEC + BYTES_SALT] in) -> stack u8[BYTES_SEED_SE + BYTES_SEC] { stack ptr u8[BYTES_SEED_SE + BYTES_SEC] s_out; stack ptr u8[2 * BYTES_SEC + BYTES_SALT] s_in; @@ -262,7 +257,6 @@ fn __shake128_SE_k(reg ptr u8[BYTES_SEED_SE + BYTES_SEC] out, reg const ptr u8[2 } -#[returnaddress="stack"] fn __shake128_encap_r(reg ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] out, reg const ptr u8[1 + BYTES_SEED_SE] in) -> stack u8[2 * (2 * NNBAR + NBAR * NBAR)] { stack ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] s_out; stack ptr u8[1 + BYTES_SEED_SE] s_in; @@ -323,7 +317,6 @@ fn __shake128_encap_r(reg ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] out, reg const p return out; } -#[returnaddress="stack"] fn __shake128_ss(reg ptr u8[BYTES_SEC] out, reg const ptr u8[BYTES_CT + BYTES_SEC] in) -> stack u8[BYTES_SEC] { stack ptr u8[BYTES_SEC] s_out; diff --git a/src/crypto_kem/frodo/common/amd64/ref/shake256.jinc b/src/crypto_kem/frodo/common/amd64/ref/shake256.jinc index 1ed5acf0..ab9dadc4 100644 --- a/src/crypto_kem/frodo/common/amd64/ref/shake256.jinc +++ b/src/crypto_kem/frodo/common/amd64/ref/shake256.jinc @@ -2,7 +2,6 @@ from Jade require "common/keccak/keccak1600/amd64/ref1/keccak1600.jinc" param int SHAKE256_RATE = 136; -#[returnaddress="stack"] fn __shake256_seed_A(reg ptr u8[BYTES_SEED_A] out, reg const ptr u8[BYTES_SEED_A] in) -> stack u8[BYTES_SEED_A] { stack ptr u8[BYTES_SEED_A] s_out; @@ -44,7 +43,6 @@ fn __shake256_seed_A(reg ptr u8[BYTES_SEED_A] out, reg const ptr u8[BYTES_SEED_A return out; } -#[returnaddress="stack"] fn __shake256_r(reg ptr u8[4 * NNBAR] out, reg const ptr u8[1 + BYTES_SEED_SE] in) -> stack u8[4 * NNBAR] { stack ptr u8[4 * NNBAR] s_out; @@ -106,7 +104,6 @@ fn __shake256_r(reg ptr u8[4 * NNBAR] out, reg const ptr u8[1 + BYTES_SEED_SE] i return out; } -#[returnaddress="stack"] fn __shake256_pkh(reg ptr u8[BYTES_SEC] out, reg const ptr u8[BYTES_PK] in) -> stack u8[BYTES_SEC] { stack ptr u8[BYTES_SEC] s_out; @@ -169,7 +166,6 @@ fn __shake256_pkh(reg ptr u8[BYTES_SEC] out, reg const ptr u8[BYTES_PK] in) -> s return out; } -#[returnaddress="stack"] fn __shake256_SE_k(reg ptr u8[BYTES_SEED_SE + BYTES_SEC] out, reg const ptr u8[2 * BYTES_SEC + BYTES_SALT] in) -> stack u8[BYTES_SEED_SE + BYTES_SEC] { stack ptr u8[BYTES_SEED_SE + BYTES_SEC] s_out; stack ptr u8[2 * BYTES_SEC + BYTES_SALT] s_in; @@ -214,7 +210,6 @@ fn __shake256_SE_k(reg ptr u8[BYTES_SEED_SE + BYTES_SEC] out, reg const ptr u8[2 } -#[returnaddress="stack"] fn __shake256_encap_r(reg ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] out, reg const ptr u8[1 + BYTES_SEED_SE] in) -> stack u8[2 * (2 * NNBAR + NBAR * NBAR)] { stack ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] s_out; stack ptr u8[1 + BYTES_SEED_SE] s_in; @@ -274,7 +269,6 @@ fn __shake256_encap_r(reg ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] out, reg const p return out; } -#[returnaddress="stack"] fn __shake256_ss(reg ptr u8[BYTES_SEC] out, reg const ptr u8[BYTES_CT + BYTES_SEC] in) -> stack u8[BYTES_SEC] { stack ptr u8[BYTES_SEC] s_out; From a51c1c6b23ea0d0e33ff3319c733da9bc2720561 Mon Sep 17 00:00:00 2001 From: "Thing-han, Lim" <15379156+potsrevennil@users.noreply.github.com> Date: Mon, 24 Jun 2024 17:11:48 +0800 Subject: [PATCH 18/19] remove unnecessary require --- src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc | 2 -- src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc | 1 - 2 files changed, 3 deletions(-) diff --git a/src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc b/src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc index 4deee180..55a0377e 100644 --- a/src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc +++ b/src/crypto_kem/frodo/common/amd64/ref/matrix_mul.jinc @@ -1,5 +1,3 @@ -from Jade require "crypto_kem/frodo/common/amd64/ref/shake128.jinc" - fn __AS_plus_E(reg ptr u16[NNBAR] B, reg ptr u8[BYTES_SEED_A]seedA, reg ptr u16[NNBAR] S E) -> stack u16[NNBAR] { stack ptr u16[NNBAR] s_B; stack u16[N] A_row; diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc b/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc index 9971e509..2c181c31 100644 --- a/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/indcpa.jinc @@ -1,4 +1,3 @@ - inline fn __indcpa_keypair_derand( #spill_to_mmx reg ptr u8[BYTES_SEED_A + BYTES_SEED_SE] coins From 5ff8d9092a3ce1b50cd6f3fcd61a1ae8a03ea3b3 Mon Sep 17 00:00:00 2001 From: "Thing-han, Lim" <15379156+potsrevennil@users.noreply.github.com> Date: Mon, 24 Jun 2024 17:50:29 +0800 Subject: [PATCH 19/19] update frodo976shake accordingly --- .../frodo/common/amd64/ref/shake128_opt.jinc | 54 +--- .../frodo/common/amd64/ref/shake256_opt.jinc | 17 +- .../frodo/frodo640shake/amd64/ref/kem.jinc | 4 +- .../frodo/frodo976shake/amd64/ref/indcpa.jinc | 126 ++++++++ .../frodo/frodo976shake/amd64/ref/kem.jazz | 15 +- .../frodo/frodo976shake/amd64/ref/kem.jinc | 285 +++++++----------- 6 files changed, 256 insertions(+), 245 deletions(-) create mode 100644 src/crypto_kem/frodo/frodo976shake/amd64/ref/indcpa.jinc diff --git a/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc b/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc index a7d481f7..657042f3 100644 --- a/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc +++ b/src/crypto_kem/frodo/common/amd64/ref/shake128_opt.jinc @@ -268,7 +268,7 @@ fn __shake128_pkh_opt( return out; } -fn __shake128_SE_k_opt2( +fn __shake128_SE_k_opt( #spill_to_mmx reg ptr u8[BYTES_SEED_SE + BYTES_SEC] out, #spill_to_mmx reg const ptr u8[2 * BYTES_SEC + BYTES_SALT] in) -> reg ptr u8[BYTES_SEED_SE + BYTES_SEC] { @@ -319,58 +319,6 @@ fn __shake128_SE_k_opt2( } -fn __shake128_SE_k_opt( - #spill_to_mmx reg ptr u8[1 + BYTES_SEED_SE + BYTES_SEC] out, - #spill_to_mmx reg const ptr u8[2 * BYTES_SEC + BYTES_SALT] in) --> reg ptr u8[1 + BYTES_SEED_SE + BYTES_SEC] { - #spill_to_mmx reg u64 i; - - stack u64[25] s_state; - reg ptr u64[25] state; - reg u64 offset t0 zero; - inline int INLEN OUTLEN; - - INLEN = 2 * BYTES_SEC + BYTES_SALT; - OUTLEN = BYTES_SEED_SE + BYTES_SEC; - - state = s_state; - - i = 0; - while (i < INLEN/8) { - t0 = in[u64 i]; - state[i] = t0; - - i += 1; - } - ?{}, zero = #set0(); - - i = INLEN/8; - while (i < 25) { - state[i] = zero; - i += 1; - } - - state[u8 INLEN] = 0x1f; - state[u8 SHAKE128_RATE-1] = 0x80; - - () = #spill(out); - - state = __keccakf1600_ref1(state); - - () = #unspill(out); - - i = 0; - while (i < OUTLEN/8) { - t0 = state[u64 i]; - offset = #LEA(1+8*i); - out.[u64 offset] = t0; - - i += 1; - } - - return out; -} - fn __shake128_encap_r_opt( #spill_to_mmx reg ptr u8[2 * (2 * NNBAR + NBAR * NBAR)] out, #spill_to_mmx reg const ptr u8[1 + BYTES_SEED_SE] in) diff --git a/src/crypto_kem/frodo/common/amd64/ref/shake256_opt.jinc b/src/crypto_kem/frodo/common/amd64/ref/shake256_opt.jinc index f89b82eb..a3514809 100644 --- a/src/crypto_kem/frodo/common/amd64/ref/shake256_opt.jinc +++ b/src/crypto_kem/frodo/common/amd64/ref/shake256_opt.jinc @@ -89,11 +89,11 @@ fn __shake256_r_opt( i = 0; while (i < OUTRND * SHAKE256_RATE/8) { - () = #spill(i, out); + () = #spill(i, j, out); state = __keccakf1600_ref1(state); - () = #unspill(i, out); + () = #unspill(i, j, out); j = 0; while (j < SHAKE256_RATE/8) { @@ -107,11 +107,11 @@ fn __shake256_r_opt( i += SHAKE256_RATE/8; } - () = #spill(i, out); + () = #spill(i, j, out); state = __keccakf1600_ref1(state); - () = #unspill(i, out); + () = #unspill(i, j, out); i = 0; while (i < (OUTLEN % SHAKE256_RATE) / 8) { @@ -196,14 +196,14 @@ fn __shake256_pkh_opt( } fn __shake256_SE_k_opt( - #spill_to_mmx reg ptr u8[1 + BYTES_SEED_SE + BYTES_SEC] out, + #spill_to_mmx reg ptr u8[BYTES_SEED_SE + BYTES_SEC] out, #spill_to_mmx reg const ptr u8[2 * BYTES_SEC + BYTES_SALT] in) --> reg ptr u8[1 + BYTES_SEED_SE + BYTES_SEC] { +-> reg ptr u8[BYTES_SEED_SE + BYTES_SEC] { #spill_to_mmx reg u64 i; stack u64[25] s_state; reg ptr u64[25] state; - reg u64 offset t0 zero; + reg u64 t0 zero; inline int INLEN OUTLEN; INLEN = 2 * BYTES_SEC + BYTES_SALT; @@ -238,8 +238,7 @@ fn __shake256_SE_k_opt( i = 0; while (i < OUTLEN/8) { t0 = state[u64 i]; - offset = #LEA(1+8*i); - out.[u64 offset] = t0; + out[u64 i] = t0; i += 1; } diff --git a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc index 8c5f5562..77dd9e06 100644 --- a/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc +++ b/src/crypto_kem/frodo/frodo640shake/amd64/ref/kem.jinc @@ -121,7 +121,7 @@ fn __frodo_amd64_ref_enc_derand( pkh_u_salt[0:BYTES_SEC] = __shake128_pkh_opt(pkh_u_salt[0:BYTES_SEC], pk); // seedSE || k - seedSE_k = __shake128_SE_k_opt2(seedSE_k, pkh_u_salt); + seedSE_k = __shake128_SE_k_opt(seedSE_k, pkh_u_salt); () = #unspill(coins); ct_k[0:BYTES_CT - BYTES_SALT] = __indcpa_enc_derand(ct_k[0:BYTES_CT - BYTES_SALT], coins[0:BYTES_SEC], pk, seedSE_k[0:BYTES_SEED_SE]); @@ -217,7 +217,7 @@ fn _frodo_amd64_ref_dec(reg u64 ssp ctp skp) { pkh_u_salt[BYTES_SEC:BYTES_SEC] = __indcpa_dec(pkh_u_salt[BYTES_SEC:BYTES_SEC], ct_k[0:BYTES_CT - BYTES_SALT], ST); () = #spill(ssp); - seedSE_k = __shake128_SE_k_opt2(seedSE_k, pkh_u_salt); + seedSE_k = __shake128_SE_k_opt(seedSE_k, pkh_u_salt); ct2 = __indcpa_enc_derand(ct2, pkh_u_salt[BYTES_SEC:BYTES_SEC], pk, seedSE_k[0:BYTES_SEED_SE]); s1 = __ct_verify(ct_k[0:BYTES_CT - BYTES_SALT], ct2); diff --git a/src/crypto_kem/frodo/frodo976shake/amd64/ref/indcpa.jinc b/src/crypto_kem/frodo/frodo976shake/amd64/ref/indcpa.jinc new file mode 100644 index 00000000..534481e9 --- /dev/null +++ b/src/crypto_kem/frodo/frodo976shake/amd64/ref/indcpa.jinc @@ -0,0 +1,126 @@ +inline +fn __indcpa_keypair_derand( + #spill_to_mmx reg ptr u8[BYTES_SEED_A + BYTES_SEED_SE] coins +) -> stack u8[BYTES_PK], stack u8[2*NNBAR] { + stack u8[BYTES_PK] pk; // seedA || b + stack u8[2*NNBAR] sk; // S_T + stack u16[2 * NNBAR] SE; + stack u16[NNBAR] B; + + reg u64 i t; + + i = 0; + while (i < BYTES_SEED_A/8) { + t = coins[u64 i]; + pk[u64 i] = t; + i += 1; + } + + () = #spill(coins); + // gen S || E + SE = __shake256_r_opt(SE, coins[BYTES_SEED_A:BYTES_SEED_SE]); + + SE = __sample_2NNBAR(SE); + + // B = A*S+E + B = __AS_plus_E_opt(B, pk[0:BYTES_SEED_A], SE[0:NNBAR], SE[NNBAR:NNBAR]); + + // pack + pk[BYTES_SEED_A:D * N] = __pack_B(pk[BYTES_SEED_A:D * N], B); + + i = 0; + while (i < 2 * NNBAR / 8) { + t = SE[u64 i]; + sk[u64 i] = t; + i += 1; + } + + return pk, sk; +} + +inline +fn __indcpa_enc_derand( + #spill_to_mmx reg ptr u8[BYTES_CT - BYTES_SALT] ct, + #spill_to_mmx reg ptr u8[BYTES_SEC] u, + #spill_to_mmx reg ptr u8[BYTES_PK] pk, + #spill_to_mmx reg ptr u8[BYTES_SEED_SE] coins +) -> reg ptr u8[BYTES_CT - BYTES_SALT] { + reg u64 i t; + + // 0x96 || seed_SE + stack u8[1 + BYTES_SEED_SE] seedSE; + seedSE[0] = 0x96; + + // S' || E' || E'' + stack u16[2 * NNBAR + NBAR * NBAR] SEE; + stack u16[NNBAR] B; + reg ptr u16[NNBAR] Bp; + stack u16[NBAR * NBAR] C; + reg ptr u16[NBAR * NBAR] V; + + // stack u8[BYTES_CT - BYTES_SALT] ct; + + i = 0; + while (i < BYTES_SEED_SE/8) { + t = coins[u64 i]; + seedSE.[u64 1 + 8*i] = t; + i += 1; + } + + // B <- Unpack(b) + B = __unpack_B(B, pk[BYTES_SEED_A:D * N]); + C = __encode(C, u); + + () = #spill(ct, u, pk, coins); + + // gen input bit string for sampling S and E + SEE = __shake256_encap_r_opt(SEE, seedSE); + + // S' || E' + SEE[0:2 * NNBAR] = __sample_2NNBAR(SEE[0:2 * NNBAR]); + // E'' + SEE[NNBAR * 2:NBAR * NBAR] = __sample_NBAR2(SEE[NNBAR * 2:NBAR * NBAR]); + + // B' = S'A + E'' + Bp = SEE[NNBAR:NNBAR]; + + () = #unspill(pk); + Bp = __SA_plus_E_opt(Bp, pk[0:BYTES_SEED_A], SEE[0:NNBAR]); + + // V = S'B + E'' + V = SEE[NNBAR*2:NBAR*NBAR]; + V = __SB_plus_E_opt(V, SEE[0:NNBAR], B); + + // C = V + Encode(u) + C = __matrix_add(C, V); + + // c1 <- Pack(B') + () = #unspill(ct); + ct[0:D * N] = __pack_B(ct[0:D * N], Bp); + // c2 <- Pack(C) + ct[D * N: D * NBAR] = __pack_C(ct[D * N: D * NBAR], C); + + return ct; +} + +inline +fn __indcpa_dec( + #spill_to_mmx reg ptr u8[BYTES_SEC] pt, + #spill_to_mmx reg ptr u8[BYTES_CT - BYTES_SALT] ct, + #spill_to_mmx reg ptr u8[2*NNBAR] sk +) -> reg ptr u8[BYTES_SEC] { + stack u16[NNBAR] Bp; + stack u16[NBAR * NBAR] M C; + + // B' <- Unpack(c1) + Bp = __unpack_B(Bp, ct[0:D * N]); + // C <- Unpack(c2) + C = __unpack_C(C, ct[D * N:D * NBAR]); + + // M = C - B'S + M = __mul_BS_opt(M, Bp, sk); + M = __matrix_sub(M, C); + pt = __decode(pt, M); + + return pt; +} diff --git a/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jazz b/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jazz index c64c8260..3edd6dd1 100644 --- a/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jazz +++ b/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jazz @@ -1,36 +1,41 @@ from Jade require "crypto_kem/frodo/common/frodo976_params.jinc" from Jade require "crypto_kem/frodo/frodo976shake/amd64/ref/kem.jinc" -export fn jade_kem_frodo_frodo976shake_amd64_ref_keypair_derand(#public reg u64 pkp skp coinsp) -> #public reg u64 { +export fn jade_kem_frodo_frodo976shake_amd64_ref_keypair_derand(reg u64 pkp skp coinsp) -> reg u64 { reg u64 r; + _ = #init_msf(); _frodo_amd64_ref_keypair_derand(pkp, skp, coinsp); ?{}, r = #set0(); return r; } -export fn jade_kem_frodo_frodo976shake_amd64_ref_keypair(#public reg u64 pkp skp) -> #public reg u64 { +export fn jade_kem_frodo_frodo976shake_amd64_ref_keypair(reg u64 pkp skp) -> reg u64 { reg u64 r; + _ = #init_msf(); _frodo_amd64_ref_keypair(pkp, skp); ?{}, r = #set0(); return r; } -export fn jade_kem_frodo_frodo976shake_amd64_ref_enc_derand(#public reg u64 ctp ssp pkp coinsp) -> #public reg u64 { +export fn jade_kem_frodo_frodo976shake_amd64_ref_enc_derand(reg u64 ctp ssp pkp coinsp) -> reg u64 { reg u64 r; + _ = #init_msf(); _frodo_amd64_ref_enc_derand(ctp, ssp, pkp, coinsp); ?{}, r = #set0(); return r; } -export fn jade_kem_frodo_frodo976shake_amd64_ref_enc(#public reg u64 ctp ssp pkp) -> #public reg u64 { +export fn jade_kem_frodo_frodo976shake_amd64_ref_enc(reg u64 ctp ssp pkp) -> reg u64 { reg u64 r; + _ = #init_msf(); _frodo_amd64_ref_enc(ctp, ssp, pkp); ?{}, r = #set0(); return r; } -export fn jade_kem_frodo_frodo976shake_amd64_ref_dec(#public reg u64 ssp ctp skp) -> #public reg u64 { +export fn jade_kem_frodo_frodo976shake_amd64_ref_dec(reg u64 ssp ctp skp) -> reg u64 { reg u64 r; + _ = #init_msf(); _frodo_amd64_ref_dec(ssp, ctp, skp); ?{}, r = #set0(); return r; diff --git a/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jinc b/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jinc index c1ed48cd..aa677759 100644 --- a/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jinc +++ b/src/crypto_kem/frodo/frodo976shake/amd64/ref/kem.jinc @@ -5,81 +5,80 @@ from Jade require "crypto_kem/frodo/common/amd64/ref/noise.jinc" from Jade require "crypto_kem/frodo/common/amd64/ref/matrix.jinc" from Jade require "crypto_kem/frodo/common/amd64/ref/matrix_mul_opt.jinc" from Jade require "crypto_kem/frodo/common/amd64/ref/pack.jinc" +require "./indcpa.jinc" // coins = s || seed SE || z +inline fn __frodo_amd64_ref_keypair_derand( reg u64 pkp skp, - #spill_to_mmx reg ptr u8[BYTES_SEED_A + BYTES_SEED_SE + BYTES_SEC] coins) { - stack u16[2 * NNBAR] SE; - stack u16[NNBAR] B; - - inline int k; - reg u64 i j; + #spill_to_mmx reg ptr u8[BYTES_SEC + BYTES_SEED_SE + BYTES_SEED_A] coins) { + reg u64 i t; // seedA || b stack u8[BYTES_PK] pk; + stack u8[BYTES_SEED_A + BYTES_SEED_SE] indcoins; + stack u8[BYTES_SEC] pkh; - // s || seedA || b || S_T || pkh - stack u8[BYTES_SK] sk; - - () = #spill(i, j, pkp, skp); + // S_T + stack u8[2*NNBAR] sk; - for k = 0 to BYTES_SEC/8 { - sk[u64 k] = coins[u64 k]; + i = 0; + while (i < BYTES_SEC/8) { + t = coins[u64 i]; + [skp + i*8] = t; + i += 1; } - // gen seedA - pk[0:BYTES_SEED_A] = __shake256_seed_A_opt(pk[0:BYTES_SEED_A], coins[BYTES_SEC + BYTES_SEED_SE:BYTES_SEED_A]); - - // gen S || E - SE = __shake256_r_opt(SE, coins[BYTES_SEC:BYTES_SEED_SE]); - SE = __sample_2NNBAR(SE); + // copy seedSE + i = 0; + while (i < BYTES_SEED_SE/8) { + t = coins[u64 BYTES_SEC/8 + i]; + indcoins[u64 BYTES_SEED_A/8 + i] = t; + i += 1; + } - () = #spill(coins); + () = #spill(pkp, skp, coins); + indcoins[0:BYTES_SEED_A] = __shake256_seed_A_opt(indcoins[0:BYTES_SEED_A], coins[BYTES_SEC + BYTES_SEED_SE:BYTES_SEED_A]); - // B = A*S+E - B = __AS_plus_E_opt(B, pk[0:BYTES_SEED_A], SE[0:NNBAR], SE[NNBAR:NNBAR]); + pk, sk = __indcpa_keypair_derand(indcoins); + pkh = __shake256_pkh_opt(pkh, pk); - // pack - pk[BYTES_SEED_A:D * N] = __pack_B(pk[BYTES_SEED_A:D * N], B); + () = #unspill(pkp, skp); - () = #unspill(i); i = 0; while (i < BYTES_PK/8) { - sk[u64 BYTES_SEC/8 + i] = pk[u64 i]; + t = pk[u64 i]; + [skp + BYTES_SEC + i*8] = t; i += 1; } i = 0; while (i < 2 * NNBAR / 8) { - sk[u64 BYTES_SEC/8 + BYTES_PK/8 + i] = SE[u64 i]; + t = sk[u64 i]; + [skp + BYTES_SEC + BYTES_PK + i*8] = t; i += 1; } - () = #spill(i); - - sk[BYTES_SEC + BYTES_PK + 2 * NNBAR : BYTES_SEC] = __shake256_pkh_opt(sk[BYTES_SEC + BYTES_PK + 2 * NNBAR:BYTES_SEC], pk); - () = #unspill(i, j, pkp, skp); - i = 0; j = 0; + i = 0; while (i < BYTES_PK/8) { - [pkp + j] = pk[u64 i]; + t = pk[u64 i]; + [pkp + i*8] = t; i += 1; - j += 8; } - i = 0; j = 0; - while (i < BYTES_SK/8) { - [skp + j] = sk[u64 i]; + i = 0; + while (i < BYTES_SEC/8) { + t = pkh[u64 i]; + [skp + BYTES_SK - BYTES_SEC + i*8] = t; i += 1; - j += 8; } } -#[returnaddress="stack"] +inline fn __frodo_amd64_ref_enc_derand( reg u64 ctp ssp pkp, #spill_to_mmx reg ptr u8[BYTES_SEC + BYTES_SALT] coins) { - reg u64 i j; + reg u64 i t; inline int k; // seedA || b @@ -90,40 +89,34 @@ fn __frodo_amd64_ref_enc_derand( // pkh || u || salt stack u8[BYTES_SEC * 2 + BYTES_SALT] pkh_u_salt; - // 0x96 || seedSE || k - stack u8[1 + BYTES_SEED_SE + BYTES_SEC] seedSE_k; - seedSE_k[0] = 0x96; - - // S' || E' || E'' - stack u16[2 * NNBAR + NBAR * NBAR] SEE; + // seedSE || k + stack u8[BYTES_SEED_SE + BYTES_SEC] seedSE_k; - stack u16[NNBAR] B; - reg ptr u16[NNBAR] Bp; - stack u16[NBAR * NBAR] C; - reg ptr u16[NBAR * NBAR] V; stack u8[BYTES_SEC] ss; - pkp = pkp; - () = #spill(ctp, ssp, i, j); - // gen u || salt - for k = 0 to (BYTES_SEC + BYTES_SALT)/8 { - pkh_u_salt[u64 BYTES_SEC/8 + k] = coins[u64 k]; + i = 0; + while (i < (BYTES_SEC + BYTES_SALT)/8) { + t = coins[u64 i]; + pkh_u_salt[u64 BYTES_SEC/8 + i] = t; + i += 1; } - for k = 0 to BYTES_SALT/8 { - ct_k[u64 (D * N + D * NBAR)/8 + k] = pkh_u_salt[u64 (BYTES_SEC * 2)/8 + k]; + i = 0; + while (i < BYTES_SALT/8) { + t = coins[u64 BYTES_SEC/8 + i]; + ct_k[u64 (BYTES_CT - BYTES_SALT)/8 + i] = t; + i += 1; } - () = #unspill(i, j); // read pk - i = 0; j = 0; + i = 0; while (i < BYTES_PK/8) { - #declassify pk[u64 i] = [pkp + j]; + #declassify pk[u64 i] = [pkp + i*8]; i += 1; - j += 8; } - () = #spill(i, j); + + () = #spill(ctp, ssp, coins); // pkh pkh_u_salt[0:BYTES_SEC] = __shake256_pkh_opt(pkh_u_salt[0:BYTES_SEC], pk); @@ -131,175 +124,116 @@ fn __frodo_amd64_ref_enc_derand( // seedSE || k seedSE_k = __shake256_SE_k_opt(seedSE_k, pkh_u_salt); + () = #unspill(coins); + ct_k[0:BYTES_CT - BYTES_SALT] = __indcpa_enc_derand(ct_k[0:BYTES_CT - BYTES_SALT], coins[0:BYTES_SEC], pk, seedSE_k[0:BYTES_SEED_SE]); + // copy k - for k = 0 to BYTES_SEC/8 { - ct_k[u64 BYTES_CT/8 + k] = seedSE_k.[u64 1 + BYTES_SEED_SE + 8*k]; + i = 0; + while (i < BYTES_SEC/8) { + t = seedSE_k[u64 BYTES_SEED_SE/8 + i]; + ct_k[u64 BYTES_CT/8 + i] = t; + i += 1; } - // gen input bit string for sampling S and E - SEE = __shake256_encap_r_opt(SEE, seedSE_k[0 : 1 + BYTES_SEED_SE]); - - // S' || E' - SEE[0:2 * NNBAR] = __sample_2NNBAR(SEE[0:2 * NNBAR]); - // E'' - SEE[NNBAR * 2:NBAR * NBAR] = __sample_NBAR2(SEE[NNBAR * 2:NBAR * NBAR]); - - // B' = S'A + E'' - Bp = SEE[NNBAR:NNBAR]; - Bp = __SA_plus_E_opt(Bp, pk[0:BYTES_SEED_A], SEE[0:NNBAR]); - - // c1 <- Pack(B') - ct_k[0:D * N] = __pack_B(ct_k[0:D * N], Bp); - - // B <- Unpack(b) - B = __unpack_B(B, pk[BYTES_SEED_A:D * N]); - - // V = S'B + E'' - V = SEE[NNBAR*2:NBAR*NBAR]; - V = __SB_plus_E_opt(V, SEE[0:NNBAR], B); - - // C = V + Encode(u) - C = __encode(C, pkh_u_salt[BYTES_SEC:BYTES_SEC]); - C = __matrix_add(C, V); - - // c2 <- Pack(C) - ct_k[D * N: D * NBAR] = __pack_C(ct_k[D * N: D * NBAR], C); - // ss <- shake(c1 || c2 || salt || k) ss = __shake256_ss_opt(ss, ct_k); - () = #unspill(i, j, ctp, ssp); - i = 0; j = 0; + () = #unspill(ctp, ssp); + i = 0; + _ = #init_msf(); while (i < BYTES_CT/8) { - [ctp + j] = ct_k[u64 i]; + t = ct_k[u64 i]; + [ctp + i*8] = t; i += 1; - j += 8; } for k = 0 to BYTES_SEC/8 { - [ssp + 8*k] = ss[u64 k]; + t = ss[u64 k]; + [ssp + 8*k] = t; } } -#[returnaddress="stack"] fn _frodo_amd64_ref_dec(reg u64 ssp ctp skp) { #public stack u8[BYTES_PK] pk; stack u8[2 * NNBAR] ST; stack u8[BYTES_SEC] s; stack u8[BYTES_CT + BYTES_SEC] ct_k; - stack u16[NNBAR] B Bp; - reg ptr u16[NNBAR] Bpp; - stack u16[NBAR * NBAR] M C Cp; - reg ptr u16[NBAR * NBAR] V; + stack u8[BYTES_CT - BYTES_SALT] ct2; stack u8[BYTES_SEC * 2 + BYTES_SALT] pkh_u_salt; - stack u8[1 + BYTES_SEED_SE + BYTES_SEC] seedSE_k; + stack u8[BYTES_SEED_SE + BYTES_SEC] seedSE_k; stack u8[BYTES_SEC] ss; - // S' || E' || E'' - stack u16[2 * NNBAR + NBAR * NBAR] SEE; - - reg u8 s1 s2; - reg u64 i j t; - stack u64 s_ssp s_skp; - inline int k; + reg u8 s1; + reg u64 i t; ctp = ctp; skp = skp; - s_ssp = ssp; + ssp = ssp; // copy pkh - for k = 0 to BYTES_SEC/8 { - pkh_u_salt[u64 k] = [skp + BYTES_SK - BYTES_SEC + 8*k]; + i = 0; + while (i < BYTES_SEC/8) { + t = [skp + BYTES_SK - BYTES_SEC + i*8]; + pkh_u_salt[u64 i] = t; + i += 1; } - s_skp = skp; // read ct - i = 0; j = 0; + i = 0; while (i < BYTES_CT/8) { - t = [ctp + j]; + t = [ctp + i*8]; ct_k[u64 i] = t; i += 1; - j += 8; } - for k = 0 to BYTES_SEC/8 { - s[u64 k] = [skp + 8*k]; + i = 0; + while (i < BYTES_SEC/8) { + t = [skp + i*8]; + s[u64 i] = t; + i += 1; } - i = 0; j = 0; + i = 0; while (i < BYTES_PK/8) { - #declassify pk[u64 i] = [skp + BYTES_SEC + j]; + t = [skp + BYTES_SEC + i*8]; + #declassify pk[u64 i] = t; i += 1; - j += 8; } - i = 0; j = 0; + i = 0; while (i < 2 * NNBAR/8) { - ST[u64 i] = [skp + BYTES_SEC + BYTES_PK + j]; + t = [skp + BYTES_SEC + BYTES_PK + i*8]; + ST[u64 i] = t; i += 1; - j += 8; } - () = #spill(i); - // copy salt - for k = 0 to BYTES_SALT/8 { - pkh_u_salt[u64 (BYTES_SEC * 2)/8 + k] = ct_k[u64 (BYTES_CT - BYTES_SALT)/8 + k]; - } - - // B' <- Unpack(c1) - Bp = __unpack_B(Bp, ct_k[0:D * N]); - // C <- Unpack(c2) - C = __unpack_C(C, ct_k[D * N:D * NBAR]); - - // M = C - B'S - M = __mul_BS_opt(M, Bp, ST); - M = __matrix_sub(M, C); - - pkh_u_salt[BYTES_SEC:BYTES_SEC] = __decode(pkh_u_salt[BYTES_SEC:BYTES_SEC], M); - - seedSE_k[0] = 0x96; - seedSE_k = __shake256_SE_k_opt(seedSE_k, pkh_u_salt); - - SEE = __shake256_encap_r_opt(SEE, seedSE_k[0: 1 + BYTES_SEED_SE]); - - // S' || E' - SEE[0:2 * NNBAR] = __sample_2NNBAR(SEE[0:2 * NNBAR]); - // E'' - SEE[NNBAR * 2:NBAR * NBAR] = __sample_NBAR2(SEE[NNBAR * 2:NBAR * NBAR]); - - // B'' = S'A + E' - Bpp = SEE[NNBAR:NNBAR]; - Bpp = __SA_plus_E_opt(Bpp, pk[0:BYTES_SEED_A], SEE[0:NNBAR]); - - // B'' (mod q) - () = #unspill(i); i = 0; - while (i < NNBAR) { - Bpp[i] &= (1 << D) - 1; + while (i < BYTES_SALT/8) { + t = ct_k[u64 (BYTES_CT - BYTES_SALT)/8 + i]; + pkh_u_salt[u64 (BYTES_SEC * 2)/8 + i] = t; i += 1; } - // - B = __unpack_B(B, pk[BYTES_SEED_A:BYTES_PK - BYTES_SEED_A]); - - V = SEE[NNBAR*2:NBAR*NBAR]; - V = __SB_plus_E_opt(V, SEE[0:NNBAR], B); + pkh_u_salt[BYTES_SEC:BYTES_SEC] = __indcpa_dec(pkh_u_salt[BYTES_SEC:BYTES_SEC], ct_k[0:BYTES_CT - BYTES_SALT], ST); - Cp = __encode(Cp, pkh_u_salt[BYTES_SEC:BYTES_SEC]); - Cp = __matrix_add(Cp, V); + () = #spill(ssp); + seedSE_k = __shake256_SE_k_opt(seedSE_k, pkh_u_salt); + ct2 = __indcpa_enc_derand(ct2, pkh_u_salt[BYTES_SEC:BYTES_SEC], pk, seedSE_k[0:BYTES_SEED_SE]); - s1 = __ct_verify_NNBAR(Bp, Bpp); - s2 = __ct_verify_NBAR2(C, Cp); - s1 |= s2; + s1 = __ct_verify(ct_k[0:BYTES_CT - BYTES_SALT], ct2); - ct_k[BYTES_CT:BYTES_SEC] = __ct_select(ct_k[BYTES_CT:BYTES_SEC], seedSE_k[1+BYTES_SEED_SE:BYTES_SEC], s, s1); + ct_k[BYTES_CT:BYTES_SEC] = __ct_select(ct_k[BYTES_CT:BYTES_SEC], seedSE_k[BYTES_SEED_SE:BYTES_SEC], s, s1); ss = __shake256_ss_opt(ss, ct_k); - ssp = s_ssp; - for k = 0 to BYTES_SEC/8 { - [ssp + 8*k] = ss[u64 k]; + _ = #init_msf(); + () = #unspill(ssp); + i = 0; + while (i < BYTES_SEC/8) { + t = ss[u64 i]; + [ssp + i*8] = t; + i += 1; } } @@ -316,7 +250,7 @@ fn _frodo_amd64_ref_keypair(reg u64 pkp skp) { fn _frodo_amd64_ref_keypair_derand(reg u64 pkp skp coinsp) { #public stack u8[BYTES_SEED_A + BYTES_SEED_SE + BYTES_SEC] coins; - reg u64 i; stack u64 s_i; + reg u64 i; pkp = pkp; skp = skp; @@ -327,7 +261,6 @@ fn _frodo_amd64_ref_keypair_derand(reg u64 pkp skp coinsp) { i += 1; } - s_i = i; __frodo_amd64_ref_keypair_derand(pkp, skp, coins); }