Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Revision 81a0bf97e2012c369f800f26e2e3d3651cde7a35 authored by Matthias J. Kannwischer on 29 July 2021, 07:22:18 UTC, committed by Matthias J. Kannwischer on 02 August 2021, 03:06:46 UTC
Update NTRU Prime; add new round 3 parameter sets
1 parent 819f906
  • Files
  • Changes
  • b53fadd
  • /
  • crypto_sign
  • /
  • dilithium3
  • /
  • avx2
  • /
  • rounding.c
Raw File Download
Permalinks

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • revision
  • directory
  • content
revision badge
swh:1:rev:81a0bf97e2012c369f800f26e2e3d3651cde7a35
directory badge Iframe embedding
swh:1:dir:9c813eae6eadba6c9d9764148c68f4f4f263bd9f
content badge Iframe embedding
swh:1:cnt:1fbe15f5d95565572e85cdcd05d76bff753b9a9d
Citations

This interface makes it possible to generate software citations, provided that the root directory of the browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • revision
  • directory
  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
rounding.c
#include "consts.h"
#include "params.h"
#include "rejsample.h"
#include "rounding.h"
#include <immintrin.h>
#include <stdint.h>
#include <string.h>

/*
 * Per-32-bit-lane select built on the float blend _mm256_blendv_ps:
 * for every lane the result takes the lane of b when the most significant
 * (sign) bit of the corresponding mask lane is set, and the lane of a
 * otherwise. Only the top bit of each mask lane is inspected, so a signed
 * integer can be passed directly as the mask to select on "value < 0".
 * The casts merely reinterpret the registers between integer and float
 * vector types; no value conversion takes place.
 */
#define _mm256_blendv_epi32(a,b,mask) \
    _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \
                                         _mm256_castsi256_ps(b), \
                                         _mm256_castsi256_ps(mask)))

/*************************************************
* Name:        power2round
*
* Description: Split every coefficient a into a high part a1 and a low
*              part a0 such that a mod^+ Q = a1*2^D + a0 with
*              -2^{D-1} < a0 <= 2^{D-1}. Inputs are expected to be
*              positive standard representatives.
*
* Arguments:   - __m256i *a1: output array of N/8 vectors, high parts
*              - __m256i *a0: output array of N/8 vectors, low parts
*              - const __m256i *a: input array of N/8 vectors
*
**************************************************/
void PQCLEAN_DILITHIUM3_AVX2_power2round_avx(__m256i *a1, __m256i *a0, const __m256i *a) {
    const __m256i round_bias = _mm256_set1_epi32((1 << (D - 1)) - 1);
    const __m256i high_mask = _mm256_set1_epi32(-(1 << D));

    for (unsigned int k = 0; k < N / 8; ++k) {
        const __m256i coeff = _mm256_load_si256(&a[k]);
        /* Adding 2^{D-1}-1 before dropping the low D bits rounds the
         * coefficient to a multiple of 2^D. */
        const __m256i biased = _mm256_add_epi32(coeff, round_bias);
        const __m256i rounded = _mm256_and_si256(biased, high_mask);

        /* High part: the rounded value divided by 2^D.
         * Low part: signed distance from the rounded value. */
        _mm256_store_si256(&a1[k], _mm256_srli_epi32(biased, D));
        _mm256_store_si256(&a0[k], _mm256_sub_epi32(coeff, rounded));
    }
}

/*************************************************
* Name:        decompose
*
* Description: For every coefficient a, compute a high part a1 and a low
*              part a0 such that a mod^+ Q = a1*ALPHA + a0 with
*              -ALPHA/2 < a0 <= ALPHA/2, where ALPHA = 2*GAMMA2. The one
*              exception is a1 = (Q-1)/ALPHA, which is mapped to a1 = 0
*              with -ALPHA/2 <= a0 = a mod Q - Q < 0. Inputs are expected
*              to be positive standard representatives.
*
* Arguments:   - __m256i *a1: output array of N/8 vectors, high parts
*              - __m256i *a0: output array of N/8 vectors, low parts
*              - const __m256i *a: input array of N/8 vectors
*
**************************************************/
void PQCLEAN_DILITHIUM3_AVX2_decompose_avx(__m256i *a1, __m256i *a0, const __m256i *a) {
    const __m256i modulus = _mm256_load_si256(&PQCLEAN_DILITHIUM3_AVX2_qdata.vec[_8XQ / 8]);
    const __m256i half_modulus = _mm256_srli_epi32(modulus, 1);
    const __m256i alpha = _mm256_set1_epi32(2 * GAMMA2);
    const __m256i round_up = _mm256_set1_epi32(127);
    const __m256i mul_hi = _mm256_set1_epi32(1025);
    const __m256i mul_round = _mm256_set1_epi32(512);
    const __m256i high_mask = _mm256_set1_epi32(15);

    for (unsigned int k = 0; k < N / 8; k++) {
        const __m256i coeff = _mm256_load_si256(&a[k]);
        __m256i hi, lo, wrap;

        /* Multiply-and-shift sequence that yields the high part without a
         * division. NOTE(review): the constants 127/7/1025/512 appear to be
         * tuned for ALPHA = (Q-1)/16 -- confirm against params.h. */
        hi = _mm256_srli_epi32(_mm256_add_epi32(coeff, round_up), 7);
        hi = _mm256_mulhi_epu16(hi, mul_hi);
        hi = _mm256_mulhrs_epi16(hi, mul_round);
        /* Keeping only the low four bits folds a high part of 16 to 0. */
        hi = _mm256_and_si256(hi, high_mask);

        /* Low part: remainder after removing a1*ALPHA; where it exceeds
         * floor(Q/2), subtract Q to obtain the negative representative. */
        lo = _mm256_sub_epi32(coeff, _mm256_mullo_epi32(hi, alpha));
        wrap = _mm256_and_si256(_mm256_cmpgt_epi32(lo, half_modulus), modulus);
        lo = _mm256_sub_epi32(lo, wrap);

        _mm256_store_si256(&a1[k], hi);
        _mm256_store_si256(&a0[k], lo);
    }
}


/*************************************************
* Name:        make_hint
*
* Description: Compute indices of polynomial coefficients whose low bits
*              overflow into the high bits. A coefficient is flagged when
*              |a0| > GAMMA2, or when a0 == -GAMMA2 and a1 > 0.
*
*              Every iteration stores a full 8-byte group at hint[n], even
*              if fewer than 8 coefficients were flagged; only the first
*              n bytes (n = return value) are meaningful afterwards. The
*              store never leaves the array: before iteration i at most
*              8*i indices have been emitted, so the group ends at or
*              before hint[8*i + 7] <= hint[N - 1].
*
* Arguments:   - uint8_t *hint: hint array of N bytes receiving the indices
*              - const __m256i *a0: low bits of input elements (N/8 vectors)
*              - const __m256i *a1: high bits of input elements (N/8 vectors)
*
* Returns number of overflowing low bits
**************************************************/
unsigned int PQCLEAN_DILITHIUM3_AVX2_make_hint_avx(uint8_t hint[N], const __m256i *restrict a0, const __m256i *restrict a1) {
    unsigned int i, n = 0;
    __m256i f0, f1, g0, g1;
    uint32_t bad;
    uint64_t idx;
    const __m256i low = _mm256_set1_epi32(-GAMMA2);
    const __m256i high = _mm256_set1_epi32(GAMMA2);

    for (i = 0; i < N / 8; ++i) {
        f0 = _mm256_load_si256(&a0[i]);
        f1 = _mm256_load_si256(&a1[i]);
        /* Lanes with |a0| > GAMMA2. */
        g0 = _mm256_abs_epi32(f0);
        g0 = _mm256_cmpgt_epi32(g0, high);
        /* Lanes with a0 == -GAMMA2; the sign operation keeps the sign bit
         * set only where a1 is strictly positive. */
        g1 = _mm256_cmpeq_epi32(f0, low);
        g1 = _mm256_sign_epi32(g1, f1);
        g0 = _mm256_or_si256(g0, g1);

        /* Collect the eight lane sign bits. The intrinsic cast is used
         * instead of a GNU vector-extension cast so that the code does not
         * rely on compiler-specific vector types. */
        bad = (uint32_t)_mm256_movemask_ps(_mm256_castsi256_ps(g0));
        /* NOTE(review): the table entry presumably packs the positions of
         * the set bits of `bad` into 8 bytes -- confirm in rejsample. */
        memcpy(&idx, PQCLEAN_DILITHIUM3_AVX2_idxlut[bad], 8);
        /* Add 8*i to each byte to turn lane positions into coefficient
         * indices. */
        idx += (uint64_t)0x0808080808080808 * i;
        memcpy(&hint[n], &idx, 8);
        n += (unsigned int)_mm_popcnt_u32(bad);
    }

    return n;
}

/*************************************************
* Name:        use_hint
*
* Description: Correct high parts according to hint. Each coefficient is
*              decomposed into (a1, a0); where the hint is set, a1 is
*              incremented if a0 > 0 and decremented if a0 <= 0, and the
*              result is reduced with a 4-bit mask (i.e. modulo 16).
*              Coefficients with a clear hint keep their high part.
*
* Arguments:   - __m256i *b: output array of length N/8 with corrected high parts
*              - const __m256i *a: input array of length N/8
*              - const __m256i *hint: input array of length N/8 with hint bits
*                (assumed to hold 0 or 1 per 32-bit lane -- TODO confirm
*                against the caller)
*
**************************************************/
void PQCLEAN_DILITHIUM3_AVX2_use_hint_avx(__m256i *b, const __m256i *a, const __m256i *restrict hint) {
    unsigned int i;
    __m256i a0[N / 8];
    __m256i f, g, h, t;
    const __m256i zero = _mm256_setzero_si256();
    const __m256i mask = _mm256_set1_epi32(15);

    PQCLEAN_DILITHIUM3_AVX2_decompose_avx(b, a0, a);
    for (i = 0; i < N / 8; i++) {
        f = _mm256_load_si256(&a0[i]);
        g = _mm256_load_si256(&b[i]);
        h = _mm256_load_si256(&hint[i]);
        /* All-ones in lanes with a0 > 0. Selecting on the raw sign bit of
         * a0 would treat a0 == 0 as positive, whereas the UseHint
         * definition decrements in that case. */
        f = _mm256_cmpgt_epi32(f, zero);
        /* t = h where a0 <= 0, otherwise 0. */
        t = _mm256_blendv_epi32(h, zero, f);
        /* h - 2*t is +h for a0 > 0 and -h for a0 <= 0. */
        t = _mm256_slli_epi32(t, 1);
        h = _mm256_sub_epi32(h, t);
        g = _mm256_add_epi32(g, h);
        g = _mm256_and_si256(g, mask);
        _mm256_store_si256(&b[i], g);
    }
}
The diff you're trying to view is too large. Only the first 1000 changed files have been loaded.
Showing with 0 additions and 0 deletions (0 / 0 diffs computed)
swh spinner

Computing file changes ...

back to top

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Contact— JavaScript license information— Web API