Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Revision 71f0daa874b1226215c219deecd3e230ed170e5a authored by Richard Petri on 24 November 2023, 07:58:50 UTC, committed by Richard Petri on 24 November 2023, 08:02:41 UTC
Add biscuit
1 parent d436546
  • Files
  • Changes
  • 7643cf5
  • /
  • crypto_sign
  • /
  • ov-Ip
  • /
  • m4f
  • /
  • gf256mat_gauss_elim_44.S
Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • revision
  • directory
  • content
revision badge
swh:1:rev:71f0daa874b1226215c219deecd3e230ed170e5a
directory badge
swh:1:dir:97578ea584158724e0796ec661129b2ca0b595c8
content badge
swh:1:cnt:251c449151103430e6c0b9150f4a1e651bd262b0

This interface makes it possible to generate software citations, provided that the root directory of the browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • revision
  • directory
  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
gf256mat_gauss_elim_44.S
.syntax unified
.cpu cortex-m4
.thumb

#include "gf256_madd.i"

// gf256_inv: inversion of a GF(256) element without branches.
// The callers in this file use it as `pivot = gf256_inv(pivot)`.
//
//   \g       in:  element to invert
//            out: result, masked to its low 8 bits
//   \f, \delta, \r, \v, \mdelta, \tmp, \tmp2, \tmp3
//            scratch registers, all overwritten
// The condition flags are clobbered.
//
// The loop body is unrolled 15 times and contains no branches: every
// data-dependent update is predicated inside an IT block.
// For \g = 0 no predicated update ever fires (g >> 8 stays 0), so \v stays 0
// and the result is 0.
// NOTE(review): this looks like a fixed-iteration divstep-style GCD inversion;
// confirm against the reference C implementation.
.macro gf256_inv g, f, delta, r, v, mdelta, tmp, tmp2, tmp3
    // Initial state: g <<= 1, f = 0x11b, delta = 1, r = 0x100, v = 0.
    // 0x11b has bits 8, 4, 3, 1, 0 set, i.e. x^8 + x^4 + x^3 + x + 1
    // (presumably the reduction polynomial of the field; TODO confirm).
    lsl.w \g, \g, #1
    mov.w \f, #0x11b
    mov.w \delta, #1
    mov.w \r, #0x100
    mov.w \v, #0

    // 15 unrolled iterations
    .rept 15
        // Values needed if f/g and v/r are swapped below:
        // tmp = f ^ g, tmp2 = v ^ r (taken before g and r are modified).
        eor.w \tmp, \f, \g
        eor.w \tmp2, \v, \r

        // tmp3 = g >> 8 (flags updated). If tmp3 != 0: g ^= f and r ^= v.
        lsrs.w \tmp3, \g, #8
        itt ne
        eorne.n \g, \g, \f
        eorne.w \r, \r, \v

        // mdelta = -delta; flags = tmp3 & (mdelta >> 31), so NE holds only
        // when bit 0 of tmp3 is set and -delta has its sign bit set.
        neg \mdelta, \delta
        tst \tmp3, \mdelta, lsr#31

        // NOTE(review): nop.n presumably pads the following instructions to
        // the intended alignment; confirm before removing.
        nop.n
        // NE: delta = 1 - delta, f ^= tmp (f becomes the g from the top of
        //     this iteration), v ^= tmp2 (v becomes the r from the top).
        // EQ: delta = delta + 1.
        ittte ne
        addne \delta, \mdelta, #1
        eorne \f, \f, \tmp
        eorne \v, \v, \tmp2
        addeq.w \delta, #1

        // Step: g <<= 1, v >>= 1.
        lsl \g, \g, #1
        lsr \v, \v, #1
    .endr
    // Result is the low byte of v, returned in \g.
    and \g, \v, #0xff
.endm


// Private helper for madd_row; not intended to be invoked from anywhere else.
// Handles the four consecutive 32-bit words \first .. \first+3 of the row at \aj:
//   1. load the four words of the row into \acc0..\acc3
//   2. copy the four matching words kept in the FPU registers \row0..\row3
//      into \src0..\src3
//   3. invoke gf256_madd (defined in gf256_madd.i) on them
//   4. store \acc0..\acc3 back to the row
.macro madd_row_quad aj, first, acc0, acc1, acc2, acc3, src0, src1, src2, src3, row0, row1, row2, row3, fbx0, fbx1, fbx2, fbx3, fbx4, fbx5, fbx6, fbx7, c01010101, scr0, scr1
    ldr.w \acc0, [\aj, #4*(\first+0)]
    ldr.w \acc1, [\aj, #4*(\first+1)]
    ldr.w \acc2, [\aj, #4*(\first+2)]
    ldr.w \acc3, [\aj, #4*(\first+3)]

    vmov.w \src0, \row0
    vmov.w \src1, \row1
    vmov.w \src2, \row2
    vmov.w \src3, \row3

    gf256_madd 4, \acc0, \acc1, \acc2, \acc3, \src0, \src1, \src2, \src3, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \scr0, \scr1

    str.w \acc0, [\aj, #4*(\first+0)]
    str.w \acc1, [\aj, #4*(\first+1)]
    str.w \acc2, [\aj, #4*(\first+2)]
    str.w \acc3, [\aj, #4*(\first+3)]
.endm

// madd_row: updates all 12 words (48 bytes) of the row at \aj in place.
// gf256_madd_precompb is first run on the scalar \pivot with \fbx0..\fbx7 as
// its outputs; then each group of four row words is combined, through
// gf256_madd, with the matching four words held in \matf0..\matf11.
// NOTE(review): presumably this is aj += pivot * (row cached in matf0..11);
// the exact semantics live in gf256_madd.i, which is not visible here.
//
//   \pivot          scalar taken from the row (read by gf256_madd_precompb)
//   \aj             pointer to the row; not modified
//   \tmp0..\tmp9    scratch core registers, overwritten
//   \c01010101, \pconst   constants forwarded to the gf256 macros
//   \fbx0..\fbx7    FPU registers handed to gf256_madd_precompb / gf256_madd
//   \matf0..\matf11 FPU registers holding the 12 words of the other row (read only here)
.macro madd_row pivot, aj, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, c01010101, pconst, fbx0, fbx1, fbx2, fbx3, fbx4, fbx5, fbx6, fbx7, matf0, matf1, matf2, matf3, matf4, matf5, matf6, matf7, matf8, matf9, matf10, matf11
    gf256_madd_precompb \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \pivot, \c01010101, \pconst, \tmp8

    // words 0..3
    madd_row_quad \aj, 0, \tmp4, \tmp5, \tmp6, \tmp7, \tmp0, \tmp1, \tmp2, \tmp3, \matf0, \matf1, \matf2, \matf3, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9
    // words 4..7
    madd_row_quad \aj, 4, \tmp4, \tmp5, \tmp6, \tmp7, \tmp0, \tmp1, \tmp2, \tmp3, \matf4, \matf5, \matf6, \matf7, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9
    // words 8..11
    madd_row_quad \aj, 8, \tmp4, \tmp5, \tmp6, \tmp7, \tmp0, \tmp1, \tmp2, \tmp3, \matf8, \matf9, \matf10, \matf11, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9
.endm



// madd_row_second: reduced form of madd_row that only touches words 6..11
// (bytes 24..47) of the row at \aj. In this file it is used for pivot
// indices >= 24.
// NOTE(review): words 0..5 are presumably already zero at that stage, which
// would be why they are skipped; confirm against the reference implementation.
//
// The parameter list is identical to madd_row. \matf0..\matf5 are accepted but
// never read, and \tmp0, \tmp1 are only used by the final 4-word group.
//
//   \pivot          scalar taken from the row (read by gf256_madd_precompb)
//   \aj             pointer to the row; not modified
//   \tmp0..\tmp9    scratch core registers, overwritten
//   \c01010101, \pconst   constants forwarded to the gf256 macros
//   \fbx0..\fbx7    FPU registers handed to gf256_madd_precompb / gf256_madd
//   \matf6..\matf11 FPU registers holding words 6..11 of the other row
.macro madd_row_second pivot, aj, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, c01010101, pconst, fbx0, fbx1, fbx2, fbx3, fbx4, fbx5, fbx6, fbx7, matf0, matf1, matf2, matf3, matf4, matf5, matf6, matf7, matf8, matf9, matf10, matf11
    gf256_madd_precompb \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \pivot, \c01010101, \pconst, \tmp8

    // ---- words 6..7: 2-word form of gf256_madd ----
    ldr.w \tmp6, [\aj, #4*6]
    ldr.w \tmp7, [\aj, #4*7]

    vmov.w \tmp2, \matf6
    vmov.w \tmp3, \matf7

    // xxx fills the operand slots that the 2-word form presumably ignores.
    gf256_madd 2, \tmp6, \tmp7, xxx, xxx, \tmp2, \tmp3, xxx, xxx, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9

    str.w \tmp6, [\aj, #4*6]
    str.w \tmp7, [\aj, #4*7]

    // ---- words 8..11: regular 4-word form ----
    ldr.w \tmp4, [\aj, #4*8]
    ldr.w \tmp5, [\aj, #4*9]
    ldr.w \tmp6, [\aj, #4*10]
    ldr.w \tmp7, [\aj, #4*11]

    vmov.w \tmp0, \matf8
    vmov.w \tmp1, \matf9
    vmov.w \tmp2, \matf10
    vmov.w \tmp3, \matf11

    gf256_madd 4, \tmp4, \tmp5, \tmp6, \tmp7, \tmp0, \tmp1, \tmp2, \tmp3, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9

    str.w \tmp4, [\aj, #4*8]
    str.w \tmp5, [\aj, #4*9]
    str.w \tmp6, [\aj, #4*10]
    str.w \tmp7, [\aj, #4*11]
.endm

// Private helper for invert_pivot_and_multiply; not intended to be invoked
// from anywhere else. Handles the four consecutive 32-bit words
// \first .. \first+3 of the row at \ai:
//   1. load the four words into \src0..\src3
//   2. invoke gf256_mul_u32 (defined in gf256_madd.i) with \dst0..\dst3 as
//      its first four register operands
//   3. copy \dst0..\dst3 into the FPU registers \row0..\row3
//   4. store \dst0..\dst3 back to the row
.macro pivot_mul_quad ai, first, dst0, dst1, dst2, dst3, src0, src1, src2, src3, row0, row1, row2, row3, fbx0, fbx1, fbx2, fbx3, fbx4, fbx5, fbx6, fbx7, c01010101, scr0, scr1
    ldr.w \src0, [\ai, #4*(\first+0)]
    ldr.w \src1, [\ai, #4*(\first+1)]
    ldr.w \src2, [\ai, #4*(\first+2)]
    ldr.w \src3, [\ai, #4*(\first+3)]


    gf256_mul_u32 4, \dst0, \dst1, \dst2, \dst3, \src0, \src1, \src2, \src3, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \scr0, \scr1

    vmov.w \row0, \dst0
    vmov.w \row1, \dst1
    vmov.w \row2, \dst2
    vmov.w \row3, \dst3
    str.w \dst0, [\ai, #4*(\first+0)]
    str.w \dst1, [\ai, #4*(\first+1)]
    str.w \dst2, [\ai, #4*(\first+2)]
    str.w \dst3, [\ai, #4*(\first+3)]
.endm

// invert_pivot_and_multiply
//
// C-level equivalent (the C variable r8 is assumed to live in the byte at sp+0):
//     r8 &= gf256_is_nonzero(ai[i]);
//     uint8_t pivot = ai[i];
//     pivot = gf256_inv( pivot );
//     _gf256v_mul_scalar_u32( ai, pivot, w );
//
// All 12 words (48 bytes) of the row at \ai are rewritten in place, and the
// same 12 result words are left in \matf0..\matf11.
//
//   \ai             pointer to the pivot row; not modified
//   \pivotidx       byte index of the pivot within the row; only read by the
//                   very first instruction, before any scratch register is
//                   written
//   \pivot          receives the pivot byte, then its inverse
//   \tmp0..\tmp9    scratch core registers, overwritten
//   \c01010101, \pconst   constants forwarded to the gf256 macros
//   \matf0..\matf11 FPU registers receiving the scaled row
//   \fbx0..\fbx7    FPU registers handed to gf256_madd_precompb / gf256_mul_u32
// The condition flags are clobbered.
.macro invert_pivot_and_multiply ai, pivotidx, pivot, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, c01010101, pconst, matf0, matf1, matf2, matf3, matf4, matf5, matf6, matf7, matf8, matf9, matf10, matf11, fbx0, fbx1, fbx2, fbx3, fbx4, fbx5, fbx6, fbx7
    // Fetch the pivot and force the status byte at [sp] to 0 when it is zero;
    // the byte is read and written back unconditionally.
    ldrb.w \pivot, [\ai, \pivotidx]
    ldrb.w \tmp0, [sp, #0]
    cmp.n \pivot, #0
    it eq
    moveq.w \tmp0, #0
    strb.w \tmp0, [sp, #0]


    // pivot = gf256_inv(pivot), then hand it to gf256_madd_precompb.
    gf256_inv \pivot, \tmp0, \tmp1, \tmp2, \tmp3, \tmp4, \tmp5, \tmp6, \tmp7
    gf256_madd_precompb \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \pivot, \c01010101, \pconst, \tmp8

    // words 0..3
    pivot_mul_quad \ai, 0, \tmp4, \tmp5, \tmp6, \tmp7, \tmp0, \tmp1, \tmp2, \tmp3, \matf0, \matf1, \matf2, \matf3, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9
    // words 4..7
    pivot_mul_quad \ai, 4, \tmp4, \tmp5, \tmp6, \tmp7, \tmp0, \tmp1, \tmp2, \tmp3, \matf4, \matf5, \matf6, \matf7, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9
    // words 8..11
    pivot_mul_quad \ai, 8, \tmp4, \tmp5, \tmp6, \tmp7, \tmp0, \tmp1, \tmp2, \tmp3, \matf8, \matf9, \matf10, \matf11, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9
.endm


// invert_pivot_and_multiply_second: reduced form of invert_pivot_and_multiply
// that only rewrites words 6..11 (bytes 24..47) of the row at \ai. In this
// file it is used for pivot indices >= 24.
// NOTE(review): words 0..5 are presumably already zero at that stage, which
// would be why they are skipped; confirm against the reference implementation.
//
// Steps:
//   - read the pivot byte ai[pivotidx]; if it is zero, clear the status byte
//     at [sp] (the byte is read and written back unconditionally)
//   - pivot = gf256_inv(pivot), then gf256_madd_precompb on the inverse
//   - run gf256_mul_u32 on words 6..7 (2-word form) and words 8..11 (4-word
//     form), storing the results to the row and mirroring them into
//     \matf6..\matf11
//
// The parameter list is identical to invert_pivot_and_multiply.
// \matf0..\matf5 are accepted but never written. The condition flags and
// \tmp0..\tmp9 are clobbered.
.macro invert_pivot_and_multiply_second ai, pivotidx, pivot, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, c01010101, pconst, matf0, matf1, matf2, matf3, matf4, matf5, matf6, matf7, matf8, matf9, matf10, matf11, fbx0, fbx1, fbx2, fbx3, fbx4, fbx5, fbx6, fbx7
    // ---- pivot fetch and status update ----
    ldrb.w \pivot, [\ai, \pivotidx]
    ldrb.w \tmp0, [sp, #0]
    cmp.n \pivot, #0
    it eq
    moveq.w \tmp0, #0
    strb.w \tmp0, [sp, #0]

    // ---- inversion and precomputation ----
    gf256_inv \pivot, \tmp0, \tmp1, \tmp2, \tmp3, \tmp4, \tmp5, \tmp6, \tmp7
    gf256_madd_precompb \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \pivot, \c01010101, \pconst, \tmp8

    // ---- words 6..7: 2-word form of gf256_mul_u32 ----
    ldr.w \tmp2, [\ai, #4*6]
    ldr.w \tmp3, [\ai, #4*7]

    // xxx fills the operand slots that the 2-word form presumably ignores.
    gf256_mul_u32 2, \tmp6, \tmp7, xxx, xxx, \tmp2, \tmp3, xxx, xxx, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9

    vmov.w \matf6, \tmp6
    vmov.w \matf7, \tmp7
    str.w \tmp6, [\ai, #4*6]
    str.w \tmp7, [\ai, #4*7]

    // ---- words 8..11: regular 4-word form ----
    ldr.w \tmp0, [\ai, #4*8]
    ldr.w \tmp1, [\ai, #4*9]
    ldr.w \tmp2, [\ai, #4*10]
    ldr.w \tmp3, [\ai, #4*11]
    gf256_mul_u32 4, \tmp4, \tmp5, \tmp6, \tmp7, \tmp0, \tmp1, \tmp2, \tmp3, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9

    vmov.w \matf8, \tmp4
    vmov.w \matf9, \tmp5
    vmov.w \matf10, \tmp6
    vmov.w \matf11, \tmp7
    str.w \tmp4, [\ai, #4*8]
    str.w \tmp5, [\ai, #4*9]
    str.w \tmp6, [\ai, #4*10]
    str.w \tmp7, [\ai, #4*11]
.endm

// conditional_add: for each of the rows following the row at \ai, XOR that
// row into the row at \ai if the byte ai[ii] is zero at that moment. The byte
// is re-read before every following row. Loads and stores are executed in
// both cases; only the XOR itself is predicated.
//
//   \ai      pointer to the current row (48 bytes); restored to its entry
//            value on exit
//   \ii      index of the current row / pivot byte; read only
//   \aj      scratch: pointer walking over the following rows
//   \ajmax   scratch: end pointer (exclusive) for \aj
//   \tmp0..\tmp7  scratch core registers, overwritten
// The condition flags are clobbered.
//
// Rows visited: ai+48 up to (but not including) ai + count*48, where count is
// 8 when ii < 36 and 44 - ii otherwise; that is at most 7 following rows,
// never past row 43.
.macro conditional_add ai, ii, aj, ajmax, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7
    // the last row (ii == 43) has no following rows, so the loop is skipped
    cmp.w \ii, #43
    beq.w 2f

    // aj = first following row
    add.w \aj, \ai, #48

    // ajmax = ai + 48 * (ii < 36 ? 8 : 44 - ii)
    cmp.n \ii, #36
    ite lt
    movlt.w \ajmax, #8
    rsbge.w \ajmax, \ii, #44
    mov.w \tmp0, #48
    mla.w \ajmax, \ajmax, \tmp0, \ai

    1:
        // Z flag = (ai[ii] == 0). None of the instructions in the unrolled
        // chunks below updates the flags, so this one comparison predicates
        // all three chunks.
        ldrb.n \tmp0, [\ai, \ii]
        cmp.n \tmp0, #0

        # got 48/4 = 12 words to mult
        .rept 3 // 3 chunks of 4 words = 12 words
            // four words of the following row; aj is post-incremented by 16
            ldr.w \tmp1, [\aj, #4*1]
            ldr.w \tmp2, [\aj, #4*2]
            ldr.w \tmp3, [\aj, #4*3]
            ldr.w \tmp0, [\aj], #16

            // four words of the current row
            ldr.w \tmp4, [\ai, #4*0]
            ldr.w \tmp5, [\ai, #4*1]
            ldr.w \tmp6, [\ai, #4*2]
            ldr.w \tmp7, [\ai, #4*3]

            // NOTE(review): nop.n presumably pads the following instructions
            // to the intended alignment; confirm before removing.
            nop.n
            // XOR only when the pivot byte was zero
            itttt eq
            eoreq \tmp4, \tmp4, \tmp0
            eoreq \tmp5, \tmp5, \tmp1
            eoreq \tmp6, \tmp6, \tmp2
            eoreq \tmp7, \tmp7, \tmp3

            // write back unconditionally; ai is post-incremented by 16
            str.w \tmp5, [\ai, #4*1]
            str.w \tmp6, [\ai, #4*2]
            str.w \tmp7, [\ai, #4*3]
            str.w \tmp4, [\ai], #16
        .endr

        // undo the 3 x 16 byte post-increments of ai, then loop until aj
        // reaches the end pointer
        sub.w \ai, \ai, #48
        cmp.w \aj, \ajmax
        bne.w 1b
    2:
.endm

// -----------------------------------------------------------------------------
// gf256mat_gauss_elim_row_echolen_m4f_44
//
// In-place elimination on a matrix of 44 rows with a row stride of 48 bytes.
// For every row index ii = 0..43:
//   1. conditional_add: while the byte row[ii] is zero, following rows are
//      XORed into the current row (see that macro for the exact row range).
//   2. invert_pivot_and_multiply[_second]: the byte row[ii] is inverted and the
//      row is scaled by the inverse; a zero pivot clears the status byte.
//      The scaled row is kept in s0-s11.
//   3. For every following row j = ii+1..43, madd_row[_second] is applied with
//      the byte at column ii of row j as the scalar.
// For ii >= 24 the *_second variants are used; they only touch words 6..11.
//
// In:    r0 = pointer to the first row of the matrix
// Out:   r0 = 1 if no pivot was zero, 0 otherwise
// Saved and restored: r4-r11, lr, s16-s31
// Clobbered: r1-r3, r12, s0-s11, condition flags
// Stack: 9 core registers + 16 FPU registers + one 4-byte status word at [sp]
// NOTE(review): presumably declared in C as taking a uint8_t* and returning an
// unsigned status; confirm against the header.
//
// Register roles inside the loop:
//   r1  = pointer to the row currently being processed
//   r2  = ii at the top of each outer iteration, later reused as scratch
//   r12 = 0x01010101, r14 = 0x1b: constants forwarded to the gf256 macros
//   s16 = ii, s17 = jj, s18 = pointer to row ii (kept in FPU registers because
//         every core register is used as scratch by the macros)
//   s24-s31 = FPU registers handed to the gf256 macros as fbx0..fbx7
//
// The instruction stream is unchanged from the previous revision. Only the
// .unreq directives and the .size directive at the end are new.
// -----------------------------------------------------------------------------
.global gf256mat_gauss_elim_row_echolen_m4f_44
.type gf256mat_gauss_elim_row_echolen_m4f_44, %function
.align 2
gf256mat_gauss_elim_row_echolen_m4f_44:
    push.w {r4-r11, r14}
    vpush.w {s16-s31}
    // status word at [sp], initialised to 1
    sub.w sp, sp, #4
    mov.w r14, #1
    str.w r14, [sp]

    ii .req s16
    jj .req s17
    ai .req s18

    mov.w r2, #0                // ii = 0
    mov.w r1, r0                // current row = first row
    mov.w r12, #0x01010101
    mov.w r14, #0x1b
    vmov.w ii, r2
    vmov.w ai, r1

    3:
        // here: r1 = pointer to row ii, r2 = ii
        conditional_add r1, r2, r0, r3, r4, r5, r6, r7, r8, r9, r10, r11

        vmov.w r0, ii
        cmp.w r0, #24
        bge second_half

        // ---- ii < 24: full-width variants ----
        // r2 is passed both as pivotidx and as tmp8; the index is consumed by
        // the first instruction of the macro before r2 is overwritten.
        invert_pivot_and_multiply r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r2, r0, r12, r14, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s24, s25, s26, s27, s28, s29, s30, s31
        vmov.w r0, ii

        add.w r3, r0, #1
        vmov.w jj, r3           // jj = ii + 1

        // at least one following row exists here because ii < 24
        1:
            add.w r1, r1, #48   // next row
            vmov.w r0, ii
            ldrb.w r2, [r1, r0] // scalar = row_j[ii]

            madd_row r2, r1, r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r14, s24, s25, s26, s27, s28, s29, s30, s31, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11

            vmov r3, jj
            add.w r3, #1
            vmov jj, r3
            cmp.w r3, #44
            bne 1b

        b.w 2f
        second_half:
        // ---- ii >= 24: variants that only touch words 6..11 ----
        invert_pivot_and_multiply_second r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r2, r0, r12, r14, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s24, s25, s26, s27, s28, s29, s30, s31
        vmov.w r0, ii
        // the last row has no following rows to update
        cmp.w r0, #43
        beq 2f

        add.w r3, r0, #1
        vmov.w jj, r3           // jj = ii + 1

        1:
            add.w r1, r1, #48   // next row
            vmov.w r0, ii
            ldrb.w r2, [r1, r0] // scalar = row_j[ii]

            madd_row_second r2, r1, r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r14, s24, s25, s26, s27, s28, s29, s30, s31, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11

            vmov r3, jj
            add.w r3, #1
            vmov jj, r3
            cmp.w r3, #44
            bne 1b
        2:
    // advance to the next row: ii += 1, row pointer += 48
    vmov.w r2, ii
    add.w r2, #1
    vmov.w ii, r2
    vmov.w r1, ai
    add.w r1, #48
    vmov.w ai, r1

    cmp.w r2, #44
    bne.w 3b

    // return the status word and restore the saved registers
    ldr.w r0, [sp]
    add.w sp, sp, #4
    vpop.w {s16-s31}
    pop {r4-r11, pc}

    // release the register aliases so they do not leak past this function
    .unreq ii
    .unreq jj
    .unreq ai
// record the symbol size in the ELF symbol table
.size gf256mat_gauss_elim_row_echolen_m4f_44, .-gf256mat_gauss_elim_row_echolen_m4f_44
The diff you're trying to view is too large. Only the first 1000 changed files have been loaded.
Showing with 0 additions and 0 deletions (0 / 0 diffs computed)
swh spinner

Computing file changes ...

back to top

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API