Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Revision 71f0daa874b1226215c219deecd3e230ed170e5a authored by Richard Petri on 24 November 2023, 07:58:50 UTC, committed by Richard Petri on 24 November 2023, 08:02:41 UTC
Add biscuit
1 parent d436546
  • Files
  • Changes
  • 7643cf5
  • /
  • crypto_sign
  • /
  • ov-Ip
  • /
  • m4f
  • /
  • gf256mat_inv_22.S
Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • revision
  • directory
  • content
revision badge
swh:1:rev:71f0daa874b1226215c219deecd3e230ed170e5a
directory badge
swh:1:dir:97578ea584158724e0796ec661129b2ca0b595c8
content badge
swh:1:cnt:a671218633afc7c74c4412f7cb89316c6a929ae7

This interface makes it possible to generate software citations, provided that the root directory of the browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • revision
  • directory
  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
gf256mat_inv_22.S
.syntax unified
.cpu cortex-m4
.thumb

#include "gf256_madd.i"

// gf256_inv: branch-free inversion of the byte held in register g.
// (The call site in this file describes it as pivot = gf256_inv( pivot ).)
//   g                         in: byte to invert; out: low 8 bits of v after the loop
//   f, delta, r, v            loop state, initialised below
//   mdelta, tmp, tmp2, tmp3   scratch
// Every register argument and the condition flags are overwritten.
// The loop is unrolled with .rept and contains no branches, only IT blocks.
// NOTE(review): 0x11b is presumably the reduction polynomial of the field
// (x^8 + x^4 + x^3 + x + 1) and the update rule looks like a division-step
// style inversion -- confirm against the reference implementation.
.macro gf256_inv g, f, delta, r, v, mdelta, tmp, tmp2, tmp3
    lsl.w \g, \g, #1
    mov.w \f, #0x11b
    mov.w \delta, #1
    mov.w \r, #0x100
    mov.w \v, #0

    // 15 iterations of 15 instructions each
    .rept 15
        // tmp = f ^ g and tmp2 = v ^ r are taken before g and r are updated,
        // so that "f ^= tmp" / "v ^= tmp2" below yield the old g / old r
        eor \tmp, \f, \g
        eor \tmp2, \v, \r

        // tmp3 = g >> 8; if that is non-zero: g ^= f, r ^= v
        lsrs \tmp3, \g, #8
        itt ne
        eorne \g, \g, \f
        eorne \r, \r, \v

        // NE iff bit 0 of tmp3 is set and -delta is negative (i.e. delta > 0)
        neg \mdelta, \delta
        tst \tmp3, \mdelta, lsr#31

        // NE: delta = 1 - delta, f = old g, v = old r
        // EQ: delta = delta + 1
        ittte ne
        addne \delta, \mdelta, #1
        eorne \f, \f, \tmp
        eorne \v, \v, \tmp2
        addeq \delta, #1

        lsl \g, \g, #1
        lsr \v, \v, #1
    .endr
    // result: low byte of v, returned in g
    and \g, \v, #0xff
.endm


// gf256mat_inv_m4f_22_init
// In:  r0 = extended matrix buffer, 22 rows of 2*22 bytes
//      r1 = input matrix, 22 rows of 22 bytes
// Zeroes the buffer with memset, then fills row k with the 22 input bytes of
// row k followed by row k of the identity matrix (a single 1 at column 22+k).
// Clobbers r0-r3, r12, the flags, and whatever memset clobbers.
//
// The two pointer arguments are parked in r4/r5 across the memset call: these
// are callee-saved, so memset preserves them, and they are already covered by
// the push below. Parking them in s18/s19 without saving those registers would
// overwrite callee-saved VFP registers (s16-s31) belonging to the caller.
.global gf256mat_inv_m4f_22_init
.type gf256mat_inv_m4f_22_init, %function
.align 2
gf256mat_inv_m4f_22_init:
    push {r4-r11, r14}
    in_ptr  .req r4
    ext_ptr .req r5
    ai .req r3
    ii .req r1
    aj0 .req r9
    aj1 .req r6
    aj2 .req r7
    aj3 .req r8
    mov.w in_ptr, r1
    mov.w ext_ptr, r0

    // set the entire extended matrix to zero (r0 still points at it)
    mov.w r1, #0
    mov.w r2, #22*22*2
    bl.w memset

    // constant 1 for the identity diagonal; set after the call because
    // r14 is the link register and is overwritten by bl
    mov.w r14, #1

    // copy over the input matrix to the left half of the extended matrix
    mov.w r0, in_ptr
    mov.w ai, ext_ptr
    mov.w ii, #22
    1:
    // first 16 bytes of the input row
    ldr.w aj1, [r0, #4]
    ldr.w aj2, [r0, #8]
    ldr.w aj3, [r0, #12]
    ldr.w aj0, [r0], #16

    str.w aj1, [ai, #4]
    str.w aj2, [ai, #8]
    str.w aj3, [ai, #12]
    str.w aj0, [ai], #16

    // remaining 6 bytes (word + halfword); rows are 22 bytes long, so these
    // accesses are not word-aligned in general
    ldrh.w aj1, [r0, #4]
    ldr.w aj0, [r0], #6

    strh.w aj1, [ai, #4]
    str.w aj0, [ai], #6

    // ai now points at the right half of the row; ii counts 22 down to 1, so
    // ai + 22 - ii is column (22 - ii) of the right half, i.e. the diagonal
    add r12, ai, #22
    sub r12, ii
    strb r14, [r12]

    // skip the right half to reach the next row
    add.w ai, #22

    subs.w ii, ii, #1
    bne.w 1b

    .unreq ai
    .unreq ii
    .unreq aj0
    .unreq aj1
    .unreq aj2
    .unreq aj3
    .unreq in_ptr
    .unreq ext_ptr
    pop {r4-r11, pc}


// gf256mat_inv_m4f_22_extract
// In:  r0 = destination matrix, 22 rows of 22 bytes
//      r1 = extended matrix, 22 rows of 2*22 bytes
// Copies the right half (columns 22..43) of every extended-matrix row to the
// destination. Clobbers r1-r3 and the flags.
// The pointers are moved between core registers directly; no VFP register is
// touched, so the routine does not depend on the FPU.
.global gf256mat_inv_m4f_22_extract
.type gf256mat_inv_m4f_22_extract, %function
.align 2
gf256mat_inv_m4f_22_extract:
    push {r4-r11, r14}
    ai .req r3
    ii .req r1
    aj0 .req r9
    aj1 .req r6
    aj2 .req r7
    aj3 .req r8

    // r2 = destination, ai = right half of row 0.
    // ii aliases r1, so r1 must be consumed before ii is initialised.
    mov.w r2, r0
    add.w ai, r1, #22
    mov.w ii, #22
    1:
    // first 16 bytes of the right half
    ldr.w aj1, [ai, #4]
    ldr.w aj2, [ai, #8]
    ldr.w aj3, [ai, #12]
    ldr.w aj0, [ai], #16

    str.w aj1, [r2, #4]
    str.w aj2, [r2, #8]
    str.w aj3, [r2, #12]
    str.w aj0, [r2], #16

    // remaining 6 bytes; the source additionally skips the 22-byte left half
    // of the next row
    ldrh.w aj1, [ai, #4]
    ldr.w aj0, [ai], #6+22

    strh.w aj1, [r2, #4]
    str.w aj0, [r2], #6

    subs.w ii, #1
    bne.w 1b

    pop {r4-r11, pc}

    .unreq ai
    .unreq ii
    .unreq aj0
    .unreq aj1
    .unreq aj2
    .unreq aj3

// madd_row: row update of the elimination step. The call site in this file
// describes it as gf256v_madd( aj , ai , aj[i] , w), with the pivot row ai
// supplied in VFP registers rather than read from memory.
//   pivot               in: scalar multiplier (the byte aj[i] at the call site)
//   aj                  in: pointer to the 2*22-byte row updated in place
//   tmp0-tmp3           scratch: receive words of the pivot row from matf0-matf10
//   tmp4-tmp7           scratch: words of row aj; loaded, passed to gf256_madd, stored back
//   tmp8, tmp9          scratch handed to the gf256_madd* macros
//   c01010101, pconst   constants handed to the gf256_madd* macros
//   fbx0-fbx7           VFP registers handed to gf256_madd_precompb / gf256_madd
//   matf0-matf10        VFP registers holding the 11 words of the pivot row
// gf256_madd_precompb and gf256_madd are defined in gf256_madd.i and are not
// visible here; presumably the former precomputes multiples of pivot into
// fbx0-fbx7 and the latter accumulates pivot * (tmp0..tmp3) into tmp4..tmp7
// -- TODO confirm against gf256_madd.i.
.macro madd_row pivot, aj, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, c01010101, pconst, fbx0, fbx1, fbx2, fbx3, fbx4, fbx5, fbx6, fbx7, matf0, matf1, matf2, matf3, matf4, matf5, matf6, matf7, matf8, matf9, matf10
    gf256_madd_precompb \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \pivot, \c01010101, \pconst, \tmp8

    // words 0-3 of the row
    ldr.w \tmp4, [\aj, #4*0]
    ldr.w \tmp5, [\aj, #4*1]
    ldr.w \tmp6, [\aj, #4*2]
    ldr.w \tmp7, [\aj, #4*3]

    vmov.w \tmp0, \matf0
    vmov.w \tmp1, \matf1
    vmov.w \tmp2, \matf2
    vmov.w \tmp3, \matf3

    gf256_madd 4, \tmp4, \tmp5, \tmp6, \tmp7, \tmp0, \tmp1, \tmp2, \tmp3, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9

    str.w \tmp4, [\aj, #4*0]
    str.w \tmp5, [\aj, #4*1]
    str.w \tmp6, [\aj, #4*2]
    str.w \tmp7, [\aj, #4*3]

    // words 4-7 of the row
    ldr.w \tmp4, [\aj, #4*4]
    ldr.w \tmp5, [\aj, #4*5]
    ldr.w \tmp6, [\aj, #4*6]
    ldr.w \tmp7, [\aj, #4*7]

    vmov.w \tmp0, \matf4
    vmov.w \tmp1, \matf5
    vmov.w \tmp2, \matf6
    vmov.w \tmp3, \matf7

    gf256_madd 4, \tmp4, \tmp5, \tmp6, \tmp7, \tmp0, \tmp1, \tmp2, \tmp3, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9

    str.w \tmp4, [\aj, #4*4]
    str.w \tmp5, [\aj, #4*5]
    str.w \tmp6, [\aj, #4*6]
    str.w \tmp7, [\aj, #4*7]

    // words 8-10 of the row (2*22 bytes = 11 words); the xxx arguments are
    // placeholders for the unused fourth register of the 3-word variant
    ldr.w \tmp4, [\aj, #4*8]
    ldr.w \tmp5, [\aj, #4*9]
    ldr.w \tmp6, [\aj, #4*10]

    vmov.w \tmp0, \matf8
    vmov.w \tmp1, \matf9
    vmov.w \tmp2, \matf10

    gf256_madd 3, \tmp4, \tmp5, \tmp6, xxx, \tmp0, \tmp1, \tmp2, xxx, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9

    str.w \tmp4, [\aj, #4*8]
    str.w \tmp5, [\aj, #4*9]
    str.w \tmp6, [\aj, #4*10]
.endm

// invert_pivot_and_multiply computes (the flag called r8 below is assumed to
// live in the byte at sp+0; presumably the name comes from the C reference):
// r8 &= gf256_is_nonzero(ai[i]);
// uint8_t pivot = ai[i];
// pivot = gf256_inv( pivot );
// _gf256v_mul_scalar_u32( ai  , pivot , w);
//
//   ai                  in: pointer to the 2*22-byte pivot row, scaled in place
//   pivotidx            in: byte index i of the pivot within the row; only read
//                       by the first instruction, so it may share a register
//                       with a scratch argument that is first written later
//   pivot               scratch: ai[i], then its inverse
//   tmp0-tmp7           scratch: first the state of gf256_inv, then row words
//                       (tmp0-tmp3 loaded from ai, tmp4-tmp7 stored back)
//   tmp8, tmp9          scratch handed to the gf256_madd_precompb / gf256_mul_u32 macros
//   c01010101, pconst   constants handed to those macros
//   matf0-matf10        out: VFP registers receiving the 11 words of the scaled row
//   fbx0-fbx7           VFP registers handed to gf256_madd_precompb / gf256_mul_u32
// gf256_madd_precompb and gf256_mul_u32 are defined in gf256_madd.i and are
// not visible here -- TODO confirm their exact contract there.
.macro invert_pivot_and_multiply ai, pivotidx, pivot, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, c01010101, pconst, matf0, matf1, matf2, matf3, matf4, matf5, matf6, matf7, matf8, matf9, matf10, fbx0, fbx1, fbx2, fbx3, fbx4, fbx5, fbx6, fbx7
    // invert ai[i]
    // first clear the flag byte at sp+0 if the pivot is zero (predicated
    // move, no branch); a non-zero pivot leaves the byte unchanged
    ldrb.w \pivot, [\ai, \pivotidx]
    ldrb.w \tmp0, [sp, #0]
    cmp.n \pivot, #0
    it eq
    moveq.w \tmp0, #0
    strb.w \tmp0, [sp, #0]


    // pivot = gf256_inv(pivot); tmp0-tmp7 serve as the macro's working registers
    gf256_inv \pivot, \tmp0, \tmp1, \tmp2, \tmp3, \tmp4, \tmp5, \tmp6, \tmp7
    gf256_madd_precompb \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \pivot, \c01010101, \pconst, \tmp8

    // words 0-3 of the row
    ldr.w \tmp0, [\ai, #4*0]
    ldr.w \tmp1, [\ai, #4*1]
    ldr.w \tmp2, [\ai, #4*2]
    ldr.w \tmp3, [\ai, #4*3]


    gf256_mul_u32 4, \tmp4, \tmp5, \tmp6, \tmp7, \tmp0, \tmp1, \tmp2, \tmp3, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9

    // keep the result words in VFP registers and write them back to the row
    vmov.w \matf0, \tmp4
    vmov.w \matf1, \tmp5
    vmov.w \matf2, \tmp6
    vmov.w \matf3, \tmp7
    str.w \tmp4, [\ai, #4*0]
    str.w \tmp5, [\ai, #4*1]
    str.w \tmp6, [\ai, #4*2]
    str.w \tmp7, [\ai, #4*3]

    // words 4-7 of the row
    ldr.w \tmp0, [\ai, #4*4]
    ldr.w \tmp1, [\ai, #4*5]
    ldr.w \tmp2, [\ai, #4*6]
    ldr.w \tmp3, [\ai, #4*7]

    gf256_mul_u32 4, \tmp4, \tmp5, \tmp6, \tmp7, \tmp0, \tmp1, \tmp2, \tmp3, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9

    vmov.w \matf4, \tmp4
    vmov.w \matf5, \tmp5
    vmov.w \matf6, \tmp6
    vmov.w \matf7, \tmp7
    str.w \tmp4, [\ai, #4*4]
    str.w \tmp5, [\ai, #4*5]
    str.w \tmp6, [\ai, #4*6]
    str.w \tmp7, [\ai, #4*7]

    // words 8-10 of the row (2*22 bytes = 11 words); the xxx arguments are
    // placeholders for the unused fourth register of the 3-word variant
    ldr.w \tmp0, [\ai, #4*8]
    ldr.w \tmp1, [\ai, #4*9]
    ldr.w \tmp2, [\ai, #4*10]

    gf256_mul_u32 3, \tmp4, \tmp5, \tmp6, xxx, \tmp0, \tmp1, \tmp2, xxx, \fbx0, \fbx1, \fbx2, \fbx3, \fbx4, \fbx5, \fbx6, \fbx7, \c01010101, \tmp8, \tmp9

    vmov.w \matf8, \tmp4
    vmov.w \matf9, \tmp5
    vmov.w \matf10, \tmp6
    str.w \tmp4, [\ai, #4*8]
    str.w \tmp5, [\ai, #4*9]
    str.w \tmp6, [\ai, #4*10]
.endm

// conditional_add: try to make the pivot entry ai[ii] non-zero by XOR-ing the
// rows below row ii into row ii.
//   ai          in: pointer to row ii of the extended matrix (restored on exit)
//   ii          in: pivot index, 0..21 (preserved)
//   aj          scratch: walks the rows below ai, 2*22 bytes apart
//   ajmax       scratch: ai + (22 - ii)*2*22, one past the last row
//   tmp0-tmp7   scratch
// Every lower row is loaded in full; it is XOR-ed into row ai only while
// ai[ii] is still zero. The selection is done with IT EQ predication, so the
// only branches depend on ii and the row counter, not on matrix contents.
// ai, ii and tmp0 must be low registers (r0-r7) because of the 16-bit ldrb.n /
// cmp.n encodings. The flags are clobbered.
// The exit label is a numeric local label so that the macro can be expanded
// more than once and adds no named symbol to the file.
.macro conditional_add ai, ii, aj, ajmax, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7
    // for the last row, we don't have rows left to add, so we skip the inner loop
    cmp.w \ii, #21
    beq.w 99f

    // aj = row ii+1, ajmax = ai + (22 - ii) * 2*22
    add.w \aj, \ai, #2*22
    rsb.w \ajmax, \ii, #22
    mov.w \tmp0, #2*22
    mla.w \ajmax, \ajmax, \tmp0, \ai

    1:
        // Z flag = (ai[ii] == 0); it stays valid for the rest of this
        // iteration because nothing below sets the flags until the final cmp
        ldrb.n \tmp0, [\ai, \ii]
        cmp.n \tmp0, #0

        // got 22*2/4 = 11 words to mult
        .rept 2 // 8 words
            ldr.w \tmp1, [\aj, #4*1]
            ldr.w \tmp2, [\aj, #4*2]
            ldr.w \tmp3, [\aj, #4*3]
            ldr.w \tmp0, [\aj], #16

            ldr.w \tmp4, [\ai, #4*0]
            ldr.w \tmp5, [\ai, #4*1]
            ldr.w \tmp6, [\ai, #4*2]
            ldr.w \tmp7, [\ai, #4*3]

            itttt eq
            eoreq \tmp4, \tmp4, \tmp0
            eoreq \tmp5, \tmp5, \tmp1
            eoreq \tmp6, \tmp6, \tmp2
            eoreq \tmp7, \tmp7, \tmp3

            str.w \tmp5, [\ai, #4*1]
            str.w \tmp6, [\ai, #4*2]
            str.w \tmp7, [\ai, #4*3]
            str.w \tmp4, [\ai], #16
        .endr
        // last 3 words
        ldr.w \tmp2, [\aj, #4*2]
        ldr.w \tmp1, [\aj, #4*1]
        ldr.w \tmp0, [\aj], #12
        ldr.w \tmp4, [\ai, #4*0]
        ldr.w \tmp5, [\ai, #4*1]
        ldr.w \tmp6, [\ai, #4*2]

        ittt eq
        eoreq \tmp4, \tmp4, \tmp0
        eoreq \tmp5, \tmp5, \tmp1
        eoreq \tmp6, \tmp6, \tmp2

        str.w \tmp5, [\ai, #4*1]
        str.w \tmp6, [\ai, #4*2]
        // the post-decrement undoes the two 16-byte advances above, so ai
        // points at the start of row ii again
        str.w \tmp4, [\ai], #-2*16

        // aj has advanced by one full row (16 + 16 + 12 bytes)
        cmp.w \aj, \ajmax
        bne.w 1b
    99:
.endm


// gf256mat_inv_m4f_22_gauss
// In:  r0 = mat, the extended matrix (22 rows of 2*22 bytes), modified in place
// Out: r0 = 1 if every pivot was non-zero, 0 otherwise
// Saves and restores r4-r11 and s16-s31; s0-s10 and s24-s31 are handed to the
// macros as working registers.
//
// Stack frame (24 bytes):
//   [sp, #0]   return code, starts at 1; its low byte is cleared by
//              invert_pivot_and_multiply when a pivot is zero
//   [sp, #4]   i, index of the current pivot row/column
//   [sp, #8]   mat while the two macros run (they overwrite r0); afterwards
//              the end-of-matrix pointer for the inner loop
//   [sp, #12]  address of row i, used to skip that row in the inner loop
//   [sp, #16]  ai, pointer to row i, carried between outer iterations
//   [sp, #20]  mat
.global gf256mat_inv_m4f_22_gauss
.type gf256mat_inv_m4f_22_gauss, %function
.align 2
gf256mat_inv_m4f_22_gauss:
    push.w {r4-r11, r14}
    vpush.w {s16-s31}
    sub.w sp, sp, #24
    // i = 0
    mov.w r2, #0
    str.w r2, [sp, #4]

    // return code (0 if not invertible)
    mov.w r3, #1
    str.w r3, [sp, #0]

    // ai
    str.w r0, [sp, #16]
    // mat
    str.w r0, [sp, #20]


    // constants handed to the gf256_madd.i macros as c01010101 and pconst
    mov.w r12, #0x01010101
    mov.w r14, #0x1b

    mov.w r1, r0
    // outer loop over pivots; on entry r0 = mat, r1 = ai (row i), r2 = i
    2:
        str.w r0, [sp, #8]

        // ai = r1, ii = r2; r0 and r3-r11 are overwritten
        conditional_add r1, r2, r0, r3, r4, r5, r6, r7, r8, r9, r10, r11

        // ai = r1, pivotidx = r2, pivot = r3; r2 doubles as tmp8 and r0 as
        // tmp9, so i and mat are reloaded from the stack afterwards.
        // The scaled pivot row is left in s0-s10.
        invert_pivot_and_multiply r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r2, r0, r12, r14, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s24, s25, s26, s27, s28, s29, s30, s31

        // r0 = mat (first row aj), r1 = end of matrix, r3 = address of row i
        ldr.w r0, [sp, #8]
        ldr.w r2, [sp, #4]
        add.w r1, r0, #22*22*2
        mov.w r3, #22*2
        mla.w r3, r2, r3, r0

        str.w r1, [sp, #8]
        str.w r3, [sp, #12]

        // inner loop over all rows aj (r0)
        1:
            // check if i == j
            ldr.w r3, [sp, #12]
            cmp.w r3, r0
            beq.w skip

            // load pivot
            ldr r2, [sp, #4]
            ldrb.n r2, [r0, r2]

            // gf256v_madd( aj , ai , aj[i] , w);
            // pivot = r2, aj = r0; r1 and r3-r11 are scratch; pivot row in s0-s10
            madd_row r2, r0, r1, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r14, s24, s25, s26, s27, s28, s29, s30, s31, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10

        skip:
        // next row; r1 is reloaded because madd_row uses it as scratch
        ldr.w r1, [sp, #8]
        add.w r0, #22*2
        cmp.w r0, r1
        bne.w 1b

    // ai += one row
    ldr.w r1, [sp, #16]
    add.w r1, #2*22
    str.w r1, [sp, #16]

    // i += 1
    ldr.w r2, [sp, #4]
    add.w r2, #1
    str.w r2, [sp, #4]

    // mat
    ldr.w r0, [sp, #20]
    cmp.w r2, #22
    bne.w 2b


    // return the code accumulated at sp+0
    ldr.w r0, [sp, #0]
    add.w sp, sp, #24

    vpop {s16-s31}
    pop {r4-r11, pc}

// gf256mat_inv_m4f_22
// In:  r0 = destination for the 22x22 result, r1 = 22x22 input matrix
// Out: r0 = return code of gf256mat_inv_m4f_22_gauss
//           (1 if every pivot was non-zero, 0 otherwise)
// Builds the extended matrix in a 22*22*2-byte stack buffer, runs the
// elimination on it and copies its right half to the destination. The copy is
// performed regardless of the return code.
.global gf256mat_inv_m4f_22
.type gf256mat_inv_m4f_22, %function
.align 2
gf256mat_inv_m4f_22:
    push.w {r4-r11, r14}
    // s16-s19 are callee-saved VFP registers; s16 and s17 are used below
    vpush.w {s16-s19}
    // keep the destination pointer across the calls
    vmov s16, r0

    sub.w sp, sp, #22*22*2
    mov.w r0, sp
    // initialize matrix (r1 still holds the input pointer)
    bl.w gf256mat_inv_m4f_22_init

    mov.w r0, sp
    // actual Gauss elimination
    bl.w gf256mat_inv_m4f_22_gauss

    // keep the return code across the extract call
    vmov.w s17, r0
    mov.w r1, sp
    vmov.w r0, s16
    // extract result
    bl.w gf256mat_inv_m4f_22_extract

    add.w sp, sp, #22*22*2
    vmov.w r0, s17
    vpop.w {s16-s19}
    pop {r4-r11, pc}
The diff you're trying to view is too large. Only the first 1000 changed files have been loaded.
Showing with 0 additions and 0 deletions (0 / 0 diffs computed)
swh spinner

Computing file changes ...

back to top

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API