https://github.com/torvalds/linux
Revision 4fca50d440cc5d4dc570ad5484cc0b70b381bc2a authored by Jan Kara on 08 September 2022, 09:21:24 UTC, committed by Theodore Ts'o on 22 September 2022, 02:11:34 UTC
One of the side-effects of mb_optimize_scan was that the optimized functions to select next group to try were called even before we tried the goal group. As a result we no longer allocate files close to corresponding inodes as well as we don't try to expand currently allocated extent in the same group. This results in reaim regression with workfile.disk workload of upto 8% with many clients on my test machine: baseline mb_optimize_scan Hmean disk-1 2114.16 ( 0.00%) 2099.37 ( -0.70%) Hmean disk-41 87794.43 ( 0.00%) 83787.47 * -4.56%* Hmean disk-81 148170.73 ( 0.00%) 135527.05 * -8.53%* Hmean disk-121 177506.11 ( 0.00%) 166284.93 * -6.32%* Hmean disk-161 220951.51 ( 0.00%) 207563.39 * -6.06%* Hmean disk-201 208722.74 ( 0.00%) 203235.59 ( -2.63%) Hmean disk-241 222051.60 ( 0.00%) 217705.51 ( -1.96%) Hmean disk-281 252244.17 ( 0.00%) 241132.72 * -4.41%* Hmean disk-321 255844.84 ( 0.00%) 245412.84 * -4.08%* Also this is causing huge regression (time increased by a factor of 5 or so) when untarring archive with lots of small files on some eMMC storage cards. Fix the problem by making sure we try goal group first. Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning") CC: stable@kernel.org Reported-and-tested-by: Stefan Wahren <stefan.wahren@i2se.com> Tested-by: Ojaswin Mujoo <ojaswin@linux.ibm.com> Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com> Link: https://lore.kernel.org/all/20220727105123.ckwrhbilzrxqpt24@quack3/ Link: https://lore.kernel.org/all/0d81a7c2-46b7-6010-62a4-3e6cfc1628d6@i2se.com/ Signed-off-by: Jan Kara <jack@suse.cz> Link: https://lore.kernel.org/r/20220908092136.11770-1-jack@suse.cz Signed-off-by: Theodore Ts'o <tytso@mit.edu>
1 parent 7e18e42
Tip revision: 4fca50d440cc5d4dc570ad5484cc0b70b381bc2a authored by Jan Kara on 08 September 2022, 09:21:24 UTC
ext4: make mballoc try target group first even with mb_optimize_scan
ext4: make mballoc try target group first even with mb_optimize_scan
Tip revision: 4fca50d
rmd160.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Cryptographic API.
*
* RIPEMD-160 - RACE Integrity Primitives Evaluation Message Digest.
*
* Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC
*
* Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch>
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <asm/byteorder.h>
#include "ripemd.h"
struct rmd160_ctx {
u64 byte_count;
u32 state[5];
__le32 buffer[16];
};
#define K1 RMD_K1
#define K2 RMD_K2
#define K3 RMD_K3
#define K4 RMD_K4
#define K5 RMD_K5
#define KK1 RMD_K6
#define KK2 RMD_K7
#define KK3 RMD_K8
#define KK4 RMD_K9
#define KK5 RMD_K1
#define F1(x, y, z) (x ^ y ^ z) /* XOR */
#define F2(x, y, z) (z ^ (x & (y ^ z))) /* x ? y : z */
#define F3(x, y, z) ((x | ~y) ^ z)
#define F4(x, y, z) (y ^ (z & (x ^ y))) /* z ? x : y */
#define F5(x, y, z) (x ^ (y | ~z))
#define ROUND(a, b, c, d, e, f, k, x, s) { \
(a) += f((b), (c), (d)) + le32_to_cpup(&(x)) + (k); \
(a) = rol32((a), (s)) + (e); \
(c) = rol32((c), 10); \
}
static void rmd160_transform(u32 *state, const __le32 *in)
{
u32 aa, bb, cc, dd, ee, aaa, bbb, ccc, ddd, eee;
/* Initialize left lane */
aa = state[0];
bb = state[1];
cc = state[2];
dd = state[3];
ee = state[4];
/* Initialize right lane */
aaa = state[0];
bbb = state[1];
ccc = state[2];
ddd = state[3];
eee = state[4];
/* round 1: left lane */
ROUND(aa, bb, cc, dd, ee, F1, K1, in[0], 11);
ROUND(ee, aa, bb, cc, dd, F1, K1, in[1], 14);
ROUND(dd, ee, aa, bb, cc, F1, K1, in[2], 15);
ROUND(cc, dd, ee, aa, bb, F1, K1, in[3], 12);
ROUND(bb, cc, dd, ee, aa, F1, K1, in[4], 5);
ROUND(aa, bb, cc, dd, ee, F1, K1, in[5], 8);
ROUND(ee, aa, bb, cc, dd, F1, K1, in[6], 7);
ROUND(dd, ee, aa, bb, cc, F1, K1, in[7], 9);
ROUND(cc, dd, ee, aa, bb, F1, K1, in[8], 11);
ROUND(bb, cc, dd, ee, aa, F1, K1, in[9], 13);
ROUND(aa, bb, cc, dd, ee, F1, K1, in[10], 14);
ROUND(ee, aa, bb, cc, dd, F1, K1, in[11], 15);
ROUND(dd, ee, aa, bb, cc, F1, K1, in[12], 6);
ROUND(cc, dd, ee, aa, bb, F1, K1, in[13], 7);
ROUND(bb, cc, dd, ee, aa, F1, K1, in[14], 9);
ROUND(aa, bb, cc, dd, ee, F1, K1, in[15], 8);
/* round 2: left lane" */
ROUND(ee, aa, bb, cc, dd, F2, K2, in[7], 7);
ROUND(dd, ee, aa, bb, cc, F2, K2, in[4], 6);
ROUND(cc, dd, ee, aa, bb, F2, K2, in[13], 8);
ROUND(bb, cc, dd, ee, aa, F2, K2, in[1], 13);
ROUND(aa, bb, cc, dd, ee, F2, K2, in[10], 11);
ROUND(ee, aa, bb, cc, dd, F2, K2, in[6], 9);
ROUND(dd, ee, aa, bb, cc, F2, K2, in[15], 7);
ROUND(cc, dd, ee, aa, bb, F2, K2, in[3], 15);
ROUND(bb, cc, dd, ee, aa, F2, K2, in[12], 7);
ROUND(aa, bb, cc, dd, ee, F2, K2, in[0], 12);
ROUND(ee, aa, bb, cc, dd, F2, K2, in[9], 15);
ROUND(dd, ee, aa, bb, cc, F2, K2, in[5], 9);
ROUND(cc, dd, ee, aa, bb, F2, K2, in[2], 11);
ROUND(bb, cc, dd, ee, aa, F2, K2, in[14], 7);
ROUND(aa, bb, cc, dd, ee, F2, K2, in[11], 13);
ROUND(ee, aa, bb, cc, dd, F2, K2, in[8], 12);
/* round 3: left lane" */
ROUND(dd, ee, aa, bb, cc, F3, K3, in[3], 11);
ROUND(cc, dd, ee, aa, bb, F3, K3, in[10], 13);
ROUND(bb, cc, dd, ee, aa, F3, K3, in[14], 6);
ROUND(aa, bb, cc, dd, ee, F3, K3, in[4], 7);
ROUND(ee, aa, bb, cc, dd, F3, K3, in[9], 14);
ROUND(dd, ee, aa, bb, cc, F3, K3, in[15], 9);
ROUND(cc, dd, ee, aa, bb, F3, K3, in[8], 13);
ROUND(bb, cc, dd, ee, aa, F3, K3, in[1], 15);
ROUND(aa, bb, cc, dd, ee, F3, K3, in[2], 14);
ROUND(ee, aa, bb, cc, dd, F3, K3, in[7], 8);
ROUND(dd, ee, aa, bb, cc, F3, K3, in[0], 13);
ROUND(cc, dd, ee, aa, bb, F3, K3, in[6], 6);
ROUND(bb, cc, dd, ee, aa, F3, K3, in[13], 5);
ROUND(aa, bb, cc, dd, ee, F3, K3, in[11], 12);
ROUND(ee, aa, bb, cc, dd, F3, K3, in[5], 7);
ROUND(dd, ee, aa, bb, cc, F3, K3, in[12], 5);
/* round 4: left lane" */
ROUND(cc, dd, ee, aa, bb, F4, K4, in[1], 11);
ROUND(bb, cc, dd, ee, aa, F4, K4, in[9], 12);
ROUND(aa, bb, cc, dd, ee, F4, K4, in[11], 14);
ROUND(ee, aa, bb, cc, dd, F4, K4, in[10], 15);
ROUND(dd, ee, aa, bb, cc, F4, K4, in[0], 14);
ROUND(cc, dd, ee, aa, bb, F4, K4, in[8], 15);
ROUND(bb, cc, dd, ee, aa, F4, K4, in[12], 9);
ROUND(aa, bb, cc, dd, ee, F4, K4, in[4], 8);
ROUND(ee, aa, bb, cc, dd, F4, K4, in[13], 9);
ROUND(dd, ee, aa, bb, cc, F4, K4, in[3], 14);
ROUND(cc, dd, ee, aa, bb, F4, K4, in[7], 5);
ROUND(bb, cc, dd, ee, aa, F4, K4, in[15], 6);
ROUND(aa, bb, cc, dd, ee, F4, K4, in[14], 8);
ROUND(ee, aa, bb, cc, dd, F4, K4, in[5], 6);
ROUND(dd, ee, aa, bb, cc, F4, K4, in[6], 5);
ROUND(cc, dd, ee, aa, bb, F4, K4, in[2], 12);
/* round 5: left lane" */
ROUND(bb, cc, dd, ee, aa, F5, K5, in[4], 9);
ROUND(aa, bb, cc, dd, ee, F5, K5, in[0], 15);
ROUND(ee, aa, bb, cc, dd, F5, K5, in[5], 5);
ROUND(dd, ee, aa, bb, cc, F5, K5, in[9], 11);
ROUND(cc, dd, ee, aa, bb, F5, K5, in[7], 6);
ROUND(bb, cc, dd, ee, aa, F5, K5, in[12], 8);
ROUND(aa, bb, cc, dd, ee, F5, K5, in[2], 13);
ROUND(ee, aa, bb, cc, dd, F5, K5, in[10], 12);
ROUND(dd, ee, aa, bb, cc, F5, K5, in[14], 5);
ROUND(cc, dd, ee, aa, bb, F5, K5, in[1], 12);
ROUND(bb, cc, dd, ee, aa, F5, K5, in[3], 13);
ROUND(aa, bb, cc, dd, ee, F5, K5, in[8], 14);
ROUND(ee, aa, bb, cc, dd, F5, K5, in[11], 11);
ROUND(dd, ee, aa, bb, cc, F5, K5, in[6], 8);
ROUND(cc, dd, ee, aa, bb, F5, K5, in[15], 5);
ROUND(bb, cc, dd, ee, aa, F5, K5, in[13], 6);
/* round 1: right lane */
ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[5], 8);
ROUND(eee, aaa, bbb, ccc, ddd, F5, KK1, in[14], 9);
ROUND(ddd, eee, aaa, bbb, ccc, F5, KK1, in[7], 9);
ROUND(ccc, ddd, eee, aaa, bbb, F5, KK1, in[0], 11);
ROUND(bbb, ccc, ddd, eee, aaa, F5, KK1, in[9], 13);
ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[2], 15);
ROUND(eee, aaa, bbb, ccc, ddd, F5, KK1, in[11], 15);
ROUND(ddd, eee, aaa, bbb, ccc, F5, KK1, in[4], 5);
ROUND(ccc, ddd, eee, aaa, bbb, F5, KK1, in[13], 7);
ROUND(bbb, ccc, ddd, eee, aaa, F5, KK1, in[6], 7);
ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[15], 8);
ROUND(eee, aaa, bbb, ccc, ddd, F5, KK1, in[8], 11);
ROUND(ddd, eee, aaa, bbb, ccc, F5, KK1, in[1], 14);
ROUND(ccc, ddd, eee, aaa, bbb, F5, KK1, in[10], 14);
ROUND(bbb, ccc, ddd, eee, aaa, F5, KK1, in[3], 12);
ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[12], 6);
/* round 2: right lane */
ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[6], 9);
ROUND(ddd, eee, aaa, bbb, ccc, F4, KK2, in[11], 13);
ROUND(ccc, ddd, eee, aaa, bbb, F4, KK2, in[3], 15);
ROUND(bbb, ccc, ddd, eee, aaa, F4, KK2, in[7], 7);
ROUND(aaa, bbb, ccc, ddd, eee, F4, KK2, in[0], 12);
ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[13], 8);
ROUND(ddd, eee, aaa, bbb, ccc, F4, KK2, in[5], 9);
ROUND(ccc, ddd, eee, aaa, bbb, F4, KK2, in[10], 11);
ROUND(bbb, ccc, ddd, eee, aaa, F4, KK2, in[14], 7);
ROUND(aaa, bbb, ccc, ddd, eee, F4, KK2, in[15], 7);
ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[8], 12);
ROUND(ddd, eee, aaa, bbb, ccc, F4, KK2, in[12], 7);
ROUND(ccc, ddd, eee, aaa, bbb, F4, KK2, in[4], 6);
ROUND(bbb, ccc, ddd, eee, aaa, F4, KK2, in[9], 15);
ROUND(aaa, bbb, ccc, ddd, eee, F4, KK2, in[1], 13);
ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[2], 11);
/* round 3: right lane */
ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[15], 9);
ROUND(ccc, ddd, eee, aaa, bbb, F3, KK3, in[5], 7);
ROUND(bbb, ccc, ddd, eee, aaa, F3, KK3, in[1], 15);
ROUND(aaa, bbb, ccc, ddd, eee, F3, KK3, in[3], 11);
ROUND(eee, aaa, bbb, ccc, ddd, F3, KK3, in[7], 8);
ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[14], 6);
ROUND(ccc, ddd, eee, aaa, bbb, F3, KK3, in[6], 6);
ROUND(bbb, ccc, ddd, eee, aaa, F3, KK3, in[9], 14);
ROUND(aaa, bbb, ccc, ddd, eee, F3, KK3, in[11], 12);
ROUND(eee, aaa, bbb, ccc, ddd, F3, KK3, in[8], 13);
ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[12], 5);
ROUND(ccc, ddd, eee, aaa, bbb, F3, KK3, in[2], 14);
ROUND(bbb, ccc, ddd, eee, aaa, F3, KK3, in[10], 13);
ROUND(aaa, bbb, ccc, ddd, eee, F3, KK3, in[0], 13);
ROUND(eee, aaa, bbb, ccc, ddd, F3, KK3, in[4], 7);
ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[13], 5);
/* round 4: right lane */
ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[8], 15);
ROUND(bbb, ccc, ddd, eee, aaa, F2, KK4, in[6], 5);
ROUND(aaa, bbb, ccc, ddd, eee, F2, KK4, in[4], 8);
ROUND(eee, aaa, bbb, ccc, ddd, F2, KK4, in[1], 11);
ROUND(ddd, eee, aaa, bbb, ccc, F2, KK4, in[3], 14);
ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[11], 14);
ROUND(bbb, ccc, ddd, eee, aaa, F2, KK4, in[15], 6);
ROUND(aaa, bbb, ccc, ddd, eee, F2, KK4, in[0], 14);
ROUND(eee, aaa, bbb, ccc, ddd, F2, KK4, in[5], 6);
ROUND(ddd, eee, aaa, bbb, ccc, F2, KK4, in[12], 9);
ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[2], 12);
ROUND(bbb, ccc, ddd, eee, aaa, F2, KK4, in[13], 9);
ROUND(aaa, bbb, ccc, ddd, eee, F2, KK4, in[9], 12);
ROUND(eee, aaa, bbb, ccc, ddd, F2, KK4, in[7], 5);
ROUND(ddd, eee, aaa, bbb, ccc, F2, KK4, in[10], 15);
ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[14], 8);
/* round 5: right lane */
ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[12], 8);
ROUND(aaa, bbb, ccc, ddd, eee, F1, KK5, in[15], 5);
ROUND(eee, aaa, bbb, ccc, ddd, F1, KK5, in[10], 12);
ROUND(ddd, eee, aaa, bbb, ccc, F1, KK5, in[4], 9);
ROUND(ccc, ddd, eee, aaa, bbb, F1, KK5, in[1], 12);
ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[5], 5);
ROUND(aaa, bbb, ccc, ddd, eee, F1, KK5, in[8], 14);
ROUND(eee, aaa, bbb, ccc, ddd, F1, KK5, in[7], 6);
ROUND(ddd, eee, aaa, bbb, ccc, F1, KK5, in[6], 8);
ROUND(ccc, ddd, eee, aaa, bbb, F1, KK5, in[2], 13);
ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[13], 6);
ROUND(aaa, bbb, ccc, ddd, eee, F1, KK5, in[14], 5);
ROUND(eee, aaa, bbb, ccc, ddd, F1, KK5, in[0], 15);
ROUND(ddd, eee, aaa, bbb, ccc, F1, KK5, in[3], 13);
ROUND(ccc, ddd, eee, aaa, bbb, F1, KK5, in[9], 11);
ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[11], 11);
/* combine results */
ddd += cc + state[1]; /* final result for state[0] */
state[1] = state[2] + dd + eee;
state[2] = state[3] + ee + aaa;
state[3] = state[4] + aa + bbb;
state[4] = state[0] + bb + ccc;
state[0] = ddd;
}
static int rmd160_init(struct shash_desc *desc)
{
struct rmd160_ctx *rctx = shash_desc_ctx(desc);
rctx->byte_count = 0;
rctx->state[0] = RMD_H0;
rctx->state[1] = RMD_H1;
rctx->state[2] = RMD_H2;
rctx->state[3] = RMD_H3;
rctx->state[4] = RMD_H4;
memset(rctx->buffer, 0, sizeof(rctx->buffer));
return 0;
}
static int rmd160_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct rmd160_ctx *rctx = shash_desc_ctx(desc);
const u32 avail = sizeof(rctx->buffer) - (rctx->byte_count & 0x3f);
rctx->byte_count += len;
/* Enough space in buffer? If so copy and we're done */
if (avail > len) {
memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail),
data, len);
goto out;
}
memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail),
data, avail);
rmd160_transform(rctx->state, rctx->buffer);
data += avail;
len -= avail;
while (len >= sizeof(rctx->buffer)) {
memcpy(rctx->buffer, data, sizeof(rctx->buffer));
rmd160_transform(rctx->state, rctx->buffer);
data += sizeof(rctx->buffer);
len -= sizeof(rctx->buffer);
}
memcpy(rctx->buffer, data, len);
out:
return 0;
}
/* Add padding and return the message digest. */
static int rmd160_final(struct shash_desc *desc, u8 *out)
{
struct rmd160_ctx *rctx = shash_desc_ctx(desc);
u32 i, index, padlen;
__le64 bits;
__le32 *dst = (__le32 *)out;
static const u8 padding[64] = { 0x80, };
bits = cpu_to_le64(rctx->byte_count << 3);
/* Pad out to 56 mod 64 */
index = rctx->byte_count & 0x3f;
padlen = (index < 56) ? (56 - index) : ((64+56) - index);
rmd160_update(desc, padding, padlen);
/* Append length */
rmd160_update(desc, (const u8 *)&bits, sizeof(bits));
/* Store state in digest */
for (i = 0; i < 5; i++)
dst[i] = cpu_to_le32p(&rctx->state[i]);
/* Wipe context */
memset(rctx, 0, sizeof(*rctx));
return 0;
}
static struct shash_alg alg = {
.digestsize = RMD160_DIGEST_SIZE,
.init = rmd160_init,
.update = rmd160_update,
.final = rmd160_final,
.descsize = sizeof(struct rmd160_ctx),
.base = {
.cra_name = "rmd160",
.cra_driver_name = "rmd160-generic",
.cra_blocksize = RMD160_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int __init rmd160_mod_init(void)
{
return crypto_register_shash(&alg);
}
static void __exit rmd160_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
subsys_initcall(rmd160_mod_init);
module_exit(rmd160_mod_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>");
MODULE_DESCRIPTION("RIPEMD-160 Message Digest");
MODULE_ALIAS_CRYPTO("rmd160");
Computing file changes ...