https://github.com/torvalds/linux
Revision 4fca50d440cc5d4dc570ad5484cc0b70b381bc2a authored by Jan Kara on 08 September 2022, 09:21:24 UTC, committed by Theodore Ts'o on 22 September 2022, 02:11:34 UTC
One of the side-effects of mb_optimize_scan was that the optimized functions to select next group to try were called even before we tried the goal group. As a result we no longer allocate files close to corresponding inodes as well as we don't try to expand currently allocated extent in the same group. This results in reaim regression with workfile.disk workload of upto 8% with many clients on my test machine: baseline mb_optimize_scan Hmean disk-1 2114.16 ( 0.00%) 2099.37 ( -0.70%) Hmean disk-41 87794.43 ( 0.00%) 83787.47 * -4.56%* Hmean disk-81 148170.73 ( 0.00%) 135527.05 * -8.53%* Hmean disk-121 177506.11 ( 0.00%) 166284.93 * -6.32%* Hmean disk-161 220951.51 ( 0.00%) 207563.39 * -6.06%* Hmean disk-201 208722.74 ( 0.00%) 203235.59 ( -2.63%) Hmean disk-241 222051.60 ( 0.00%) 217705.51 ( -1.96%) Hmean disk-281 252244.17 ( 0.00%) 241132.72 * -4.41%* Hmean disk-321 255844.84 ( 0.00%) 245412.84 * -4.08%* Also this is causing huge regression (time increased by a factor of 5 or so) when untarring archive with lots of small files on some eMMC storage cards. Fix the problem by making sure we try goal group first. Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning") CC: stable@kernel.org Reported-and-tested-by: Stefan Wahren <stefan.wahren@i2se.com> Tested-by: Ojaswin Mujoo <ojaswin@linux.ibm.com> Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com> Link: https://lore.kernel.org/all/20220727105123.ckwrhbilzrxqpt24@quack3/ Link: https://lore.kernel.org/all/0d81a7c2-46b7-6010-62a4-3e6cfc1628d6@i2se.com/ Signed-off-by: Jan Kara <jack@suse.cz> Link: https://lore.kernel.org/r/20220908092136.11770-1-jack@suse.cz Signed-off-by: Theodore Ts'o <tytso@mit.edu>
1 parent 7e18e42
Tip revision: 4fca50d440cc5d4dc570ad5484cc0b70b381bc2a authored by Jan Kara on 08 September 2022, 09:21:24 UTC
ext4: make mballoc try target group first even with mb_optimize_scan
ext4: make mballoc try target group first even with mb_optimize_scan
Tip revision: 4fca50d
crc32c_generic.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Cryptographic API.
*
* CRC32C chksum
*
*@Article{castagnoli-crc,
* author = { Guy Castagnoli and Stefan Braeuer and Martin Herrmann},
* title = {{Optimization of Cyclic Redundancy-Check Codes with 24
* and 32 Parity Bits}},
* journal = IEEE Transactions on Communications,
* year = {1993},
* volume = {41},
* number = {6},
* pages = {883--892},
* month = {June},
*}
* Used by the iSCSI driver, possibly others, and derived from
* the iscsi-crc.c module of the linux-iscsi driver at
* http://linux-iscsi.sourceforge.net.
*
* Following the example of lib/crc32, this function is intended to be
* flexible and useful for all users. Modules that currently have their
* own crc32c, but hopefully may be able to use this one are:
* net/sctp (please add all your doco to here if you change to
* use this one!)
* <endoflist>
*
* Copyright (c) 2004 Cisco Systems, Inc.
* Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
*/
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/crc32.h>
/* Block size reported through .cra_blocksize of the algorithm descriptor. */
#define CHKSUM_BLOCK_SIZE 1
/* Digest size in bytes (.digestsize): one 32-bit CRC value. */
#define CHKSUM_DIGEST_SIZE 4
/*
 * Per-transform context.  Holds the seed ("key") that every new checksum
 * computation on this transform starts from.
 */
struct chksum_ctx {
/* Initialised to ~0 by crc32c_cra_init(); overwritten by chksum_setkey(). */
u32 key;
};
/*
 * Per-request (descriptor) state: the running CRC of one in-progress
 * init/update/final sequence.
 */
struct chksum_desc_ctx {
/* Seeded from chksum_ctx.key in chksum_init(), advanced by chksum_update(). */
u32 crc;
};
/*
* Steps through buffer one byte at a time, calculates reflected
* crc using table.
*/
static int chksum_init(struct shash_desc *desc)
{
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = mctx->key;
return 0;
}
/*
 * Install a caller-supplied seed for this transform.
 *
 * Setting the seed allows arbitrary accumulators and a flexible XOR
 * policy.  If your algorithm starts with ~0, then XOR with ~0 before you
 * set the seed.
 *
 * The key must be exactly sizeof(u32) bytes and is read as an unaligned
 * little-endian 32-bit value.  Returns 0 on success, or -EINVAL when
 * keylen has any other size.
 */
static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
			 unsigned int keylen)
{
	struct chksum_ctx *tfm_ctx;

	/* Reject wrongly sized keys before touching the context. */
	if (keylen != sizeof(tfm_ctx->key))
		return -EINVAL;

	tfm_ctx = crypto_shash_ctx(tfm);
	tfm_ctx->key = get_unaligned_le32(key);

	return 0;
}
static int chksum_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = __crc32c_le(ctx->crc, data, length);
return 0;
}
/*
 * Emit the digest for this request: the bitwise complement of the
 * running CRC, stored at @out as an unaligned little-endian 32-bit value.
 * The running CRC itself is left unchanged.
 *
 * Always returns 0.
 */
static int chksum_final(struct shash_desc *desc, u8 *out)
{
	const struct chksum_desc_ctx *state = shash_desc_ctx(desc);
	u32 digest = ~state->crc;

	put_unaligned_le32(digest, out);

	return 0;
}
/*
 * Shared helper for the finup and digest paths: continue the CRC found
 * at *crcp over @len bytes of @data, complement the result and write it
 * to @out as an unaligned little-endian 32-bit value.
 *
 * *crcp is only read; the caller's CRC is not updated.
 * Always returns 0.
 */
static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out)
{
	u32 digest = ~__crc32c_le(*crcp, data, len);

	put_unaligned_le32(digest, out);

	return 0;
}
/*
 * Combined update + final: process the last @len bytes of @data on top
 * of the running CRC of this request and write the digest to @out.
 *
 * Returns the result of __chksum_finup().
 */
static int chksum_finup(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out)
{
	struct chksum_desc_ctx *state = shash_desc_ctx(desc);
	u32 *running = &state->crc;

	return __chksum_finup(running, data, len, out);
}
/*
 * One-shot digest: checksum @length bytes of @data starting directly
 * from the transform's seed (key) and write the digest to @out.  The
 * per-request state in @desc is not consulted.
 *
 * Returns the result of __chksum_finup().
 */
static int chksum_digest(struct shash_desc *desc, const u8 *data,
			 unsigned int length, u8 *out)
{
	struct chksum_ctx *tfm_ctx = crypto_shash_ctx(desc->tfm);
	u32 *seed = &tfm_ctx->key;

	return __chksum_finup(seed, data, length, out);
}
/*
 * Transform constructor (.cra_init): give the new transform a default
 * seed of all ones, which stays in effect until chksum_setkey() replaces
 * it.
 *
 * Always returns 0.
 */
static int crc32c_cra_init(struct crypto_tfm *tfm)
{
	struct chksum_ctx *tfm_ctx = crypto_tfm_ctx(tfm);

	tfm_ctx->key = ~0;

	return 0;
}
/*
 * Algorithm descriptor handed to crypto_register_shash() /
 * crypto_unregister_shash() by the module init and exit functions.
 * It wires the chksum_* callbacks defined in this file into the shash
 * interface under the name "crc32c".
 */
static struct shash_alg alg = {
/* Digest is a single 32-bit value (4 bytes). */
.digestsize = CHKSUM_DIGEST_SIZE,
.setkey = chksum_setkey,
.init = chksum_init,
.update = chksum_update,
.final = chksum_final,
.finup = chksum_finup,
.digest = chksum_digest,
/* Per-request state is just the running CRC. */
.descsize = sizeof(struct chksum_desc_ctx),
.base = {
.cra_name = "crc32c",
.cra_driver_name = "crc32c-generic",
/* NOTE(review): presumably a baseline priority so that other "crc32c" drivers can rank higher - confirm. */
.cra_priority = 100,
/* crc32c_cra_init() installs a default key of ~0, so calling setkey is optional. */
.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
/* Per-transform context is just the seed (key). */
.cra_ctxsize = sizeof(struct chksum_ctx),
.cra_module = THIS_MODULE,
.cra_init = crc32c_cra_init,
}
};
/*
 * Module entry point: register the "crc32c" shash algorithm.
 *
 * Returns whatever crypto_register_shash() returns.
 */
static int __init crc32c_mod_init(void)
{
	int err = crypto_register_shash(&alg);

	return err;
}
/* Module exit point: unregister the shash algorithm registered at init. */
static void __exit crc32c_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
/*
 * Hook the init/exit functions into the kernel.  Init is registered via
 * subsys_initcall() rather than module_init().
 * NOTE(review): presumably so the algorithm is available early for
 * built-in users - confirm.
 */
subsys_initcall(crc32c_mod_init);
module_exit(crc32c_mod_fini);
MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>");
MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c");
MODULE_LICENSE("GPL");
/* Crypto aliases for both the algorithm name and the driver name. */
MODULE_ALIAS_CRYPTO("crc32c");
MODULE_ALIAS_CRYPTO("crc32c-generic");
Computing file changes ...