Revision b91e1302ad9b80c174a4855533f7e3aa2873355e authored by Linus Torvalds on 27 December 2016, 19:40:38 UTC, committed by Linus Torvalds on 29 December 2016, 19:03:15 UTC
In commit 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Nick Piggin made our page locking no longer unconditionally touch the hashed page waitqueue, which not only helps performance in general, but is particularly helpful on NUMA machines where the hashed wait queues can bounce around a lot. However, the "clear lock bit atomically and then test the waiters bit" sequence turns out to be much more expensive than it needs to be, because you get a nasty stall when trying to access the same word that just got updated atomically. On architectures where locking is done with LL/SC, this would be trivial to fix with a new primitive that clears one bit and tests another atomically, but that ends up not working on x86, where the only atomic operations that return the result end up being cmpxchg and xadd. The atomic bit operations return the old value of the same bit we changed, not the value of an unrelated bit. On x86, we could put the lock bit in the high bit of the byte, and use "xadd" with that bit (where the overflow ends up not touching other bits), and look at the other bits of the result. However, an even simpler model is to just use a regular atomic "and" to clear the lock bit, and then the sign bit in eflags will indicate the resulting state of the unrelated bit #7. So by moving the PageWaiters bit up to bit #7, we can atomically clear the lock bit and test the waiters bit on x86 too. And architectures with LL/SC (which is all the usual RISC suspects), the particular bit doesn't matter, so they are fine with this approach too. This avoids the extra access to the same atomic word, and thus avoids the costly stall at page unlock time. The only downside is that the interface ends up being a bit odd and specialized: clear a bit in a byte, and test the sign bit. Nick doesn't love the resulting name of the new primitive, but I'd rather make the name be descriptive and very clear about the limitation imposed by trying to work across all relevant architectures than make it be some generic thing that doesn't make the odd semantics explicit. So this introduces the new architecture primitive clear_bit_unlock_is_negative_byte(); and adds the trivial implementation for x86. We have a generic non-optimized fallback (that just does a "clear_bit()"+"test_bit(7)" combination) which can be overridden by any architecture that can do better. According to Nick, Power has the same hickup x86 has, for example, but some other architectures may not even care. All these optimizations mean that my page locking stress-test (which is just executing a lot of small short-lived shell scripts: "make test" in the git source tree) no longer makes our page locking look horribly bad. Before all these optimizations, just the unlock_page() costs were just over 3% of all CPU overhead on "make test". After this, it's down to 0.66%, so just a quarter of the cost it used to be. (The difference on NUMA is bigger, but there this micro-optimization is likely less noticeable, since the big issue on NUMA was not the accesses to 'struct page', but the waitqueue accesses that were already removed by Nick's earlier commit). Acked-by: Nick Piggin <npiggin@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Bob Peterson <rpeterso@redhat.com> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Andrew Lutomirski <luto@kernel.org> Cc: Andreas Gruenbacher <agruenba@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 2d706e7
balloc.c
/*
* linux/fs/ufs/balloc.c
*
* Copyright (C) 1998
* Daniel Pirkl <daniel.pirkl@email.cz>
* Charles University, Faculty of Mathematics and Physics
*
* UFS2 write support Evgeniy Dushistov <dushistov@mail.ru>, 2007
*/
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/capability.h>
#include <linux/bitops.h>
#include <linux/bio.h>
#include <asm/byteorder.h>
#include "ufs_fs.h"
#include "ufs.h"
#include "swab.h"
#include "util.h"
#define INVBLOCK ((u64)-1L)
static u64 ufs_add_fragments(struct inode *, u64, unsigned, unsigned);
static u64 ufs_alloc_fragments(struct inode *, unsigned, u64, unsigned, int *);
static u64 ufs_alloccg_block(struct inode *, struct ufs_cg_private_info *, u64, int *);
static u64 ufs_bitmap_search (struct super_block *, struct ufs_cg_private_info *, u64, unsigned);
static unsigned char ufs_fragtable_8fpb[], ufs_fragtable_other[];
static void ufs_clusteracct(struct super_block *, struct ufs_cg_private_info *, unsigned, int);
/*
* Free 'count' fragments from fragment number 'fragment'
*/
void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
unsigned cgno, bit, end_bit, bbase, blkmap, i;
u64 blkno;
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
UFSD("ENTER, fragment %llu, count %u\n",
(unsigned long long)fragment, count);
if (ufs_fragnum(fragment) + count > uspi->s_fpg)
ufs_error (sb, "ufs_free_fragments", "internal error");
mutex_lock(&UFS_SB(sb)->s_lock);
cgno = ufs_dtog(uspi, fragment);
bit = ufs_dtogd(uspi, fragment);
if (cgno >= uspi->s_ncg) {
ufs_panic (sb, "ufs_free_fragments", "freeing blocks are outside device");
goto failed;
}
ucpi = ufs_load_cylinder (sb, cgno);
if (!ucpi)
goto failed;
ucg = ubh_get_ucg (UCPI_UBH(ucpi));
if (!ufs_cg_chkmagic(sb, ucg)) {
ufs_panic (sb, "ufs_free_fragments", "internal error, bad magic number on cg %u", cgno);
goto failed;
}
end_bit = bit + count;
bbase = ufs_blknum (bit);
blkmap = ubh_blkmap (UCPI_UBH(ucpi), ucpi->c_freeoff, bbase);
ufs_fragacct (sb, blkmap, ucg->cg_frsum, -1);
for (i = bit; i < end_bit; i++) {
if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, i))
ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, i);
else
ufs_error (sb, "ufs_free_fragments",
"bit already cleared for fragment %u", i);
}
fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
uspi->cs_total.cs_nffree += count;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
blkmap = ubh_blkmap (UCPI_UBH(ucpi), ucpi->c_freeoff, bbase);
ufs_fragacct(sb, blkmap, ucg->cg_frsum, 1);
/*
* Trying to reassemble free fragments into block
*/
blkno = ufs_fragstoblks (bbase);
if (ubh_isblockset(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno)) {
fs32_sub(sb, &ucg->cg_cs.cs_nffree, uspi->s_fpb);
uspi->cs_total.cs_nffree -= uspi->s_fpb;
fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, uspi->s_fpb);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
ufs_clusteracct (sb, ucpi, blkno, 1);
fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
uspi->cs_total.cs_nbfree++;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1);
if (uspi->fs_magic != UFS2_MAGIC) {
unsigned cylno = ufs_cbtocylno (bbase);
fs16_add(sb, &ubh_cg_blks(ucpi, cylno,
ufs_cbtorpos(bbase)), 1);
fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1);
}
}
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
if (sb->s_flags & MS_SYNCHRONOUS)
ubh_sync_block(UCPI_UBH(ucpi));
ufs_mark_sb_dirty(sb);
mutex_unlock(&UFS_SB(sb)->s_lock);
UFSD("EXIT\n");
return;
failed:
mutex_unlock(&UFS_SB(sb)->s_lock);
UFSD("EXIT (FAILED)\n");
return;
}
/*
* Free 'count' fragments from fragment number 'fragment' (free whole blocks)
*/
void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
unsigned overflow, cgno, bit, end_bit, i;
u64 blkno;
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
UFSD("ENTER, fragment %llu, count %u\n",
(unsigned long long)fragment, count);
if ((fragment & uspi->s_fpbmask) || (count & uspi->s_fpbmask)) {
ufs_error (sb, "ufs_free_blocks", "internal error, "
"fragment %llu, count %u\n",
(unsigned long long)fragment, count);
goto failed;
}
mutex_lock(&UFS_SB(sb)->s_lock);
do_more:
overflow = 0;
cgno = ufs_dtog(uspi, fragment);
bit = ufs_dtogd(uspi, fragment);
if (cgno >= uspi->s_ncg) {
ufs_panic (sb, "ufs_free_blocks", "freeing blocks are outside device");
goto failed_unlock;
}
end_bit = bit + count;
if (end_bit > uspi->s_fpg) {
overflow = bit + count - uspi->s_fpg;
count -= overflow;
end_bit -= overflow;
}
ucpi = ufs_load_cylinder (sb, cgno);
if (!ucpi)
goto failed_unlock;
ucg = ubh_get_ucg (UCPI_UBH(ucpi));
if (!ufs_cg_chkmagic(sb, ucg)) {
ufs_panic (sb, "ufs_free_blocks", "internal error, bad magic number on cg %u", cgno);
goto failed_unlock;
}
for (i = bit; i < end_bit; i += uspi->s_fpb) {
blkno = ufs_fragstoblks(i);
if (ubh_isblockset(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno)) {
ufs_error(sb, "ufs_free_blocks", "freeing free fragment");
}
ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
ufs_clusteracct (sb, ucpi, blkno, 1);
fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
uspi->cs_total.cs_nbfree++;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1);
if (uspi->fs_magic != UFS2_MAGIC) {
unsigned cylno = ufs_cbtocylno(i);
fs16_add(sb, &ubh_cg_blks(ucpi, cylno,
ufs_cbtorpos(i)), 1);
fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1);
}
}
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
if (sb->s_flags & MS_SYNCHRONOUS)
ubh_sync_block(UCPI_UBH(ucpi));
if (overflow) {
fragment += count;
count = overflow;
goto do_more;
}
ufs_mark_sb_dirty(sb);
mutex_unlock(&UFS_SB(sb)->s_lock);
UFSD("EXIT\n");
return;
failed_unlock:
mutex_unlock(&UFS_SB(sb)->s_lock);
failed:
UFSD("EXIT (FAILED)\n");
return;
}
/*
* Modify inode page cache in such way:
* have - blocks with b_blocknr equal to oldb...oldb+count-1
* get - blocks with b_blocknr equal to newb...newb+count-1
* also we suppose that oldb...oldb+count-1 blocks
* situated at the end of file.
*
* We can come here from ufs_writepage or ufs_prepare_write,
* locked_page is argument of these functions, so we already lock it.
*/
static void ufs_change_blocknr(struct inode *inode, sector_t beg,
unsigned int count, sector_t oldb,
sector_t newb, struct page *locked_page)
{
const unsigned blks_per_page =
1 << (PAGE_SHIFT - inode->i_blkbits);
const unsigned mask = blks_per_page - 1;
struct address_space * const mapping = inode->i_mapping;
pgoff_t index, cur_index, last_index;
unsigned pos, j, lblock;
sector_t end, i;
struct page *page;
struct buffer_head *head, *bh;
UFSD("ENTER, ino %lu, count %u, oldb %llu, newb %llu\n",
inode->i_ino, count,
(unsigned long long)oldb, (unsigned long long)newb);
BUG_ON(!locked_page);
BUG_ON(!PageLocked(locked_page));
cur_index = locked_page->index;
end = count + beg;
last_index = end >> (PAGE_SHIFT - inode->i_blkbits);
for (i = beg; i < end; i = (i | mask) + 1) {
index = i >> (PAGE_SHIFT - inode->i_blkbits);
if (likely(cur_index != index)) {
page = ufs_get_locked_page(mapping, index);
if (!page)/* it was truncated */
continue;
if (IS_ERR(page)) {/* or EIO */
ufs_error(inode->i_sb, __func__,
"read of page %llu failed\n",
(unsigned long long)index);
continue;
}
} else
page = locked_page;
head = page_buffers(page);
bh = head;
pos = i & mask;
for (j = 0; j < pos; ++j)
bh = bh->b_this_page;
if (unlikely(index == last_index))
lblock = end & mask;
else
lblock = blks_per_page;
do {
if (j >= lblock)
break;
pos = (i - beg) + j;
if (!buffer_mapped(bh))
map_bh(bh, inode->i_sb, oldb + pos);
if (!buffer_uptodate(bh)) {
ll_rw_block(REQ_OP_READ, 0, 1, &bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
ufs_error(inode->i_sb, __func__,
"read of block failed\n");
break;
}
}
UFSD(" change from %llu to %llu, pos %u\n",
(unsigned long long)(pos + oldb),
(unsigned long long)(pos + newb), pos);
bh->b_blocknr = newb + pos;
clean_bdev_bh_alias(bh);
mark_buffer_dirty(bh);
++j;
bh = bh->b_this_page;
} while (bh != head);
if (likely(cur_index != index))
ufs_put_locked_page(page);
}
UFSD("EXIT\n");
}
static void ufs_clear_frags(struct inode *inode, sector_t beg, unsigned int n,
int sync)
{
struct buffer_head *bh;
sector_t end = beg + n;
for (; beg < end; ++beg) {
bh = sb_getblk(inode->i_sb, beg);
lock_buffer(bh);
memset(bh->b_data, 0, inode->i_sb->s_blocksize);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
unlock_buffer(bh);
if (IS_SYNC(inode) || sync)
sync_dirty_buffer(bh);
brelse(bh);
}
}
u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
u64 goal, unsigned count, int *err,
struct page *locked_page)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
unsigned cgno, oldcount, newcount;
u64 tmp, request, result;
UFSD("ENTER, ino %lu, fragment %llu, goal %llu, count %u\n",
inode->i_ino, (unsigned long long)fragment,
(unsigned long long)goal, count);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
usb1 = ubh_get_usb_first(uspi);
*err = -ENOSPC;
mutex_lock(&UFS_SB(sb)->s_lock);
tmp = ufs_data_ptr_to_cpu(sb, p);
if (count + ufs_fragnum(fragment) > uspi->s_fpb) {
ufs_warning(sb, "ufs_new_fragments", "internal warning"
" fragment %llu, count %u",
(unsigned long long)fragment, count);
count = uspi->s_fpb - ufs_fragnum(fragment);
}
oldcount = ufs_fragnum (fragment);
newcount = oldcount + count;
/*
* Somebody else has just allocated our fragments
*/
if (oldcount) {
if (!tmp) {
ufs_error(sb, "ufs_new_fragments", "internal error, "
"fragment %llu, tmp %llu\n",
(unsigned long long)fragment,
(unsigned long long)tmp);
mutex_unlock(&UFS_SB(sb)->s_lock);
return INVBLOCK;
}
if (fragment < UFS_I(inode)->i_lastfrag) {
UFSD("EXIT (ALREADY ALLOCATED)\n");
mutex_unlock(&UFS_SB(sb)->s_lock);
return 0;
}
}
else {
if (tmp) {
UFSD("EXIT (ALREADY ALLOCATED)\n");
mutex_unlock(&UFS_SB(sb)->s_lock);
return 0;
}
}
/*
* There is not enough space for user on the device
*/
if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) {
mutex_unlock(&UFS_SB(sb)->s_lock);
UFSD("EXIT (FAILED)\n");
return 0;
}
if (goal >= uspi->s_size)
goal = 0;
if (goal == 0)
cgno = ufs_inotocg (inode->i_ino);
else
cgno = ufs_dtog(uspi, goal);
/*
* allocate new fragment
*/
if (oldcount == 0) {
result = ufs_alloc_fragments (inode, cgno, goal, count, err);
if (result) {
ufs_clear_frags(inode, result + oldcount,
newcount - oldcount, locked_page != NULL);
write_seqlock(&UFS_I(inode)->meta_lock);
ufs_cpu_to_data_ptr(sb, p, result);
write_sequnlock(&UFS_I(inode)->meta_lock);
*err = 0;
UFS_I(inode)->i_lastfrag =
max(UFS_I(inode)->i_lastfrag, fragment + count);
}
mutex_unlock(&UFS_SB(sb)->s_lock);
UFSD("EXIT, result %llu\n", (unsigned long long)result);
return result;
}
/*
* resize block
*/
result = ufs_add_fragments(inode, tmp, oldcount, newcount);
if (result) {
*err = 0;
UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
fragment + count);
ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
locked_page != NULL);
mutex_unlock(&UFS_SB(sb)->s_lock);
UFSD("EXIT, result %llu\n", (unsigned long long)result);
return result;
}
/*
* allocate new block and move data
*/
switch (fs32_to_cpu(sb, usb1->fs_optim)) {
case UFS_OPTSPACE:
request = newcount;
if (uspi->s_minfree < 5 || uspi->cs_total.cs_nffree
> uspi->s_dsize * uspi->s_minfree / (2 * 100))
break;
usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
break;
default:
usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
case UFS_OPTTIME:
request = uspi->s_fpb;
if (uspi->cs_total.cs_nffree < uspi->s_dsize *
(uspi->s_minfree - 2) / 100)
break;
usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
break;
}
result = ufs_alloc_fragments (inode, cgno, goal, request, err);
if (result) {
ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
locked_page != NULL);
ufs_change_blocknr(inode, fragment - oldcount, oldcount,
uspi->s_sbbase + tmp,
uspi->s_sbbase + result, locked_page);
write_seqlock(&UFS_I(inode)->meta_lock);
ufs_cpu_to_data_ptr(sb, p, result);
write_sequnlock(&UFS_I(inode)->meta_lock);
*err = 0;
UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
fragment + count);
mutex_unlock(&UFS_SB(sb)->s_lock);
if (newcount < request)
ufs_free_fragments (inode, result + newcount, request - newcount);
ufs_free_fragments (inode, tmp, oldcount);
UFSD("EXIT, result %llu\n", (unsigned long long)result);
return result;
}
mutex_unlock(&UFS_SB(sb)->s_lock);
UFSD("EXIT (FAILED)\n");
return 0;
}
static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
unsigned oldcount, unsigned newcount)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
unsigned cgno, fragno, fragoff, count, fragsize, i;
UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n",
(unsigned long long)fragment, oldcount, newcount);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
count = newcount - oldcount;
cgno = ufs_dtog(uspi, fragment);
if (fs32_to_cpu(sb, UFS_SB(sb)->fs_cs(cgno).cs_nffree) < count)
return 0;
if ((ufs_fragnum (fragment) + newcount) > uspi->s_fpb)
return 0;
ucpi = ufs_load_cylinder (sb, cgno);
if (!ucpi)
return 0;
ucg = ubh_get_ucg (UCPI_UBH(ucpi));
if (!ufs_cg_chkmagic(sb, ucg)) {
ufs_panic (sb, "ufs_add_fragments",
"internal error, bad magic number on cg %u", cgno);
return 0;
}
fragno = ufs_dtogd(uspi, fragment);
fragoff = ufs_fragnum (fragno);
for (i = oldcount; i < newcount; i++)
if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i))
return 0;
/*
* Block can be extended
*/
ucg->cg_time = cpu_to_fs32(sb, get_seconds());
for (i = newcount; i < (uspi->s_fpb - fragoff); i++)
if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i))
break;
fragsize = i - oldcount;
if (!fs32_to_cpu(sb, ucg->cg_frsum[fragsize]))
ufs_panic (sb, "ufs_add_fragments",
"internal error or corrupted bitmap on cg %u", cgno);
fs32_sub(sb, &ucg->cg_frsum[fragsize], 1);
if (fragsize != count)
fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1);
for (i = oldcount; i < newcount; i++)
ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i);
fs32_sub(sb, &ucg->cg_cs.cs_nffree, count);
fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
uspi->cs_total.cs_nffree -= count;
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
if (sb->s_flags & MS_SYNCHRONOUS)
ubh_sync_block(UCPI_UBH(ucpi));
ufs_mark_sb_dirty(sb);
UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment);
return fragment;
}
#define UFS_TEST_FREE_SPACE_CG \
ucg = (struct ufs_cylinder_group *) UFS_SB(sb)->s_ucg[cgno]->b_data; \
if (fs32_to_cpu(sb, ucg->cg_cs.cs_nbfree)) \
goto cg_found; \
for (k = count; k < uspi->s_fpb; k++) \
if (fs32_to_cpu(sb, ucg->cg_frsum[k])) \
goto cg_found;
static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
u64 goal, unsigned count, int *err)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
unsigned oldcg, i, j, k, allocsize;
u64 result;
UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n",
inode->i_ino, cgno, (unsigned long long)goal, count);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
oldcg = cgno;
/*
* 1. searching on preferred cylinder group
*/
UFS_TEST_FREE_SPACE_CG
/*
* 2. quadratic rehash
*/
for (j = 1; j < uspi->s_ncg; j *= 2) {
cgno += j;
if (cgno >= uspi->s_ncg)
cgno -= uspi->s_ncg;
UFS_TEST_FREE_SPACE_CG
}
/*
* 3. brute force search
* We start at i = 2 ( 0 is checked at 1.step, 1 at 2.step )
*/
cgno = (oldcg + 1) % uspi->s_ncg;
for (j = 2; j < uspi->s_ncg; j++) {
cgno++;
if (cgno >= uspi->s_ncg)
cgno = 0;
UFS_TEST_FREE_SPACE_CG
}
UFSD("EXIT (FAILED)\n");
return 0;
cg_found:
ucpi = ufs_load_cylinder (sb, cgno);
if (!ucpi)
return 0;
ucg = ubh_get_ucg (UCPI_UBH(ucpi));
if (!ufs_cg_chkmagic(sb, ucg))
ufs_panic (sb, "ufs_alloc_fragments",
"internal error, bad magic number on cg %u", cgno);
ucg->cg_time = cpu_to_fs32(sb, get_seconds());
if (count == uspi->s_fpb) {
result = ufs_alloccg_block (inode, ucpi, goal, err);
if (result == INVBLOCK)
return 0;
goto succed;
}
for (allocsize = count; allocsize < uspi->s_fpb; allocsize++)
if (fs32_to_cpu(sb, ucg->cg_frsum[allocsize]) != 0)
break;
if (allocsize == uspi->s_fpb) {
result = ufs_alloccg_block (inode, ucpi, goal, err);
if (result == INVBLOCK)
return 0;
goal = ufs_dtogd(uspi, result);
for (i = count; i < uspi->s_fpb; i++)
ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
i = uspi->s_fpb - count;
fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
uspi->cs_total.cs_nffree += i;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i);
fs32_add(sb, &ucg->cg_frsum[i], 1);
goto succed;
}
result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
if (result == INVBLOCK)
return 0;
for (i = 0; i < count; i++)
ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i);
fs32_sub(sb, &ucg->cg_cs.cs_nffree, count);
uspi->cs_total.cs_nffree -= count;
fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
fs32_sub(sb, &ucg->cg_frsum[allocsize], 1);
if (count != allocsize)
fs32_add(sb, &ucg->cg_frsum[allocsize - count], 1);
succed:
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
if (sb->s_flags & MS_SYNCHRONOUS)
ubh_sync_block(UCPI_UBH(ucpi));
ufs_mark_sb_dirty(sb);
result += cgno * uspi->s_fpg;
UFSD("EXIT3, result %llu\n", (unsigned long long)result);
return result;
}
static u64 ufs_alloccg_block(struct inode *inode,
struct ufs_cg_private_info *ucpi,
u64 goal, int *err)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_cylinder_group * ucg;
u64 result, blkno;
UFSD("ENTER, goal %llu\n", (unsigned long long)goal);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
ucg = ubh_get_ucg(UCPI_UBH(ucpi));
if (goal == 0) {
goal = ucpi->c_rotor;
goto norot;
}
goal = ufs_blknum (goal);
goal = ufs_dtogd(uspi, goal);
/*
* If the requested block is available, use it.
*/
if (ubh_isblockset(UCPI_UBH(ucpi), ucpi->c_freeoff, ufs_fragstoblks(goal))) {
result = goal;
goto gotit;
}
norot:
result = ufs_bitmap_search (sb, ucpi, goal, uspi->s_fpb);
if (result == INVBLOCK)
return INVBLOCK;
ucpi->c_rotor = result;
gotit:
blkno = ufs_fragstoblks(result);
ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
ufs_clusteracct (sb, ucpi, blkno, -1);
fs32_sub(sb, &ucg->cg_cs.cs_nbfree, 1);
uspi->cs_total.cs_nbfree--;
fs32_sub(sb, &UFS_SB(sb)->fs_cs(ucpi->c_cgx).cs_nbfree, 1);
if (uspi->fs_magic != UFS2_MAGIC) {
unsigned cylno = ufs_cbtocylno((unsigned)result);
fs16_sub(sb, &ubh_cg_blks(ucpi, cylno,
ufs_cbtorpos((unsigned)result)), 1);
fs32_sub(sb, &ubh_cg_blktot(ucpi, cylno), 1);
}
UFSD("EXIT, result %llu\n", (unsigned long long)result);
return result;
}
static unsigned ubh_scanc(struct ufs_sb_private_info *uspi,
struct ufs_buffer_head *ubh,
unsigned begin, unsigned size,
unsigned char *table, unsigned char mask)
{
unsigned rest, offset;
unsigned char *cp;
offset = begin & ~uspi->s_fmask;
begin >>= uspi->s_fshift;
for (;;) {
if ((offset + size) < uspi->s_fsize)
rest = size;
else
rest = uspi->s_fsize - offset;
size -= rest;
cp = ubh->bh[begin]->b_data + offset;
while ((table[*cp++] & mask) == 0 && --rest)
;
if (rest || !size)
break;
begin++;
offset = 0;
}
return (size + rest);
}
/*
* Find a block of the specified size in the specified cylinder group.
* @sp: pointer to super block
* @ucpi: pointer to cylinder group info
* @goal: near which block we want find new one
* @count: specified size
*/
static u64 ufs_bitmap_search(struct super_block *sb,
struct ufs_cg_private_info *ucpi,
u64 goal, unsigned count)
{
/*
* Bit patterns for identifying fragments in the block map
* used as ((map & mask_arr) == want_arr)
*/
static const int mask_arr[9] = {
0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff
};
static const int want_arr[9] = {
0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe
};
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
unsigned start, length, loc;
unsigned pos, want, blockmap, mask, end;
u64 result;
UFSD("ENTER, cg %u, goal %llu, count %u\n", ucpi->c_cgx,
(unsigned long long)goal, count);
if (goal)
start = ufs_dtogd(uspi, goal) >> 3;
else
start = ucpi->c_frotor >> 3;
length = ((uspi->s_fpg + 7) >> 3) - start;
loc = ubh_scanc(uspi, UCPI_UBH(ucpi), ucpi->c_freeoff + start, length,
(uspi->s_fpb == 8) ? ufs_fragtable_8fpb : ufs_fragtable_other,
1 << (count - 1 + (uspi->s_fpb & 7)));
if (loc == 0) {
length = start + 1;
loc = ubh_scanc(uspi, UCPI_UBH(ucpi), ucpi->c_freeoff, length,
(uspi->s_fpb == 8) ? ufs_fragtable_8fpb :
ufs_fragtable_other,
1 << (count - 1 + (uspi->s_fpb & 7)));
if (loc == 0) {
ufs_error(sb, "ufs_bitmap_search",
"bitmap corrupted on cg %u, start %u,"
" length %u, count %u, freeoff %u\n",
ucpi->c_cgx, start, length, count,
ucpi->c_freeoff);
return INVBLOCK;
}
start = 0;
}
result = (start + length - loc) << 3;
ucpi->c_frotor = result;
/*
* found the byte in the map
*/
for (end = result + 8; result < end; result += uspi->s_fpb) {
blockmap = ubh_blkmap(UCPI_UBH(ucpi), ucpi->c_freeoff, result);
blockmap <<= 1;
mask = mask_arr[count];
want = want_arr[count];
for (pos = 0; pos <= uspi->s_fpb - count; pos++) {
if ((blockmap & mask) == want) {
UFSD("EXIT, result %llu\n",
(unsigned long long)result);
return result + pos;
}
mask <<= 1;
want <<= 1;
}
}
ufs_error(sb, "ufs_bitmap_search", "block not in map on cg %u\n",
ucpi->c_cgx);
UFSD("EXIT (FAILED)\n");
return INVBLOCK;
}
static void ufs_clusteracct(struct super_block * sb,
struct ufs_cg_private_info * ucpi, unsigned blkno, int cnt)
{
struct ufs_sb_private_info * uspi;
int i, start, end, forw, back;
uspi = UFS_SB(sb)->s_uspi;
if (uspi->s_contigsumsize <= 0)
return;
if (cnt > 0)
ubh_setbit(UCPI_UBH(ucpi), ucpi->c_clusteroff, blkno);
else
ubh_clrbit(UCPI_UBH(ucpi), ucpi->c_clusteroff, blkno);
/*
* Find the size of the cluster going forward.
*/
start = blkno + 1;
end = start + uspi->s_contigsumsize;
if ( end >= ucpi->c_nclusterblks)
end = ucpi->c_nclusterblks;
i = ubh_find_next_zero_bit (UCPI_UBH(ucpi), ucpi->c_clusteroff, end, start);
if (i > end)
i = end;
forw = i - start;
/*
* Find the size of the cluster going backward.
*/
start = blkno - 1;
end = start - uspi->s_contigsumsize;
if (end < 0 )
end = -1;
i = ubh_find_last_zero_bit (UCPI_UBH(ucpi), ucpi->c_clusteroff, start, end);
if ( i < end)
i = end;
back = start - i;
/*
* Account for old cluster and the possibly new forward and
* back clusters.
*/
i = back + forw + 1;
if (i > uspi->s_contigsumsize)
i = uspi->s_contigsumsize;
fs32_add(sb, (__fs32*)ubh_get_addr(UCPI_UBH(ucpi), ucpi->c_clustersumoff + (i << 2)), cnt);
if (back > 0)
fs32_sub(sb, (__fs32*)ubh_get_addr(UCPI_UBH(ucpi), ucpi->c_clustersumoff + (back << 2)), cnt);
if (forw > 0)
fs32_sub(sb, (__fs32*)ubh_get_addr(UCPI_UBH(ucpi), ucpi->c_clustersumoff + (forw << 2)), cnt);
}
static unsigned char ufs_fragtable_8fpb[] = {
0x00, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x04, 0x01, 0x01, 0x01, 0x03, 0x02, 0x03, 0x04, 0x08,
0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, 0x02, 0x03, 0x03, 0x02, 0x04, 0x05, 0x08, 0x10,
0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, 0x04, 0x05, 0x05, 0x06, 0x08, 0x09, 0x10, 0x20,
0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11,
0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0A,
0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04, 0x08, 0x09, 0x09, 0x0A, 0x10, 0x11, 0x20, 0x40,
0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11,
0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07, 0x05, 0x05, 0x05, 0x07, 0x09, 0x09, 0x11, 0x21,
0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0A,
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07, 0x02, 0x03, 0x03, 0x02, 0x06, 0x07, 0x0A, 0x12,
0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x05, 0x07, 0x06, 0x07, 0x04, 0x0C,
0x08, 0x09, 0x09, 0x0A, 0x09, 0x09, 0x0A, 0x0C, 0x10, 0x11, 0x11, 0x12, 0x20, 0x21, 0x40, 0x80,
};
static unsigned char ufs_fragtable_other[] = {
0x00, 0x16, 0x16, 0x2A, 0x16, 0x16, 0x26, 0x4E, 0x16, 0x16, 0x16, 0x3E, 0x2A, 0x3E, 0x4E, 0x8A,
0x16, 0x16, 0x16, 0x3E, 0x16, 0x16, 0x36, 0x5E, 0x16, 0x16, 0x16, 0x3E, 0x3E, 0x3E, 0x5E, 0x9E,
0x16, 0x16, 0x16, 0x3E, 0x16, 0x16, 0x36, 0x5E, 0x16, 0x16, 0x16, 0x3E, 0x3E, 0x3E, 0x5E, 0x9E,
0x2A, 0x3E, 0x3E, 0x2A, 0x3E, 0x3E, 0x2E, 0x6E, 0x3E, 0x3E, 0x3E, 0x3E, 0x2A, 0x3E, 0x6E, 0xAA,
0x16, 0x16, 0x16, 0x3E, 0x16, 0x16, 0x36, 0x5E, 0x16, 0x16, 0x16, 0x3E, 0x3E, 0x3E, 0x5E, 0x9E,
0x16, 0x16, 0x16, 0x3E, 0x16, 0x16, 0x36, 0x5E, 0x16, 0x16, 0x16, 0x3E, 0x3E, 0x3E, 0x5E, 0x9E,
0x26, 0x36, 0x36, 0x2E, 0x36, 0x36, 0x26, 0x6E, 0x36, 0x36, 0x36, 0x3E, 0x2E, 0x3E, 0x6E, 0xAE,
0x4E, 0x5E, 0x5E, 0x6E, 0x5E, 0x5E, 0x6E, 0x4E, 0x5E, 0x5E, 0x5E, 0x7E, 0x6E, 0x7E, 0x4E, 0xCE,
0x16, 0x16, 0x16, 0x3E, 0x16, 0x16, 0x36, 0x5E, 0x16, 0x16, 0x16, 0x3E, 0x3E, 0x3E, 0x5E, 0x9E,
0x16, 0x16, 0x16, 0x3E, 0x16, 0x16, 0x36, 0x5E, 0x16, 0x16, 0x16, 0x3E, 0x3E, 0x3E, 0x5E, 0x9E,
0x16, 0x16, 0x16, 0x3E, 0x16, 0x16, 0x36, 0x5E, 0x16, 0x16, 0x16, 0x3E, 0x3E, 0x3E, 0x5E, 0x9E,
0x3E, 0x3E, 0x3E, 0x3E, 0x3E, 0x3E, 0x3E, 0x7E, 0x3E, 0x3E, 0x3E, 0x3E, 0x3E, 0x3E, 0x7E, 0xBE,
0x2A, 0x3E, 0x3E, 0x2A, 0x3E, 0x3E, 0x2E, 0x6E, 0x3E, 0x3E, 0x3E, 0x3E, 0x2A, 0x3E, 0x6E, 0xAA,
0x3E, 0x3E, 0x3E, 0x3E, 0x3E, 0x3E, 0x3E, 0x7E, 0x3E, 0x3E, 0x3E, 0x3E, 0x3E, 0x3E, 0x7E, 0xBE,
0x4E, 0x5E, 0x5E, 0x6E, 0x5E, 0x5E, 0x6E, 0x4E, 0x5E, 0x5E, 0x5E, 0x7E, 0x6E, 0x7E, 0x4E, 0xCE,
0x8A, 0x9E, 0x9E, 0xAA, 0x9E, 0x9E, 0xAE, 0xCE, 0x9E, 0x9E, 0x9E, 0xBE, 0xAA, 0xBE, 0xCE, 0x8A,
};
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...