[PATCH v2 1/2] ext4: Add helper function to mark group as corrupted
From: Beata Michalska @ 2015-03-16 13:25 UTC
To: lczerner, adilger.kernel; +Cc: tytso, linux-ext4, linux-kernel, kyungmin.park
Add ext4_mark_group_corrupted helper function to
simplify the code and to keep the logic in one place.
Signed-off-by: Beata Michalska <b.michalska@samsung.com>
---
fs/ext4/balloc.c | 17 ++++-------------
fs/ext4/ext4.h | 9 +++++++++
fs/ext4/ialloc.c | 5 +----
fs/ext4/mballoc.c | 13 +++----------
fs/ext4/super.c | 2 +-
5 files changed, 18 insertions(+), 28 deletions(-)
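As a quick reference, the duplicated pattern the new helper folds into one place looks like this at each call site (a sketch only; the surrounding ext4_error()/unlock calls stay with the callers):

        /* before: open-coded at every corruption site */
        if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
                percpu_counter_sub(&sbi->s_freeclusters_counter,
                                   grp->bb_free);
        set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);

        /* after: one call with the same semantics */
        ext4_mark_group_corrupted(sbi, grp);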
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 83a6f49..5e6a9ca 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -193,10 +193,7 @@ static int ext4_init_block_bitmap(struct super_block *sb,
* essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
grp = ext4_get_group_info(sb, block_group);
- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
- percpu_counter_sub(&sbi->s_freeclusters_counter,
- grp->bb_free);
- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+ ext4_mark_group_corrupted(sbi, grp);
if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
int count;
count = ext4_free_inodes_count(sb, gdp);
@@ -252,7 +249,7 @@ unsigned ext4_free_clusters_after_init(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
{
- return num_clusters_in_group(sb, block_group) -
+ return num_clusters_in_group(sb, block_group) -
ext4_num_overhead_clusters(sb, block_group, gdp);
}
@@ -379,20 +376,14 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
ext4_unlock_group(sb, block_group);
ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
block_group, blk);
- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
- percpu_counter_sub(&sbi->s_freeclusters_counter,
- grp->bb_free);
- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+ ext4_mark_group_corrupted(sbi, grp);
return;
}
if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
desc, bh))) {
ext4_unlock_group(sb, block_group);
ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
- percpu_counter_sub(&sbi->s_freeclusters_counter,
- grp->bb_free);
- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+ ext4_mark_group_corrupted(sbi, grp);
return;
}
set_buffer_verified(bh);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f63c3d5..e10a94c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2535,6 +2535,15 @@ static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
}
+static inline
+void ext4_mark_group_corrupted(struct ext4_sb_info *sbi,
+ struct ext4_group_info *grp)
+{
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter, grp->bb_free);
+ set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+}
+
/*
* Returns true if the filesystem is busy enough that attempts to
* access the block group locks has run into contention.
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ac644c3..ebe0499 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -79,10 +79,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
grp = ext4_get_group_info(sb, block_group);
- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
- percpu_counter_sub(&sbi->s_freeclusters_counter,
- grp->bb_free);
- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+ ext4_mark_group_corrupted(sbi, grp);
if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
int count;
count = ext4_free_inodes_count(sb, gdp);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8d1e602..b8817f3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -760,10 +760,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
* corrupt and update bb_free using bitmap value
*/
grp->bb_free = free;
- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
- percpu_counter_sub(&sbi->s_freeclusters_counter,
- grp->bb_free);
- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+ ext4_mark_group_corrupted(sbi, grp);
}
mb_set_largest_free_order(sb, grp);
@@ -1448,12 +1445,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
"freeing already freed block "
"(bit %u); block bitmap corrupt.",
block);
- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))
- percpu_counter_sub(&sbi->s_freeclusters_counter,
- e4b->bd_info->bb_free);
/* Mark the block group as corrupt. */
- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
- &e4b->bd_info->bb_state);
+ ext4_mark_group_corrupted(sbi, e4b->bd_info);
mb_regenerate_buddy(e4b);
goto done;
}
@@ -2362,7 +2355,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
}
sbi->s_group_info = new_groupinfo;
sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
- ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
+ ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
sbi->s_group_info_size);
return 0;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e061e66..b4dfba3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4153,7 +4153,7 @@ no_journal:
}
block = ext4_count_free_clusters(sb);
- ext4_free_blocks_count_set(sbi->s_es,
+ ext4_free_blocks_count_set(sbi->s_es,
EXT4_C2B(sbi, block));
err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
GFP_KERNEL);
--
1.7.9.5
[PATCH v2 2/2] ext4: Add pollable sysfs entry for block threshold events
From: Beata Michalska @ 2015-03-16 13:25 UTC
To: lczerner, adilger.kernel; +Cc: tytso, linux-ext4, linux-kernel, kyungmin.park
Add support for a pollable sysfs entry for the available
logical blocks threshold, allowing userspace to wait for
a notification whenever the threshold is reached, instead
of periodically calling statfs. This is intended to work
as a single-shot notification to reduce the number of
triggered events.
Signed-off-by: Beata Michalska <b.michalska@samsung.com>
---
Documentation/filesystems/ext4.txt | 11 +++++++
fs/ext4/balloc.c | 28 ++++++++---------
fs/ext4/ext4.h | 5 +++
fs/ext4/super.c | 59 +++++++++++++++++++++++++++++++++++-
4 files changed, 88 insertions(+), 15 deletions(-)
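Since the whole point is to stop polling statfs, a rough userspace sketch of the intended usage may help; the device name (sda1 here) and the threshold value are placeholders only, the rest follows the usual pollable-sysfs pattern:

        #include <fcntl.h>
        #include <poll.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        int main(void)
        {
                const char *attr = "/sys/fs/ext4/sda1/available_blks_thres";
                struct pollfd pfd;
                char buf[32];
                int fd;

                fd = open(attr, O_RDWR);
                if (fd < 0)
                        return 1;

                /* Arm the threshold (value in logical blocks). */
                if (write(fd, "1048576", 7) < 0)
                        return 1;

                /* Dummy read so that poll() only reports new events. */
                lseek(fd, 0, SEEK_SET);
                read(fd, buf, sizeof(buf));

                pfd.fd = fd;
                pfd.events = POLLPRI | POLLERR;
                poll(&pfd, 1, -1);      /* sleeps until sysfs_notify() */

                /* Single-shot: the attribute now reads -1 again. */
                lseek(fd, 0, SEEK_SET);
                memset(buf, 0, sizeof(buf));
                read(fd, buf, sizeof(buf));
                printf("threshold reached, attribute reads: %s", buf);

                close(fd);
                return 0;
        }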
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 6c0108e..54d8f4d 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -446,6 +446,17 @@ Files in /sys/fs/ext4/<devname>
..............................................................................
File Content
+ available_blks_thres This RW file represents a threshold of available
+ logical blocks within the filesystem. Once
+ enabled, a notification shall be triggered
+ whenever the number of available blocks drops
+ below the specified limit, waking up any
+ process waiting for events (e.g. through poll)
+ on this file. Note that this works only
+ as a single-shot notification: once triggered,
+ the notifications will get disabled, restoring
+ the threshold to its default value (-1).
+
delayed_allocation_blocks This file is read-only and shows the number of
blocks that are dirty in the page cache, but
which do not have their location in the
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 5e6a9ca..9c1be88 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -519,12 +519,12 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
s64 nclusters, unsigned int flags)
{
- s64 free_clusters, dirty_clusters, rsv, resv_clusters;
+ s64 free_clusters, dirty_clusters, rsv, resv_clusters, bound_clusters;
struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
- free_clusters = percpu_counter_read_positive(fcc);
- dirty_clusters = percpu_counter_read_positive(dcc);
+ free_clusters = percpu_counter_sum_positive(fcc);
+ dirty_clusters = percpu_counter_sum_positive(dcc);
resv_clusters = atomic64_read(&sbi->s_resv_clusters);
/*
@@ -534,34 +534,34 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
rsv = (ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits) +
resv_clusters;
- if (free_clusters - (nclusters + rsv + dirty_clusters) <
- EXT4_FREECLUSTERS_WATERMARK) {
- free_clusters = percpu_counter_sum_positive(fcc);
- dirty_clusters = percpu_counter_sum_positive(dcc);
- }
+ bound_clusters = nclusters + rsv + dirty_clusters;
/* Check whether we have space after accounting for current
* dirty clusters & root reserved clusters.
*/
- if (free_clusters >= (rsv + nclusters + dirty_clusters))
- return 1;
+ if (free_clusters >= bound_clusters)
+ goto done;
/* Hm, nope. Are (enough) root reserved clusters available? */
if (uid_eq(sbi->s_resuid, current_fsuid()) ||
(!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
capable(CAP_SYS_RESOURCE) ||
(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
-
if (free_clusters >= (nclusters + dirty_clusters +
resv_clusters))
- return 1;
+ goto done;
}
/* No free blocks. Let's see if we can dip into reserved pool */
if (flags & EXT4_MB_USE_RESERVED) {
- if (free_clusters >= (nclusters + dirty_clusters))
- return 1;
+ if (free_clusters >= (bound_clusters - rsv))
+ goto done;
}
return 0;
+
+done:
+ ext4_available_blks_thres_notify(sbi,
+ EXT4_C2B(sbi, free_clusters - bound_clusters));
+ return 1;
}
int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e10a94c..84103bc 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1309,6 +1309,7 @@ struct ext4_sb_info {
unsigned long s_sectors_written_start;
u64 s_kbytes_written;
+ atomic64_t available_blks_thres;
/* the size of zero-out chunk */
unsigned int s_extent_max_zeroout_kb;
@@ -2207,6 +2208,9 @@ extern int ext4_alloc_flex_bg_array(struct super_block *sb,
ext4_group_t ngroup);
extern const char *ext4_decode_error(struct super_block *sb, int errno,
char nbuf[16]);
+extern void ext4_verify_available_blks_thres(struct ext4_sb_info *sbi);
+extern void ext4_available_blks_thres_notify(struct ext4_sb_info *sbi,
+ s64 ablocks);
extern __printf(4, 5)
void __ext4_error(struct super_block *, const char *, unsigned int,
@@ -2542,6 +2546,7 @@ void ext4_mark_group_corrupted(struct ext4_sb_info *sbi,
if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
percpu_counter_sub(&sbi->s_freeclusters_counter, grp->bb_free);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+ ext4_verify_available_blks_thres(sbi);
}
/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b4dfba3..ee45ae0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2558,10 +2558,63 @@ static ssize_t reserved_clusters_store(struct ext4_attr *a,
if (parse_strtoull(buf, -1ULL, &val))
return -EINVAL;
ret = ext4_reserve_clusters(sbi, val);
-
+ ext4_verify_available_blks_thres(sbi);
return ret ? ret : count;
}
+void ext4_available_blks_thres_notify(struct ext4_sb_info *sbi, s64 ablocks)
+{
+ s64 available_blks_thres = atomic64_read(&sbi->available_blks_thres);
+
+ if (available_blks_thres >= 0 && ablocks <= available_blks_thres) {
+ sysfs_notify(&sbi->s_kobj, NULL, "available_blks_thres");
+ /* Prevent flooding notifications */
+ atomic64_set(&sbi->available_blks_thres, ~0LLU);
+ }
+}
+
+void ext4_verify_available_blks_thres(struct ext4_sb_info *sbi)
+{
+ struct ext4_super_block *es = sbi->s_es;
+ unsigned long long ablocks;
+
+ if (atomic64_read(&sbi->available_blks_thres) < 0)
+ /* No limit set -> no notification needed */
+ return;
+
+ /* Verify the limit has not been reached. If so notify the watchers */
+ ablocks = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
+ percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
+ ablocks -= (ext4_r_blocks_count(es) +
+ EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters)));
+ ext4_available_blks_thres_notify(sbi, ablocks);
+}
+
+static ssize_t available_blks_thres_show(struct ext4_attr *a,
+ struct ext4_sb_info *sbi, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%lld\n",
+ atomic64_read(&sbi->available_blks_thres));
+
+}
+
+static ssize_t available_blks_thres_store(struct ext4_attr *a,
+ struct ext4_sb_info *sbi,
+ const char *buf, size_t count)
+{
+ struct ext4_super_block *es = sbi->s_es;
+ unsigned long long bcount, val;
+
+ bcount = ext4_blocks_count(es);
+ if (parse_strtoull(buf, bcount, &val))
+ return -EINVAL;
+ if (val != atomic64_read(&sbi->available_blks_thres)) {
+ atomic64_set(&sbi->available_blks_thres, val);
+ ext4_verify_available_blks_thres(sbi);
+ }
+ return count;
+}
+
static ssize_t trigger_test_error(struct ext4_attr *a,
struct ext4_sb_info *sbi,
const char *buf, size_t count)
@@ -2631,6 +2684,7 @@ EXT4_RO_ATTR(delayed_allocation_blocks);
EXT4_RO_ATTR(session_write_kbytes);
EXT4_RO_ATTR(lifetime_write_kbytes);
EXT4_RW_ATTR(reserved_clusters);
+EXT4_RW_ATTR(available_blks_thres);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
inode_readahead_blks_store, s_inode_readahead_blks);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
@@ -2658,6 +2712,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(session_write_kbytes),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(reserved_clusters),
+ ATTR_LIST(available_blks_thres),
ATTR_LIST(inode_readahead_blks),
ATTR_LIST(inode_goal),
ATTR_LIST(mb_stats),
@@ -4174,6 +4229,8 @@ no_journal:
goto failed_mount6;
}
+ atomic64_set(&sbi->available_blks_thres, ~0LLU);
+
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
if (!ext4_fill_flex_info(sb)) {
ext4_msg(sb, KERN_ERR,
--
1.7.9.5
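To sum up the arithmetic behind the notification (a paraphrase of the hunks above in the patch's own variable names, not additional code):

        /* ext4_verify_available_blks_thres() */
        available = free_clusters - dirty_clusters
                    - (ext4_r_blocks_count(es) + EXT4_C2B(sbi, s_resv_clusters));

        /* ext4_has_free_clusters(), on the allocation path;
         * rsv covers both the root-reserved blocks and s_resv_clusters */
        available = EXT4_C2B(sbi, free_clusters - (nclusters + rsv + dirty_clusters));

In both cases the notification fires once the result drops to the stored threshold or below; the threshold is then reset to -1, so userspace has to write a new value to re-arm it.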