diff options
| author | Yufan Chen <ericterminal@gmail.com> | 2026-03-30 23:34:28 +0800 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2026-04-15 02:15:01 -0700 |
| commit | 5686459423d03d192134166f0ec45f98bb2d5d57 (patch) | |
| tree | 0d9d045a1c3aaf3921344f460594e9dbbce3b09f /fs/ocfs2 | |
| parent | e3a84be1ec2fd6f06e54bb31642412864d65280f (diff) | |
| download | lwn-5686459423d03d192134166f0ec45f98bb2d5d57.tar.gz lwn-5686459423d03d192134166f0ec45f98bb2d5d57.zip | |
ocfs2/heartbeat: fix slot mapping rollback leaks on error paths

o2hb_map_slot_data() allocates hr_tmp_block, hr_slots, hr_slot_data, and
pages in stages. If a later allocation fails, the current code returns
without unwinding the earlier allocations.

o2hb_region_dev_store() also leaves slot mapping resources behind when
setup aborts, and it keeps hr_aborted_start/hr_node_deleted set across
retries. That leaves stale state behind after a failed start.

Factor the slot cleanup into o2hb_unmap_slot_data(), use it from both
o2hb_map_slot_data() and o2hb_region_release(), and call it from the
dev_store() rollback after stopping a started heartbeat thread. While
freeing pages, clear each hr_slot_data entry as it is released, and reset
the start state before each new setup attempt.

This closes the slot mapping leak on allocation/setup failure paths and
keeps failed setup attempts retryable.

Link: https://lkml.kernel.org/r/20260330153428.19586-1-yufan.chen@linux.dev
Signed-off-by: Yufan Chen <ericterminal@gmail.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Jun Piao <piaojun@huawei.com>
Cc: Heming Zhao <heming.zhao@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2')
| -rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 83 |
1 files changed, 56 insertions, 27 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index fe1949578336..d12784aaaa4b 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1488,33 +1488,45 @@ static struct o2hb_region *to_o2hb_region(struct config_item *item)
 	return item ? container_of(item, struct o2hb_region, hr_item) : NULL;
 }
 
-/* drop_item only drops its ref after killing the thread, nothing should
- * be using the region anymore. this has to clean up any state that
- * attributes might have built up. */
-static void o2hb_region_release(struct config_item *item)
+static void o2hb_unmap_slot_data(struct o2hb_region *reg)
 {
 	int i;
 	struct page *page;
-	struct o2hb_region *reg = to_o2hb_region(item);
-
-	mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg_bdev(reg));
-
-	kfree(reg->hr_tmp_block);
 
 	if (reg->hr_slot_data) {
 		for (i = 0; i < reg->hr_num_pages; i++) {
 			page = reg->hr_slot_data[i];
-			if (page)
+			if (page) {
 				__free_page(page);
+				reg->hr_slot_data[i] = NULL;
+			}
 		}
 		kfree(reg->hr_slot_data);
+		reg->hr_slot_data = NULL;
 	}
 
+	kfree(reg->hr_slots);
+	reg->hr_slots = NULL;
+
+	kfree(reg->hr_tmp_block);
+	reg->hr_tmp_block = NULL;
+}
+
+/* drop_item only drops its ref after killing the thread, nothing should
+ * be using the region anymore. this has to clean up any state that
+ * attributes might have built up.
+ */
+static void o2hb_region_release(struct config_item *item)
+{
+	struct o2hb_region *reg = to_o2hb_region(item);
+
+	mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg_bdev(reg));
+
+	o2hb_unmap_slot_data(reg);
+
 	if (reg->hr_bdev_file)
 		fput(reg->hr_bdev_file);
 
-	kfree(reg->hr_slots);
-
 	debugfs_remove_recursive(reg->hr_debug_dir);
 	kfree(reg->hr_db_livenodes);
 	kfree(reg->hr_db_regnum);
@@ -1667,6 +1679,7 @@ static void o2hb_init_region_params(struct o2hb_region *reg)
 static int o2hb_map_slot_data(struct o2hb_region *reg)
 {
 	int i, j;
+	int ret = -ENOMEM;
 	unsigned int last_slot;
 	unsigned int spp = reg->hr_slots_per_page;
 	struct page *page;
@@ -1674,14 +1687,14 @@ static int o2hb_map_slot_data(struct o2hb_region *reg)
 	struct o2hb_disk_slot *slot;
 
 	reg->hr_tmp_block = kmalloc(reg->hr_block_bytes, GFP_KERNEL);
-	if (reg->hr_tmp_block == NULL)
-		return -ENOMEM;
+	if (!reg->hr_tmp_block)
+		goto out;
 
 	reg->hr_slots = kzalloc_objs(struct o2hb_disk_slot, reg->hr_blocks);
-	if (reg->hr_slots == NULL)
-		return -ENOMEM;
+	if (!reg->hr_slots)
+		goto out;
 
-	for(i = 0; i < reg->hr_blocks; i++) {
+	for (i = 0; i < reg->hr_blocks; i++) {
 		slot = &reg->hr_slots[i];
 		slot->ds_node_num = i;
 		INIT_LIST_HEAD(&slot->ds_live_item);
@@ -1695,12 +1708,12 @@ static int o2hb_map_slot_data(struct o2hb_region *reg)
 
 	reg->hr_slot_data = kzalloc_objs(struct page *, reg->hr_num_pages);
 	if (!reg->hr_slot_data)
-		return -ENOMEM;
+		goto out;
 
-	for(i = 0; i < reg->hr_num_pages; i++) {
+	for (i = 0; i < reg->hr_num_pages; i++) {
 		page = alloc_page(GFP_KERNEL);
 		if (!page)
-			return -ENOMEM;
+			goto out;
 
 		reg->hr_slot_data[i] = page;
 
@@ -1720,6 +1733,10 @@ static int o2hb_map_slot_data(struct o2hb_region *reg)
 	}
 
 	return 0;
+
+out:
+	o2hb_unmap_slot_data(reg);
+	return ret;
 }
 
 /* Read in all the slots available and populate the tracking
@@ -1809,9 +1826,11 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
 		     "blocksize %u incorrect for device, expected %d",
 		     reg->hr_block_bytes, sectsize);
 		ret = -EINVAL;
-		goto out3;
+		goto out;
 	}
 
+	reg->hr_aborted_start = 0;
+	reg->hr_node_deleted = 0;
 	o2hb_init_region_params(reg);
 
 	/* Generation of zero is invalid */
@@ -1823,13 +1842,13 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
 	ret = o2hb_map_slot_data(reg);
 	if (ret) {
 		mlog_errno(ret);
-		goto out3;
+		goto out;
 	}
 
 	ret = o2hb_populate_slot_data(reg);
 	if (ret) {
 		mlog_errno(ret);
-		goto out3;
+		goto out;
 	}
 
 	INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout);
@@ -1860,7 +1879,7 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
 	if (IS_ERR(hb_task)) {
 		ret = PTR_ERR(hb_task);
 		mlog_errno(ret);
-		goto out3;
+		goto out;
 	}
 
 	spin_lock(&o2hb_live_lock);
@@ -1877,12 +1896,12 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
 
 	if (reg->hr_aborted_start) {
 		ret = -EIO;
-		goto out3;
+		goto out;
 	}
 
 	if (reg->hr_node_deleted) {
 		ret = -EINVAL;
-		goto out3;
+		goto out;
 	}
 
 	/* Ok, we were woken. Make sure it wasn't by drop_item() */
@@ -1901,8 +1920,18 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
 		printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%pg)\n",
 		       config_item_name(&reg->hr_item), reg_bdev(reg));
 
-out3:
+out:
 	if (ret < 0) {
+		spin_lock(&o2hb_live_lock);
+		hb_task = reg->hr_task;
+		reg->hr_task = NULL;
+		spin_unlock(&o2hb_live_lock);
+
+		if (hb_task)
+			kthread_stop(hb_task);
+
+		o2hb_unmap_slot_data(reg);
+
 		fput(reg->hr_bdev_file);
 		reg->hr_bdev_file = NULL;
 	}
