Diffstat (limited to 'fs/xfs/xfs_zone_space_resv.c')
-rw-r--r--  fs/xfs/xfs_zone_space_resv.c  263
1 file changed, 263 insertions(+), 0 deletions(-)
diff --git a/fs/xfs/xfs_zone_space_resv.c b/fs/xfs/xfs_zone_space_resv.c
new file mode 100644
index 000000000000..93c9a7721139
--- /dev/null
+++ b/fs/xfs/xfs_zone_space_resv.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023-2025 Christoph Hellwig.
+ * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
+ */
+#include "xfs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_zone_alloc.h"
+#include "xfs_zone_priv.h"
+#include "xfs_zones.h"
+
+/*
+ * Note: the zoned allocator does not support an rtextsize > 1, so this code
+ * and the allocator itself use file system blocks interchangeably with
+ * realtime extents without doing the otherwise required conversions.
+ */
+
+/*
+ * Per-task space reservation.
+ *
+ * Tasks that need to wait for GC to free up space allocate one of these
+ * on-stack and add it to the per-mount zi_reclaim_reservations list.
+ * The GC thread will then wake the tasks in order when space becomes available.
+ */
+struct xfs_zone_reservation {
+ struct list_head entry;
+ struct task_struct *task;
+ xfs_filblks_t count_fsb;
+};
+
+/*
+ * Calculate the number of reserved blocks.
+ *
+ * XC_FREE_RTEXTENTS counts the user-available capacity up to which the file
+ * system can be filled, while XC_FREE_RTAVAILABLE counts the blocks instantly
+ * available for writes without waiting for GC.
+ *
+ * For XC_FREE_RTAVAILABLE, only the smaller reservation required for GC and
+ * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
+ * is further restricted by at least one zone as well as the optional
+ * persistently reserved blocks.  This allows the allocator to run more
+ * smoothly by not always triggering GC.
+ */
+uint64_t
+xfs_zoned_default_resblks(
+ struct xfs_mount *mp,
+ enum xfs_free_counter ctr)
+{
+ switch (ctr) {
+ case XC_FREE_RTEXTENTS:
+ return (uint64_t)XFS_RESERVED_ZONES *
+ mp->m_groups[XG_TYPE_RTG].blocks +
+ mp->m_sb.sb_rtreserved;
+ case XC_FREE_RTAVAILABLE:
+ return (uint64_t)XFS_GC_ZONES *
+ mp->m_groups[XG_TYPE_RTG].blocks;
+ default:
+ ASSERT(0);
+ return 0;
+ }
+}
+
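+/*
+ * Wake all tasks waiting on the reclaim reservation list so that they
+ * re-check the available space counter (or notice a shutdown) and can make
+ * progress.
+ */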
+void
+xfs_zoned_resv_wake_all(
+ struct xfs_mount *mp)
+{
+ struct xfs_zone_info *zi = mp->m_zone_info;
+ struct xfs_zone_reservation *reservation;
+
+ spin_lock(&zi->zi_reservation_lock);
+ list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
+ wake_up_process(reservation->task);
+ spin_unlock(&zi->zi_reservation_lock);
+}
+
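+/*
+ * Add freed blocks back to the instantly available counter and, if tasks are
+ * waiting for space, wake them in list order as long as their reservations
+ * fit into the updated counter value.
+ */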
+void
+xfs_zoned_add_available(
+ struct xfs_mount *mp,
+ xfs_filblks_t count_fsb)
+{
+ struct xfs_zone_info *zi = mp->m_zone_info;
+ struct xfs_zone_reservation *reservation;
+
+ if (list_empty_careful(&zi->zi_reclaim_reservations)) {
+ xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
+ return;
+ }
+
+ spin_lock(&zi->zi_reservation_lock);
+ xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
+ count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
+ list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
+ if (reservation->count_fsb > count_fsb)
+ break;
+ wake_up_process(reservation->task);
+ count_fsb -= reservation->count_fsb;
+ }
+ spin_unlock(&zi->zi_reservation_lock);
+}
+
+static int
+xfs_zoned_space_wait_error(
+ struct xfs_mount *mp)
+{
+ if (xfs_is_shutdown(mp))
+ return -EIO;
+ if (fatal_signal_pending(current))
+ return -EINTR;
+ return 0;
+}
+
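+/*
+ * Reserve count_fsb blocks of instantly available space.  If not enough
+ * space is available, put the task on the per-mount reservation list and
+ * sleep until GC frees up space, the file system is shut down, or a fatal
+ * signal is pending.  Callers passing XFS_ZR_NOWAIT get -EAGAIN instead of
+ * sleeping.
+ */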
+static int
+xfs_zoned_reserve_available(
+ struct xfs_inode *ip,
+ xfs_filblks_t count_fsb,
+ unsigned int flags)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_zone_info *zi = mp->m_zone_info;
+ struct xfs_zone_reservation reservation = {
+ .task = current,
+ .count_fsb = count_fsb,
+ };
+ int error;
+
+ /*
+ * If there are no waiters, try to directly grab the available blocks
+ * from the percpu counter.
+ *
+	 * If the caller wants to dip into the reserved pool, also bypass the
+	 * wait list.  This relies on the fact that we have a very generously
+	 * sized reserved pool that always has enough space.  If the reserved
+	 * allocations fail we're in trouble.
+ */
+ if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
+ (flags & XFS_ZR_RESERVED))) {
+ error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
+ flags & XFS_ZR_RESERVED);
+ if (error != -ENOSPC)
+ return error;
+ }
+
+ if (flags & XFS_ZR_NOWAIT)
+ return -EAGAIN;
+
+ spin_lock(&zi->zi_reservation_lock);
+ list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
+ while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
+ set_current_state(TASK_KILLABLE);
+
+ error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
+ flags & XFS_ZR_RESERVED);
+ if (error != -ENOSPC)
+ break;
+
+ /*
+ * Make sure to start GC if it is not running already. As we
+ * check the rtavailable count when filling up zones, GC is
+ * normally already running at this point, but in some setups
+ * with very few zones we may completely run out of non-
+ * reserved blocks in between filling zones.
+ */
+ if (!xfs_is_zonegc_running(mp))
+ wake_up_process(zi->zi_gc_thread);
+
+ /*
+		 * If there is no reclaimable group left and we aren't still
+		 * processing a pending GC request, give up as we're fully out
+		 * of space.
+ */
+ if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE) &&
+ !xfs_is_zonegc_running(mp))
+ break;
+
+ spin_unlock(&zi->zi_reservation_lock);
+ schedule();
+ spin_lock(&zi->zi_reservation_lock);
+ }
+ list_del(&reservation.entry);
+ spin_unlock(&zi->zi_reservation_lock);
+
+ __set_current_state(TASK_RUNNING);
+ return error;
+}
+
+/*
+ * Implement greedy space allocation for short writes by trying to grab all
+ * that is left after locking out other threads from trying to do the same.
+ *
+ * This isn't exactly optimal and can hopefully be replaced by a proper
+ * percpu_counter primitive one day.
+ */
+static int
+xfs_zoned_reserve_extents_greedy(
+ struct xfs_inode *ip,
+ xfs_filblks_t *count_fsb,
+ unsigned int flags)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_zone_info *zi = mp->m_zone_info;
+ s64 len = *count_fsb;
+ int error = -ENOSPC;
+
+ spin_lock(&zi->zi_reservation_lock);
+ len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
+ if (len > 0) {
+ *count_fsb = len;
+ error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
+ flags & XFS_ZR_RESERVED);
+ }
+ spin_unlock(&zi->zi_reservation_lock);
+ return error;
+}
+
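+/*
+ * Reserve count_fsb blocks of both the user-available capacity
+ * (XC_FREE_RTEXTENTS) and the instantly available space (XC_FREE_RTAVAILABLE)
+ * and record the reservation in the allocation context for later release.
+ */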
+int
+xfs_zoned_space_reserve(
+ struct xfs_inode *ip,
+ xfs_filblks_t count_fsb,
+ unsigned int flags,
+ struct xfs_zone_alloc_ctx *ac)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ int error;
+
+ ASSERT(ac->reserved_blocks == 0);
+ ASSERT(ac->open_zone == NULL);
+
+ error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
+ flags & XFS_ZR_RESERVED);
+ if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
+ error = xfs_zoned_reserve_extents_greedy(ip, &count_fsb, flags);
+ if (error)
+ return error;
+
+ error = xfs_zoned_reserve_available(ip, count_fsb, flags);
+ if (error) {
+ xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
+ return error;
+ }
+ ac->reserved_blocks = count_fsb;
+ return 0;
+}
+
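+/*
+ * Release a reservation taken by xfs_zoned_space_reserve: return the blocks
+ * to both free counters and drop the reference to the cached open zone, if
+ * any.
+ */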
+void
+xfs_zoned_space_unreserve(
+ struct xfs_inode *ip,
+ struct xfs_zone_alloc_ctx *ac)
+{
+ if (ac->reserved_blocks > 0) {
+ struct xfs_mount *mp = ip->i_mount;
+
+ xfs_zoned_add_available(mp, ac->reserved_blocks);
+ xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
+ }
+ if (ac->open_zone)
+ xfs_open_zone_put(ac->open_zone);
+}
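
For context, callers are expected to bracket their zoned allocations with the two helpers above. The sketch below is illustrative only and not part of the patch: xfs_example_write_prep() is a hypothetical caller, while the helpers, flags, and struct fields it uses are the ones introduced in this file.

/* Hypothetical caller, shown only to illustrate the reserve/unreserve pairing. */
static int
xfs_example_write_prep(
	struct xfs_inode	*ip,
	xfs_filblks_t		count_fsb)
{
	/* Must start out empty, see the ASSERTs in xfs_zoned_space_reserve(). */
	struct xfs_zone_alloc_ctx ac = { };
	int			error;

	/* Block until both free counters can cover count_fsb blocks. */
	error = xfs_zoned_space_reserve(ip, count_fsb, XFS_ZR_GREEDY, &ac);
	if (error)
		return error;

	/* ... perform the actual allocation using ac.reserved_blocks ... */

	/* Return any remaining reservation and drop the open zone reference. */
	xfs_zoned_space_unreserve(ip, &ac);
	return 0;
}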