From 9dceccc5822f2ecea12a89f24d7cad1f3e5eab7c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 11 Apr 2023 19:00:16 -0700 Subject: xfs: use the directory name hash function for dir scrubbing The directory code has a directory-specific hash computation function that includes a modified hash function for case-insensitive lookups. Hence we must use that function (and not the raw da_hashname) when checking the dabtree structure. Found by accidentally breaking xfs/188 to create an abnormally huge case-insensitive directory and watching scrub break. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/scrub/dir.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index b6081a3e1b91..1b1830576dcd 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -201,6 +201,7 @@ xchk_dir_rec( struct xchk_da_btree *ds, int level) { + struct xfs_name dname = { }; struct xfs_da_state_blk *blk = &ds->state->path.blk[level]; struct xfs_mount *mp = ds->state->mp; struct xfs_inode *dp = ds->dargs.dp; @@ -297,7 +298,11 @@ xchk_dir_rec( xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); goto out_relse; } - calc_hash = xfs_da_hashname(dent->name, dent->namelen); + + /* Does the directory hash match? */ + dname.name = dent->name; + dname.len = dent->namelen; + calc_hash = xfs_dir2_hashname(mp, &dname); if (calc_hash != hash) xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); -- cgit v1.2.3 From 4c233b5c4f29dff11eeb64b2b1cc0831b9904a4f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 11 Apr 2023 19:00:17 -0700 Subject: xfs: streamline the directory iteration code for scrub Currently, online scrub reuses the xfs_readdir code to walk every entry in a directory. This isn't awesome for performance, since we end up cycling the directory ILOCK needlessly and coding around the particular quirks of the VFS dir_context interface. Create a streamlined version of readdir that keeps the ILOCK (since the walk function isn't going to copy stuff to userspace), skips a whole lot of directory walk cursor checks (since we start at 0 and walk to the end) and has a sane way to return error codes. Note: Porting the dotdot checking code is left for a subsequent patch. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/Makefile | 1 + fs/xfs/scrub/dir.c | 188 ++++++++----------------- fs/xfs/scrub/parent.c | 73 +++------- fs/xfs/scrub/readdir.c | 375 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/readdir.h | 19 +++ 5 files changed, 473 insertions(+), 183 deletions(-) create mode 100644 fs/xfs/scrub/readdir.c create mode 100644 fs/xfs/scrub/readdir.h (limited to 'fs/xfs') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 3bdbc838c4d1..ac9d03cd2623 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -158,6 +158,7 @@ xfs-y += $(addprefix scrub/, \ ialloc.o \ inode.o \ parent.o \ + readdir.o \ refcount.o \ rmap.o \ scrub.o \ diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 1b1830576dcd..f1cbe7b22688 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -18,6 +18,7 @@ #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/dabtree.h" +#include "scrub/readdir.h" /* Set us up to scrub directories. */ int @@ -31,30 +32,21 @@ xchk_setup_directory( /* Scrub a directory entry. */ -struct xchk_dir_ctx { - /* VFS fill-directory iterator */ - struct dir_context dir_iter; - - struct xfs_scrub *sc; -}; - -/* Check that an inode's mode matches a given DT_ type. */ +/* Check that an inode's mode matches a given XFS_DIR3_FT_* type. */ STATIC int xchk_dir_check_ftype( - struct xchk_dir_ctx *sdc, + struct xfs_scrub *sc, xfs_fileoff_t offset, xfs_ino_t inum, - int dtype) + int ftype) { - struct xfs_mount *mp = sdc->sc->mp; + struct xfs_mount *mp = sc->mp; struct xfs_inode *ip; - int ino_dtype; int error = 0; if (!xfs_has_ftype(mp)) { - if (dtype != DT_UNKNOWN && dtype != DT_DIR) - xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, - offset); + if (ftype != XFS_DIR3_FT_UNKNOWN && ftype != XFS_DIR3_FT_DIR) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); goto out; } @@ -71,21 +63,17 @@ xchk_dir_check_ftype( * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a * cross referencing error. Any other error is an operational error. */ - error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip); + error = xfs_iget(mp, sc->tp, inum, 0, 0, &ip); if (error == -EINVAL || error == -ENOENT) { error = -EFSCORRUPTED; - xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, 0, &error); + xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); goto out; } - if (!xchk_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset, - &error)) + if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, offset, &error)) goto out; - /* Convert mode to the DT_* values that dir_emit uses. */ - ino_dtype = xfs_dir3_get_dtype(mp, - xfs_mode_to_ftype(VFS_I(ip)->i_mode)); - if (ino_dtype != dtype) - xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + if (xfs_mode_to_ftype(VFS_I(ip)->i_mode) != ftype) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); xfs_irele(ip); out: return error; @@ -94,105 +82,85 @@ out: /* * Scrub a single directory entry. * - * We use the VFS directory iterator (i.e. readdir) to call this - * function for every directory entry in a directory. Once we're here, - * we check the inode number to make sure it's sane, then we check that - * we can look up this filename. Finally, we check the ftype. + * Check the inode number to make sure it's sane, then we check that we can + * look up this filename. Finally, we check the ftype. */ -STATIC bool +STATIC int xchk_dir_actor( - struct dir_context *dir_iter, - const char *name, - int namelen, - loff_t pos, - u64 ino, - unsigned type) + struct xfs_scrub *sc, + struct xfs_inode *dp, + xfs_dir2_dataptr_t dapos, + const struct xfs_name *name, + xfs_ino_t ino, + void *priv) { - struct xfs_mount *mp; - struct xfs_inode *ip; - struct xchk_dir_ctx *sdc; - struct xfs_name xname; + struct xfs_mount *mp = dp->i_mount; xfs_ino_t lookup_ino; xfs_dablk_t offset; bool checked_ftype = false; int error = 0; - sdc = container_of(dir_iter, struct xchk_dir_ctx, dir_iter); - ip = sdc->sc->ip; - mp = ip->i_mount; offset = xfs_dir2_db_to_da(mp->m_dir_geo, - xfs_dir2_dataptr_to_db(mp->m_dir_geo, pos)); + xfs_dir2_dataptr_to_db(mp->m_dir_geo, dapos)); - if (xchk_should_terminate(sdc->sc, &error)) - return !error; + if (xchk_should_terminate(sc, &error)) + return error; /* Does this inode number make sense? */ if (!xfs_verify_dir_ino(mp, ino)) { - xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); - goto out; + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + return -ECANCELED; } /* Does this name make sense? */ - if (!xfs_dir2_namecheck(name, namelen)) { - xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); - goto out; + if (!xfs_dir2_namecheck(name->name, name->len)) { + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + return -ECANCELED; } - if (!strncmp(".", name, namelen)) { + if (!strncmp(".", name->name, name->len)) { /* If this is "." then check that the inum matches the dir. */ - if (xfs_has_ftype(mp) && type != DT_DIR) - xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, - offset); + if (xfs_has_ftype(mp) && name->type != XFS_DIR3_FT_DIR) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); checked_ftype = true; - if (ino != ip->i_ino) - xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, - offset); - } else if (!strncmp("..", name, namelen)) { + if (ino != dp->i_ino) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + } else if (!strncmp("..", name->name, name->len)) { /* * If this is ".." in the root inode, check that the inum * matches this dir. */ - if (xfs_has_ftype(mp) && type != DT_DIR) - xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, - offset); + if (xfs_has_ftype(mp) && name->type != XFS_DIR3_FT_DIR) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); checked_ftype = true; - if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino) - xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, - offset); + if (dp->i_ino == mp->m_sb.sb_rootino && ino != dp->i_ino) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); } /* Verify that we can look up this name by hash. */ - xname.name = name; - xname.len = namelen; - xname.type = XFS_DIR3_FT_UNKNOWN; - - error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL); + error = xchk_dir_lookup(sc, dp, name, &lookup_ino); /* ENOENT means the hash lookup failed and the dir is corrupt */ if (error == -ENOENT) error = -EFSCORRUPTED; - if (!xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, - &error)) + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error)) goto out; if (lookup_ino != ino) { - xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); - goto out; + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + return -ECANCELED; } /* Verify the file type. This function absorbs error codes. */ if (!checked_ftype) { - error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type); + error = xchk_dir_check_ftype(sc, offset, lookup_ino, + name->type); if (error) goto out; } + out: - /* - * A negative error code returned here is supposed to cause the - * dir_emit caller (xfs_readdir) to abort the directory iteration - * and return zero to xchk_directory. - */ - if (error == 0 && sdc->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - return false; - return !error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return -ECANCELED; + return error; } /* Scrub a directory btree record. */ @@ -808,14 +776,7 @@ int xchk_directory( struct xfs_scrub *sc) { - struct xchk_dir_ctx sdc = { - .dir_iter.actor = xchk_dir_actor, - .dir_iter.pos = 0, - .sc = sc, - }; - size_t bufsize; - loff_t oldpos; - int error = 0; + int error; if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) return -ENOENT; @@ -823,7 +784,7 @@ xchk_directory( /* Plausible size? */ if (sc->ip->i_disk_size < xfs_dir2_sf_hdr_size(0)) { xchk_ino_set_corrupt(sc, sc->ip->i_ino); - goto out; + return 0; } /* Check directory tree structure */ @@ -832,7 +793,7 @@ xchk_directory( return error; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - return error; + return 0; /* Check the freespace. */ error = xchk_directory_blocks(sc); @@ -840,44 +801,11 @@ xchk_directory( return error; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - return error; - - /* - * Check that every dirent we see can also be looked up by hash. - * Userspace usually asks for a 32k buffer, so we will too. - */ - bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, - sc->ip->i_disk_size); - - /* - * Look up every name in this directory by hash. - * - * Use the xfs_readdir function to call xchk_dir_actor on - * every directory entry in this directory. In _actor, we check - * the name, inode number, and ftype (if applicable) of the - * entry. xfs_readdir uses the VFS filldir functions to provide - * iteration context. - * - * The VFS grabs a read or write lock via i_rwsem before it reads - * or writes to a directory. If we've gotten this far we've - * already obtained IOLOCK_EXCL, which (since 4.10) is the same as - * getting a write lock on i_rwsem. Therefore, it is safe for us - * to drop the ILOCK here in order to reuse the _readdir and - * _dir_lookup routines, which do their own ILOCK locking. - */ - oldpos = 0; - sc->ilock_flags &= ~XFS_ILOCK_EXCL; - xfs_iunlock(sc->ip, XFS_ILOCK_EXCL); - while (true) { - error = xfs_readdir(sc->tp, sc->ip, &sdc.dir_iter, bufsize); - if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, - &error)) - goto out; - if (oldpos == sdc.dir_iter.pos) - break; - oldpos = sdc.dir_iter.pos; - } + return 0; -out: + /* Look up every name in this directory by hash. */ + error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, NULL); + if (error == -ECANCELED) + error = 0; return error; } diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index d1db18250ee3..af351c4ee6ec 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -16,6 +16,7 @@ #include "xfs_dir2_priv.h" #include "scrub/scrub.h" #include "scrub/common.h" +#include "scrub/readdir.h" /* Set us up to scrub parents. */ int @@ -30,39 +31,36 @@ xchk_setup_parent( /* Look for an entry in a parent pointing to this inode. */ struct xchk_parent_ctx { - struct dir_context dc; struct xfs_scrub *sc; - xfs_ino_t ino; xfs_nlink_t nlink; - bool cancelled; }; /* Look for a single entry in a directory pointing to an inode. */ -STATIC bool +STATIC int xchk_parent_actor( - struct dir_context *dc, - const char *name, - int namelen, - loff_t pos, - u64 ino, - unsigned type) + struct xfs_scrub *sc, + struct xfs_inode *dp, + xfs_dir2_dataptr_t dapos, + const struct xfs_name *name, + xfs_ino_t ino, + void *priv) { - struct xchk_parent_ctx *spc; + struct xchk_parent_ctx *spc = priv; int error = 0; - spc = container_of(dc, struct xchk_parent_ctx, dc); - if (spc->ino == ino) + /* Does this name make sense? */ + if (!xfs_dir2_namecheck(name->name, name->len)) + error = -EFSCORRUPTED; + if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) + return error; + + if (sc->ip->i_ino == ino) spc->nlink++; - /* - * If we're facing a fatal signal, bail out. Store the cancellation - * status separately because the VFS readdir code squashes error codes - * into short directory reads. - */ if (xchk_should_terminate(spc->sc, &error)) - spc->cancelled = true; + return error; - return !error; + return 0; } /* Count the number of dentries in the parent dir that point to this inode. */ @@ -73,50 +71,19 @@ xchk_parent_count_parent_dentries( xfs_nlink_t *nlink) { struct xchk_parent_ctx spc = { - .dc.actor = xchk_parent_actor, - .ino = sc->ip->i_ino, .sc = sc, + .nlink = 0, }; - size_t bufsize; - loff_t oldpos; uint lock_mode; int error = 0; - /* - * If there are any blocks, read-ahead block 0 as we're almost - * certain to have the next operation be a read there. This is - * how we guarantee that the parent's extent map has been loaded, - * if there is one. - */ lock_mode = xfs_ilock_data_map_shared(parent); - if (parent->i_df.if_nextents > 0) - error = xfs_dir3_data_readahead(parent, 0, 0); + error = xchk_dir_walk(sc, parent, xchk_parent_actor, &spc); xfs_iunlock(parent, lock_mode); if (error) return error; - /* - * Iterate the parent dir to confirm that there is - * exactly one entry pointing back to the inode being - * scanned. - */ - bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, - parent->i_disk_size); - oldpos = 0; - while (true) { - error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize); - if (error) - goto out; - if (spc.cancelled) { - error = -EAGAIN; - goto out; - } - if (oldpos == spc.dc.pos) - break; - oldpos = spc.dc.pos; - } *nlink = spc.nlink; -out: return error; } diff --git a/fs/xfs/scrub/readdir.c b/fs/xfs/scrub/readdir.c new file mode 100644 index 000000000000..e51c1544be63 --- /dev/null +++ b/fs/xfs/scrub/readdir.c @@ -0,0 +1,375 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2022-2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_trace.h" +#include "xfs_bmap.h" +#include "xfs_trans.h" +#include "xfs_error.h" +#include "scrub/scrub.h" +#include "scrub/readdir.h" + +/* Call a function for every entry in a shortform directory. */ +STATIC int +xchk_dir_walk_sf( + struct xfs_scrub *sc, + struct xfs_inode *dp, + xchk_dirent_fn dirent_fn, + void *priv) +{ + struct xfs_name name = { + .name = ".", + .len = 1, + .type = XFS_DIR3_FT_DIR, + }; + struct xfs_mount *mp = dp->i_mount; + struct xfs_da_geometry *geo = mp->m_dir_geo; + struct xfs_dir2_sf_entry *sfep; + struct xfs_dir2_sf_hdr *sfp; + xfs_ino_t ino; + xfs_dir2_dataptr_t dapos; + unsigned int i; + int error; + + ASSERT(dp->i_df.if_bytes == dp->i_disk_size); + ASSERT(dp->i_df.if_u1.if_data != NULL); + + sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data; + + /* dot entry */ + dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, + geo->data_entry_offset); + + error = dirent_fn(sc, dp, dapos, &name, dp->i_ino, priv); + if (error) + return error; + + /* dotdot entry */ + dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, + geo->data_entry_offset + + xfs_dir2_data_entsize(mp, sizeof(".") - 1)); + ino = xfs_dir2_sf_get_parent_ino(sfp); + name.name = ".."; + name.len = 2; + + error = dirent_fn(sc, dp, dapos, &name, ino, priv); + if (error) + return error; + + /* iterate everything else */ + sfep = xfs_dir2_sf_firstentry(sfp); + for (i = 0; i < sfp->count; i++) { + dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, + xfs_dir2_sf_get_offset(sfep)); + ino = xfs_dir2_sf_get_ino(mp, sfp, sfep); + name.name = sfep->name; + name.len = sfep->namelen; + name.type = xfs_dir2_sf_get_ftype(mp, sfep); + + error = dirent_fn(sc, dp, dapos, &name, ino, priv); + if (error) + return error; + + sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep); + } + + return 0; +} + +/* Call a function for every entry in a block directory. */ +STATIC int +xchk_dir_walk_block( + struct xfs_scrub *sc, + struct xfs_inode *dp, + xchk_dirent_fn dirent_fn, + void *priv) +{ + struct xfs_mount *mp = dp->i_mount; + struct xfs_da_geometry *geo = mp->m_dir_geo; + struct xfs_buf *bp; + unsigned int off, next_off, end; + int error; + + error = xfs_dir3_block_read(sc->tp, dp, &bp); + if (error) + return error; + + /* Walk each directory entry. */ + end = xfs_dir3_data_end_offset(geo, bp->b_addr); + for (off = geo->data_entry_offset; off < end; off = next_off) { + struct xfs_name name = { }; + struct xfs_dir2_data_unused *dup = bp->b_addr + off; + struct xfs_dir2_data_entry *dep = bp->b_addr + off; + xfs_ino_t ino; + xfs_dir2_dataptr_t dapos; + + /* Skip an empty entry. */ + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + next_off = off + be16_to_cpu(dup->length); + continue; + } + + /* Otherwise, find the next entry and report it. */ + next_off = off + xfs_dir2_data_entsize(mp, dep->namelen); + if (next_off > end) + break; + + dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, off); + ino = be64_to_cpu(dep->inumber); + name.name = dep->name; + name.len = dep->namelen; + name.type = xfs_dir2_data_get_ftype(mp, dep); + + error = dirent_fn(sc, dp, dapos, &name, ino, priv); + if (error) + break; + } + + xfs_trans_brelse(sc->tp, bp); + return error; +} + +/* Read a leaf-format directory buffer. */ +STATIC int +xchk_read_leaf_dir_buf( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_da_geometry *geo, + xfs_dir2_off_t *curoff, + struct xfs_buf **bpp) +{ + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec map; + struct xfs_ifork *ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK); + xfs_dablk_t last_da; + xfs_dablk_t map_off; + xfs_dir2_off_t new_off; + + *bpp = NULL; + + /* + * Look for mapped directory blocks at or above the current offset. + * Truncate down to the nearest directory block to start the scanning + * operation. + */ + last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET); + map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *curoff)); + + if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map)) + return 0; + if (map.br_startoff >= last_da) + return 0; + xfs_trim_extent(&map, map_off, last_da - map_off); + + /* Read the directory block of that first mapping. */ + new_off = xfs_dir2_da_to_byte(geo, map.br_startoff); + if (new_off > *curoff) + *curoff = new_off; + + return xfs_dir3_data_read(tp, dp, map.br_startoff, 0, bpp); +} + +/* Call a function for every entry in a leaf directory. */ +STATIC int +xchk_dir_walk_leaf( + struct xfs_scrub *sc, + struct xfs_inode *dp, + xchk_dirent_fn dirent_fn, + void *priv) +{ + struct xfs_mount *mp = dp->i_mount; + struct xfs_da_geometry *geo = mp->m_dir_geo; + struct xfs_buf *bp = NULL; + xfs_dir2_off_t curoff = 0; + unsigned int offset = 0; + int error; + + /* Iterate every directory offset in this directory. */ + while (curoff < XFS_DIR2_LEAF_OFFSET) { + struct xfs_name name = { }; + struct xfs_dir2_data_unused *dup; + struct xfs_dir2_data_entry *dep; + xfs_ino_t ino; + unsigned int length; + xfs_dir2_dataptr_t dapos; + + /* + * If we have no buffer, or we're off the end of the + * current buffer, need to get another one. + */ + if (!bp || offset >= geo->blksize) { + if (bp) { + xfs_trans_brelse(sc->tp, bp); + bp = NULL; + } + + error = xchk_read_leaf_dir_buf(sc->tp, dp, geo, &curoff, + &bp); + if (error || !bp) + break; + + /* + * Find our position in the block. + */ + offset = geo->data_entry_offset; + curoff += geo->data_entry_offset; + } + + /* Skip an empty entry. */ + dup = bp->b_addr + offset; + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + length = be16_to_cpu(dup->length); + offset += length; + curoff += length; + continue; + } + + /* Otherwise, find the next entry and report it. */ + dep = bp->b_addr + offset; + length = xfs_dir2_data_entsize(mp, dep->namelen); + + dapos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; + ino = be64_to_cpu(dep->inumber); + name.name = dep->name; + name.len = dep->namelen; + name.type = xfs_dir2_data_get_ftype(mp, dep); + + error = dirent_fn(sc, dp, dapos, &name, ino, priv); + if (error) + break; + + /* Advance to the next entry. */ + offset += length; + curoff += length; + } + + if (bp) + xfs_trans_brelse(sc->tp, bp); + return error; +} + +/* + * Call a function for every entry in a directory. + * + * Callers must hold the ILOCK. File types are XFS_DIR3_FT_*. + */ +int +xchk_dir_walk( + struct xfs_scrub *sc, + struct xfs_inode *dp, + xchk_dirent_fn dirent_fn, + void *priv) +{ + struct xfs_da_args args = { + .dp = dp, + .geo = dp->i_mount->m_dir_geo, + .trans = sc->tp, + }; + bool isblock; + int error; + + if (xfs_is_shutdown(dp->i_mount)) + return -EIO; + + ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); + ASSERT(xfs_isilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + + if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) + return xchk_dir_walk_sf(sc, dp, dirent_fn, priv); + + /* dir2 functions require that the data fork is loaded */ + error = xfs_iread_extents(sc->tp, dp, XFS_DATA_FORK); + if (error) + return error; + + error = xfs_dir2_isblock(&args, &isblock); + if (error) + return error; + + if (isblock) + return xchk_dir_walk_block(sc, dp, dirent_fn, priv); + + return xchk_dir_walk_leaf(sc, dp, dirent_fn, priv); +} + +/* + * Look up the inode number for an exact name in a directory. + * + * Callers must hold the ILOCK. File types are XFS_DIR3_FT_*. Names are not + * checked for correctness. + */ +int +xchk_dir_lookup( + struct xfs_scrub *sc, + struct xfs_inode *dp, + const struct xfs_name *name, + xfs_ino_t *ino) +{ + struct xfs_da_args args = { + .dp = dp, + .geo = dp->i_mount->m_dir_geo, + .trans = sc->tp, + .name = name->name, + .namelen = name->len, + .filetype = name->type, + .hashval = xfs_dir2_hashname(dp->i_mount, name), + .whichfork = XFS_DATA_FORK, + .op_flags = XFS_DA_OP_OKNOENT, + }; + bool isblock, isleaf; + int error; + + if (xfs_is_shutdown(dp->i_mount)) + return -EIO; + + ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); + ASSERT(xfs_isilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + + if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) { + error = xfs_dir2_sf_lookup(&args); + goto out_check_rval; + } + + /* dir2 functions require that the data fork is loaded */ + error = xfs_iread_extents(sc->tp, dp, XFS_DATA_FORK); + if (error) + return error; + + error = xfs_dir2_isblock(&args, &isblock); + if (error) + return error; + + if (isblock) { + error = xfs_dir2_block_lookup(&args); + goto out_check_rval; + } + + error = xfs_dir2_isleaf(&args, &isleaf); + if (error) + return error; + + if (isleaf) { + error = xfs_dir2_leaf_lookup(&args); + goto out_check_rval; + } + + error = xfs_dir2_node_lookup(&args); + +out_check_rval: + if (error == -EEXIST) + error = 0; + if (!error) + *ino = args.inumber; + return error; +} diff --git a/fs/xfs/scrub/readdir.h b/fs/xfs/scrub/readdir.h new file mode 100644 index 000000000000..55787f4df123 --- /dev/null +++ b/fs/xfs/scrub/readdir.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2022-2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_SCRUB_READDIR_H__ +#define __XFS_SCRUB_READDIR_H__ + +typedef int (*xchk_dirent_fn)(struct xfs_scrub *sc, struct xfs_inode *dp, + xfs_dir2_dataptr_t dapos, const struct xfs_name *name, + xfs_ino_t ino, void *priv); + +int xchk_dir_walk(struct xfs_scrub *sc, struct xfs_inode *dp, + xchk_dirent_fn dirent_fn, void *priv); + +int xchk_dir_lookup(struct xfs_scrub *sc, struct xfs_inode *dp, + const struct xfs_name *name, xfs_ino_t *ino); + +#endif /* __XFS_SCRUB_READDIR_H__ */ -- cgit v1.2.3 From d9a94480f978d5fbf1235a12a476f9f39a029ea5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 11 Apr 2023 19:00:17 -0700 Subject: xfs: xfs_iget in the directory scrubber needs to use UNTRUSTED In commit 4b80ac64450f, we tried to strengthen the directory scrubber by using the iget call to detect directory entries that point to unallocated inodes. Unfortunately, that commit neglected to pass XFS_IGET_UNTRUSTED to xfs_iget, so we don't check the inode btree first. If the inode number points to something that isn't even an inode cluster, iget will throw corruption errors and return -EFSCORRUPTED, which means that we fail to mark the directory corrupt. Fixes: 4b80ac64450f ("xfs: scrub should mark a directory corrupt if any entries cannot be iget'd") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index f1cbe7b22688..41f10e1c580c 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -57,13 +57,15 @@ xchk_dir_check_ftype( * eofblocks cleanup (which allocates what would be a nested * transaction), we can't use DONTCACHE here because DONTCACHE * inodes can trigger immediate inactive cleanup of the inode. + * Use UNTRUSTED here to check the allocation status of the inode in + * the inode btrees. * * If _iget returns -EINVAL or -ENOENT then the child inode number is * garbage and the directory is corrupt. If the _iget returns * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a * cross referencing error. Any other error is an operational error. */ - error = xfs_iget(mp, sc->tp, inum, 0, 0, &ip); + error = xfs_iget(mp, sc->tp, inum, XFS_IGET_UNTRUSTED, 0, &ip); if (error == -EINVAL || error == -ENOENT) { error = -EFSCORRUPTED; xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); -- cgit v1.2.3 From 6bb9209ceebb07fd07cec25af04eed1809c654de Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 11 Apr 2023 19:00:18 -0700 Subject: xfs: always check the existence of a dirent's child inode When we're scrubbing directory entries, we always need to iget the child inode to make sure that the inode pointer points to a valid inode. The original directory scrub code (commit a5c4) only set us up to do this for ftype=1 filesystems, which is not sufficient; and then commit 4b80 made it worse by exempting the dot and dotdot entries. Sorta-fixes: a5c46e5e8912 ("xfs: scrub directory metadata") Sorta-fixes: 4b80ac64450f ("xfs: scrub should mark a directory corrupt if any entries cannot be iget'd") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/dir.c | 73 +++++++++++++++++++++--------------------------------- 1 file changed, 28 insertions(+), 45 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 41f10e1c580c..6404201d3d36 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -33,52 +33,23 @@ xchk_setup_directory( /* Scrub a directory entry. */ /* Check that an inode's mode matches a given XFS_DIR3_FT_* type. */ -STATIC int +STATIC void xchk_dir_check_ftype( struct xfs_scrub *sc, xfs_fileoff_t offset, - xfs_ino_t inum, + struct xfs_inode *ip, int ftype) { struct xfs_mount *mp = sc->mp; - struct xfs_inode *ip; - int error = 0; if (!xfs_has_ftype(mp)) { if (ftype != XFS_DIR3_FT_UNKNOWN && ftype != XFS_DIR3_FT_DIR) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); - goto out; - } - - /* - * Grab the inode pointed to by the dirent. We release the - * inode before we cancel the scrub transaction. Since we're - * don't know a priori that releasing the inode won't trigger - * eofblocks cleanup (which allocates what would be a nested - * transaction), we can't use DONTCACHE here because DONTCACHE - * inodes can trigger immediate inactive cleanup of the inode. - * Use UNTRUSTED here to check the allocation status of the inode in - * the inode btrees. - * - * If _iget returns -EINVAL or -ENOENT then the child inode number is - * garbage and the directory is corrupt. If the _iget returns - * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a - * cross referencing error. Any other error is an operational error. - */ - error = xfs_iget(mp, sc->tp, inum, XFS_IGET_UNTRUSTED, 0, &ip); - if (error == -EINVAL || error == -ENOENT) { - error = -EFSCORRUPTED; - xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); - goto out; + return; } - if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, offset, &error)) - goto out; if (xfs_mode_to_ftype(VFS_I(ip)->i_mode) != ftype) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); - xfs_irele(ip); -out: - return error; } /* @@ -97,9 +68,9 @@ xchk_dir_actor( void *priv) { struct xfs_mount *mp = dp->i_mount; + struct xfs_inode *ip; xfs_ino_t lookup_ino; xfs_dablk_t offset; - bool checked_ftype = false; int error = 0; offset = xfs_dir2_db_to_da(mp->m_dir_geo, @@ -122,9 +93,6 @@ xchk_dir_actor( if (!strncmp(".", name->name, name->len)) { /* If this is "." then check that the inum matches the dir. */ - if (xfs_has_ftype(mp) && name->type != XFS_DIR3_FT_DIR) - xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); - checked_ftype = true; if (ino != dp->i_ino) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); } else if (!strncmp("..", name->name, name->len)) { @@ -132,9 +100,6 @@ xchk_dir_actor( * If this is ".." in the root inode, check that the inum * matches this dir. */ - if (xfs_has_ftype(mp) && name->type != XFS_DIR3_FT_DIR) - xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); - checked_ftype = true; if (dp->i_ino == mp->m_sb.sb_rootino && ino != dp->i_ino) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); } @@ -151,14 +116,32 @@ xchk_dir_actor( return -ECANCELED; } - /* Verify the file type. This function absorbs error codes. */ - if (!checked_ftype) { - error = xchk_dir_check_ftype(sc, offset, lookup_ino, - name->type); - if (error) - goto out; + /* + * Grab the inode pointed to by the dirent. We release the + * inode before we cancel the scrub transaction. Since we're + * don't know a priori that releasing the inode won't trigger + * eofblocks cleanup (which allocates what would be a nested + * transaction), we can't use DONTCACHE here because DONTCACHE + * inodes can trigger immediate inactive cleanup of the inode. + * Use UNTRUSTED here to check the allocation status of the inode in + * the inode btrees. + * + * If _iget returns -EINVAL or -ENOENT then the child inode number is + * garbage and the directory is corrupt. If the _iget returns + * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a + * cross referencing error. Any other error is an operational error. + */ + error = xfs_iget(mp, sc->tp, ino, XFS_IGET_UNTRUSTED, 0, &ip); + if (error == -EINVAL || error == -ENOENT) { + error = -EFSCORRUPTED; + xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); + goto out; } + if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, offset, &error)) + goto out; + xchk_dir_check_ftype(sc, offset, ip, name->type); + xfs_irele(ip); out: if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return -ECANCELED; -- cgit v1.2.3