Skip to content

Commit

Permalink
Add vdev property to toggle adding io to vdev queue
Browse files Browse the repository at this point in the history
Added vdev property to toggle queueing io to vdev queue when
reading / writing from / to vdev. The intention behind this
property is to improve performance when using o_direct.

Signed-off-by: MigeljanImeri <[email protected]>
  • Loading branch information
MigeljanImeri committed Nov 13, 2024
1 parent 1c9a4c8 commit 118abc4
Show file tree
Hide file tree
Showing 14 changed files with 147 additions and 16 deletions.
1 change: 1 addition & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ typedef enum {
VDEV_PROP_TRIM_SUPPORT,
VDEV_PROP_TRIM_ERRORS,
VDEV_PROP_SLOW_IOS,
VDEV_PROP_QUEUE_IO,
VDEV_NUM_PROPS
} vdev_prop_t;

Expand Down
1 change: 1 addition & 0 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ struct vdev {
uint64_t vdev_io_t;
uint64_t vdev_slow_io_n;
uint64_t vdev_slow_io_t;
uint64_t vdev_queue_io;
};

#define VDEV_PAD_SIZE (8 << 10)
Expand Down
27 changes: 14 additions & 13 deletions include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,24 +209,25 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_TRYHARD (1ULL << 17)
#define ZIO_FLAG_OPTIONAL (1ULL << 18)
#define ZIO_FLAG_DIO_READ (1ULL << 19)
#define ZIO_FLAG_DIO_WRITE (1ULL << 20)
#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1)

/*
* Flags not inherited by any children.
*/
#define ZIO_FLAG_DONT_QUEUE (1ULL << 20) /* must be first for INHERIT */
#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 21)
#define ZIO_FLAG_IO_BYPASS (1ULL << 22)
#define ZIO_FLAG_IO_REWRITE (1ULL << 23)
#define ZIO_FLAG_RAW_COMPRESS (1ULL << 24)
#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 25)
#define ZIO_FLAG_GANG_CHILD (1ULL << 26)
#define ZIO_FLAG_DDT_CHILD (1ULL << 27)
#define ZIO_FLAG_GODFATHER (1ULL << 28)
#define ZIO_FLAG_NOPWRITE (1ULL << 29)
#define ZIO_FLAG_REEXECUTED (1ULL << 30)
#define ZIO_FLAG_DELEGATED (1ULL << 31)
#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 32)
#define ZIO_FLAG_DONT_QUEUE (1ULL << 21) /* must be first for INHERIT */
#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 22)
#define ZIO_FLAG_IO_BYPASS (1ULL << 23)
#define ZIO_FLAG_IO_REWRITE (1ULL << 24)
#define ZIO_FLAG_RAW_COMPRESS (1ULL << 25)
#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 26)
#define ZIO_FLAG_GANG_CHILD (1ULL << 27)
#define ZIO_FLAG_DDT_CHILD (1ULL << 28)
#define ZIO_FLAG_GODFATHER (1ULL << 29)
#define ZIO_FLAG_NOPWRITE (1ULL << 30)
#define ZIO_FLAG_REEXECUTED (1ULL << 31)
#define ZIO_FLAG_DELEGATED (1ULL << 32)
#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 33)

#define ZIO_ALLOCATOR_NONE (-1)
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
Expand Down
3 changes: 2 additions & 1 deletion lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -5916,7 +5916,8 @@
<enumerator name='VDEV_PROP_TRIM_SUPPORT' value='49'/>
<enumerator name='VDEV_PROP_TRIM_ERRORS' value='50'/>
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
<enumerator name='VDEV_NUM_PROPS' value='52'/>
<enumerator name='VDEV_PROP_QUEUE_IO' value='52'/>
<enumerator name='VDEV_NUM_PROPS' value='53'/>
</enum-decl>
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
Expand Down
3 changes: 3 additions & 0 deletions man/man7/vdevprops.7
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ If this device should perform new allocations, used to disable a device
when it is scheduled for later removal.
See
.Xr zpool-remove 8 .
.It Sy queue_io
Add io to the vdev queue when reading or writing to this vdev.
Disabling this property can sometimes improve performance for direct IOs.
.El
.Ss User Properties
In addition to the standard native properties, ZFS supports arbitrary user
Expand Down
1 change: 1 addition & 0 deletions module/zcommon/zfs_valstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ _VALSTR_BITFIELD_IMPL(zio_flag,
{ '.', "TH", "TRYHARD" },
{ '.', "OP", "OPTIONAL" },
{ '.', "RD", "DIO_READ" },
{ '.', "WD", "DIO_WRITE" },
{ '.', "DQ", "DONT_QUEUE" },
{ '.', "DP", "DONT_PROPAGATE" },
{ '.', "BY", "IO_BYPASS" },
Expand Down
3 changes: 3 additions & 0 deletions module/zcommon/zpool_prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,9 @@ vdev_prop_init(void)
zprop_register_index(VDEV_PROP_TRIM_SUPPORT, "trim_support", 0,
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "TRIMSUP",
boolean_table, sfeatures);
zprop_register_index(VDEV_PROP_QUEUE_IO, "queue_io", 1,
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "QUEUE_IO",
boolean_table, sfeatures);

/* default index properties */
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
Expand Down
3 changes: 2 additions & 1 deletion module/zfs/dmu_direct.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,8 @@ dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx)
zio_t *zio = zio_write(pio, os->os_spa, txg, bp, data,
db->db.db_size, db->db.db_size, &zp,
dmu_write_direct_ready, NULL, dmu_write_direct_done, dsa,
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb);
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL | ZIO_FLAG_DIO_WRITE,
&zb);

if (pio == NULL)
return (zio_wait(zio));
Expand Down
12 changes: 12 additions & 0 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);

vd->vdev_queue_io = vdev_prop_default_numeric(VDEV_PROP_QUEUE_IO);

list_link_init(&vd->vdev_config_dirty_node);
list_link_init(&vd->vdev_state_dirty_node);
list_link_init(&vd->vdev_initialize_node);
Expand Down Expand Up @@ -6053,6 +6055,15 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_slow_io_t = intval;
break;
case VDEV_PROP_QUEUE_IO:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
if (vd->vdev_ops->vdev_op_leaf) {
vd->vdev_queue_io = intval;
}
break;
default:
/* Most processing is done in vdev_props_set_sync */
break;
Expand Down Expand Up @@ -6416,6 +6427,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
case VDEV_PROP_IO_T:
case VDEV_PROP_SLOW_IO_N:
case VDEV_PROP_SLOW_IO_T:
case VDEV_PROP_QUEUE_IO:
err = vdev_prop_get_int(vd, prop, &intval);
if (err && err != ENOENT)
break;
Expand Down
10 changes: 10 additions & 0 deletions module/zfs/vdev_queue.c
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,12 @@ vdev_queue_io(zio_t *zio)
zio->io_flags |= ZIO_FLAG_DONT_QUEUE;
zio->io_timestamp = gethrtime();

if (!zio->io_vd->vdev_queue_io &&
zio->io_flags & (ZIO_FLAG_DIO_READ | ZIO_FLAG_DIO_WRITE)) {
zio->io_queue_state = ZIO_QS_NONE;
return (zio);
}

mutex_enter(&vq->vq_lock);
vdev_queue_io_add(vq, zio);
nio = vdev_queue_io_to_issue(vq);
Expand Down Expand Up @@ -978,6 +984,10 @@ vdev_queue_io_done(zio_t *zio)
vq->vq_io_complete_ts = now;
vq->vq_io_delta_ts = zio->io_delta = now - zio->io_timestamp;

if (zio->io_queue_state == ZIO_QS_NONE) {
return;
}

mutex_enter(&vq->vq_lock);
vdev_queue_pending_remove(vq, zio);

Expand Down
2 changes: 1 addition & 1 deletion tests/runfiles/common.run
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ tags = ['functional', 'cli_root', 'zpool_scrub']
[tests/functional/cli_root/zpool_set]
tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg',
'zpool_set_ashift', 'zpool_set_features', 'vdev_set_001_pos',
'user_property_001_pos', 'user_property_002_neg']
'user_property_001_pos', 'user_property_002_neg', 'vdev_set_queue_io']
tags = ['functional', 'cli_root', 'zpool_set']

[tests/functional/cli_root/zpool_split]
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -1233,6 +1233,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_set/setup.ksh \
functional/cli_root/zpool/setup.ksh \
functional/cli_root/zpool_set/vdev_set_001_pos.ksh \
functional/cli_root/zpool_set/vdev_set_queue_io.ksh \
functional/cli_root/zpool_set/zpool_set_common.kshlib \
functional/cli_root/zpool_set/zpool_set_001_pos.ksh \
functional/cli_root/zpool_set/zpool_set_002_neg.ksh \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,5 @@ typeset -a properties=(
trim_support
trim_errors
slow_ios
queue_io
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2024 by Triad National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib

#
# DESCRIPTION:
# Toggling vdev queue_io property while reading from vdev should not cause panic.
#
# STRATEGY:
# 1. Create a zpool
# 2. Write a file to the pool.
# 3. Start reading from file, while also toggling the queue_io property on / off.
#

verify_runnable "global"

command -v fio > /dev/null || log_unsupported "fio missing"
log_must save_tunable DIO_ENABLED
log_must set_tunable32 DIO_ENABLED 1

function toggle_queue_io
{
zpool set queue_io=off $TESTPOOL1 $FILEDEV
sleep 0.1
zpool set queue_io=on $TESTPOOL1 $FILEDEV
sleep 0.1
}

function cleanup
{
log_must destroy_pool $TESTPOOL1
rm -f $FILEDEV
log_must restore_tunable DIO_ENABLED
}

log_assert "Toggling vdev queue_io property while reading from vdev should not cause panic"
log_onexit cleanup

# 1. Create a pool

FILEDEV="$TEST_BASE_DIR/filedev.$$"
log_must truncate -s $(($MINVDEVSIZE * 2)) $FILEDEV
log_must create_pool $TESTPOOL1 $FILEDEV

mntpnt=$(get_prop mountpoint $TESTPOOL1)

# 2. Write a file to the pool, while also toggling the queue_io property on / off.

log_must eval "fio --filename=$mntpnt/foobar --name=write-file \
--rw=write --size=$MINVDEVSIZE --bs=128k --numjobs=1 --direct=1 \
--ioengine=sync --runtime=10 &"

ITERATIONS=30

for i in $(seq $ITERATIONS); do
log_must toggle_queue_io
done;
wait

# 3. Starting reading from file, while also toggling the queue_io property on / off.

log_must eval "fio --filename=$mntpnt/foobar --name=read-file \
--rw=read --size=$MINVDEVSIZE --bs=128k --numjobs=1 --direct=1 \
--ioengine=sync --time_based --runtime=10 &"

for i in $(seq $ITERATIONS); do
log_must toggle_queue_io
done;
wait

log_pass "Toggling vdev queue_io property while reading from vdev does not cause panic"

0 comments on commit 118abc4

Please sign in to comment.