Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

For discussion: "slack" compression option #15215

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cmd/dbufstat.in
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,8 @@ def get_compstring(c):
"ZIO_COMPRESS_GZIP_6", "ZIO_COMPRESS_GZIP_7",
"ZIO_COMPRESS_GZIP_8", "ZIO_COMPRESS_GZIP_9",
"ZIO_COMPRESS_ZLE", "ZIO_COMPRESS_LZ4",
"ZIO_COMPRESS_ZSTD", "ZIO_COMPRESS_FUNCTION"]
"ZIO_COMPRESS_ZSTD", "ZIO_COMPRESS_SLACK",
"ZIO_COMPRESS_FUNCTION"]

# If "-rr" option is used, don't convert to string representation
if raw > 1:
Expand Down
6 changes: 4 additions & 2 deletions cmd/zstream/zstream_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,16 @@ zstream_do_decompress(int argc, char *argv[])
type = ZIO_COMPRESS_LZJB;
else if (0 == strcmp("gzip", argv[i]))
type = ZIO_COMPRESS_GZIP_1;
else if (0 == strcmp("slack", argv[i]))
type = ZIO_COMPRESS_SLACK;
else if (0 == strcmp("zle", argv[i]))
type = ZIO_COMPRESS_ZLE;
else if (0 == strcmp("zstd", argv[i]))
type = ZIO_COMPRESS_ZSTD;
else {
fprintf(stderr, "Invalid compression type %s.\n"
"Supported types are off, lz4, lzjb, gzip, "
"zle, and zstd\n",
"Supported types are off, lz4, lzjb, "
"gzip, slack, zle, and zstd\n",
argv[i]);
exit(2);
}
Expand Down
18 changes: 15 additions & 3 deletions include/sys/abd.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,22 @@
/*
* ABD operations
*/
/*
 * Flags that modify how the abd_iterate_func*_flags() iterators walk an ABD.
 */
typedef enum {
	/* Visit the chunks of the requested range from its end backwards. */
	ABD_ITER_REVERSE = 0x1
} abd_iter_flags_t;

/* Union of every flag bit defined in abd_iter_flags_t. */
#define	ABD_ITER_FLAGS_MASK	(ABD_ITER_REVERSE)

int abd_iterate_func_flags(abd_t *, size_t, size_t, abd_iter_func_t *, void *,
abd_iter_flags_t);
#define abd_iterate_func(abd, off, len, fn, priv) \

Check failure on line 123 in include/sys/abd.h

View workflow job for this annotation

GitHub Actions / checkstyle

#define followed by space instead of tab
abd_iterate_func_flags(abd, off, len, fn, priv, 0)

int abd_iterate_func2_flags(abd_t *, abd_t *, size_t, size_t, size_t,
abd_iter_func2_t *, void *, abd_iter_flags_t);
#define abd_iterate_func2(sabd, dabd, soff, doff, len, fn, priv) \

Check failure on line 128 in include/sys/abd.h

View workflow job for this annotation

GitHub Actions / checkstyle

#define followed by space instead of tab
abd_iterate_func2_flags(sabd, dabd, soff, doff, len, fn, priv, 0)

int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *);
int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t,
abd_iter_func2_t *, void *);
void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t);
void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t);
void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t);
Expand Down
10 changes: 9 additions & 1 deletion include/sys/zio_compress.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ enum zio_compress {
ZIO_COMPRESS_ZLE,
ZIO_COMPRESS_LZ4,
ZIO_COMPRESS_ZSTD,
ZIO_COMPRESS_SLACK,
ZIO_COMPRESS_FUNCTIONS
};

Expand Down Expand Up @@ -119,6 +120,9 @@ enum zio_zstd_levels {
ZIO_ZSTD_LEVEL_LEVELS
};

/*
 * True if compressor will reuse the source buffer.
 *
 * The argument is parenthesized so that an expression argument (for example
 * a masked or conditional value) is compared as a whole rather than being
 * split by the precedence of '=='.
 */
#define	ZIO_COMPRESS_INPLACE(c)	((c) == ZIO_COMPRESS_SLACK)

/* Forward Declaration to avoid visibility problems */
struct zio_prop;

Expand Down Expand Up @@ -170,11 +174,15 @@ extern size_t zfs_lz4_compress(abd_t *src, abd_t *dst, size_t s_len,
size_t d_len, int level);
extern int zfs_lz4_decompress(abd_t *src, abd_t *dst, size_t s_len,
size_t d_len, int level);
extern size_t zfs_slack_compress(abd_t *src, abd_t *dst, size_t s_len,
size_t d_len, int level);
extern int zfs_slack_decompress(abd_t *src, abd_t *dst, size_t s_len,
size_t d_len, int level);

/*
* Compress and decompress data if necessary.
*/
extern size_t zio_compress_data(enum zio_compress c, abd_t *src, abd_t **dst,
extern size_t zio_compress_data(enum zio_compress c, abd_t *src, abd_t **dstp,
size_t s_len, size_t d_len, uint8_t level);
extern int zio_decompress_data(enum zio_compress c, abd_t *src, abd_t *abd,
size_t s_len, size_t d_len, uint8_t *level);
Expand Down
1 change: 1 addition & 0 deletions include/zfeature_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ typedef enum spa_feature {
SPA_FEATURE_FAST_DEDUP,
SPA_FEATURE_LONGNAME,
SPA_FEATURE_LARGE_MICROZAP,
SPA_FEATURE_SLACK_COMPRESS,
SPA_FEATURES
} spa_feature_t;

Expand Down
7 changes: 4 additions & 3 deletions lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@
<elf-symbol name='fletcher_4_superscalar_ops' size='128' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='2464' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='2520' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
Expand Down Expand Up @@ -6195,7 +6195,8 @@
<enumerator name='SPA_FEATURE_FAST_DEDUP' value='41'/>
<enumerator name='SPA_FEATURE_LONGNAME' value='42'/>
<enumerator name='SPA_FEATURE_LARGE_MICROZAP' value='43'/>
<enumerator name='SPA_FEATURES' value='44'/>
<enumerator name='SPA_FEATURE_SLACK_COMPRESS' value='44'/>
<enumerator name='SPA_FEATURES' value='45'/>
</enum-decl>
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
<qualified-type-def type-id='80f4b756' const='yes' id='b99c00c9'/>
Expand Down Expand Up @@ -9375,7 +9376,7 @@
</abi-instr>
<abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'>
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='19712' id='fd4573e5'>
<subrange length='44' type-id='7359adad' id='cf8ba455'/>
<subrange length='45' type-id='7359adad' id='cf8ba455'/>
</array-type-def>
<enum-decl name='zfeature_flags' id='6db816a4'>
<underlying-type type-id='9cac1fee'/>
Expand Down
1 change: 1 addition & 0 deletions lib/libzpool/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ nodist_libzpool_la_SOURCES = \
module/zfs/rrwlock.c \
module/zfs/sa.c \
module/zfs/sha2_zfs.c \
module/zfs/slack.c \
module/zfs/skein_zfs.c \
module/zfs/spa.c \
module/zfs/spa_checkpoint.c \
Expand Down
10 changes: 8 additions & 2 deletions man/man7/zfsprops.7
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@
.\" Copyright 2019 Joyent, Inc.
.\" Copyright (c) 2019, Kjeld Schouten-Lebbing
.\" Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
.\" Copyright (c) 2024, Klara, Inc.
.\"
.Dd June 29, 2024
.Dd November 18, 2024
.Dt ZFSPROPS 7
.Os
.
Expand Down Expand Up @@ -796,7 +797,7 @@ Changing this property affects only newly-written data.
.It Xo
.Sy compression Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy gzip Ns | Ns
.Sy gzip- Ns Ar N Ns | Ns Sy lz4 Ns | Ns Sy lzjb Ns | Ns Sy zle Ns | Ns Sy zstd Ns | Ns
.Sy zstd- Ns Ar N Ns | Ns Sy zstd-fast Ns | Ns Sy zstd-fast- Ns Ar N
.Sy zstd- Ns Ar N Ns | Ns Sy zstd-fast Ns | Ns Sy zstd-fast- Ns Ar N Ns | Ns Sy slack
.Xc
Controls the compression algorithm used for this dataset.
.Pp
Expand Down Expand Up @@ -906,6 +907,11 @@ The
.Sy zle
compression algorithm compresses runs of zeros.
.Pp
The
.Sy slack
compression algorithm removes runs of zeroes from the end of blocks.
It is useful when using very large block sizes with incompressible data.
.Pp
This property can also be referred to by its shortened column name
.Sy compress .
Changing this property affects only newly-written data.
Expand Down
37 changes: 36 additions & 1 deletion man/man7/zpool-features.7
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
.\" Copyright (c) 2019, Allan Jude
.\" Copyright (c) 2021, Colm Buckley <[email protected]>
.\"
.Dd October 2, 2024
.Dd November 18, 2024
.Dt ZPOOL-FEATURES 7
.Os
.
Expand Down Expand Up @@ -932,6 +932,41 @@ preventing hash collision attacks on systems with dedup.
.Pp
.checksum-spiel skein
.
.feature com.klarasystems slack_compress no extensible_dataset
.Sy slack
is a compression option that simply removes the trailing run of zero bytes at
the end of each block.
It is designed for when very large block sizes are used to store largely
incompressible data.
Normally in this scenario
.Sy compress Ns = Ns Sy off
would be used,
but with large block sizes that can often leave a multi-megabyte run of zeroes
at the end of a block, adding memory and checksumming overhead.
.Pp
When the
.Sy slack_compress
feature is set to
.Sy enabled ,
the administrator can turn on
.Sy slack
compression of any dataset using
.Nm zfs Cm set Sy compress Ns = Ns Sy slack Ar dset
.Po see Xr zfs-set 8 Pc .
This feature becomes
.Sy active
once a
.Sy compress
property has been set to
.Sy slack ,
and will return to being
.Sy enabled
once all filesystems that have ever had their
.Sy compress
property set to
.Sy slack
are destroyed.
.
.feature com.delphix spacemap_histogram yes
This feature allows ZFS to maintain more information about how free space
is organized within the pool.
Expand Down
1 change: 1 addition & 0 deletions man/man8/zstream.8
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ Valid compression types include
.Sy gzip ,
.Sy lz4 ,
.Sy lzjb ,
.Sy slack ,
.Sy zstd ,
and
.Sy zle .
Expand Down
1 change: 1 addition & 0 deletions module/Kbuild.in
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ ZFS_OBJS := \
sa.o \
sha2_zfs.o \
skein_zfs.o \
slack.o \
spa.o \
spa_checkpoint.o \
spa_config.o \
Expand Down
1 change: 1 addition & 0 deletions module/Makefile.bsd
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ SRCS+= abd.c \
sa.c \
sha2_zfs.c \
skein_zfs.c \
slack.c \
spa.c \
space_map.c \
space_reftree.c \
Expand Down
14 changes: 14 additions & 0 deletions module/zcommon/zfeature_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,20 @@ zpool_feature_init(void)
ZFEATURE_TYPE_BOOLEAN, large_microzap_deps, sfeatures);
}

{
	/*
	 * Register the per-dataset slack_compress feature.  It lists
	 * extensible_dataset as its only dependency.
	 */
	static const spa_feature_t slack_deps[] = {
		SPA_FEATURE_EXTENSIBLE_DATASET,
		SPA_FEATURE_NONE
	};
	zfeature_register(SPA_FEATURE_SLACK_COMPRESS,
	    "com.klarasystems:slack_compress", "slack_compress",
	    "slack compression support",
	    ZFEATURE_FLAG_PER_DATASET, ZFEATURE_TYPE_BOOLEAN,
	    slack_deps, sfeatures);
}

zfs_mod_list_supported_free(sfeatures);
}

Expand Down
1 change: 1 addition & 0 deletions module/zcommon/zfs_prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ zfs_prop_init(void)
{ "gzip-9", ZIO_COMPRESS_GZIP_9 },
{ "zle", ZIO_COMPRESS_ZLE },
{ "lz4", ZIO_COMPRESS_LZ4 },
{ "slack", ZIO_COMPRESS_SLACK },
{ "zstd", ZIO_COMPRESS_ZSTD },
{ "zstd-fast",
ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_DEFAULT) },
Expand Down
67 changes: 62 additions & 5 deletions module/zfs/abd.c
Original file line number Diff line number Diff line change
Expand Up @@ -757,19 +757,72 @@ abd_advance_abd_iter(abd_t *abd, abd_t *cabd, struct abd_iter *aiter,
return (cabd);
}

int
abd_iterate_func(abd_t *abd, size_t off, size_t size,
abd_iter_func_t *func, void *private)
/*
 * Reverse-order worker for abd_iterate_func_flags().
 *
 * Calls func() once per mapped chunk of the range [off, off + size),
 * starting with the chunk that holds the last byte of the range and working
 * back towards off.  Each call is handed the lowest address of the chunk and
 * its length, so bytes within a single chunk are still presented in
 * ascending order.
 *
 * Iteration stops at the first non-zero return from func(), and that value
 * is returned; 0 is returned once the whole range has been visited.
 *
 * No range validation is done here; flags must include ABD_ITER_REVERSE.
 */
static int
abd_iterate_func_reverse_flags(abd_t *abd, size_t off, size_t size,
    abd_iter_func_t *func, void *private, abd_iter_flags_t flags)
{
	ASSERT0(flags & ~ABD_ITER_FLAGS_MASK);
	ASSERT(flags & ABD_ITER_REVERSE);

	struct abd_iter aiter;
	int ret = 0;

	abd_t *c_abd;
	while (size > 0) {
		/*
		 * XXX in the forward iterator, this is abd_advance_abd_iter().
		 * going backwards directly is awkward, so for now we just
		 * reinitialise the iterator to the wanted position. that
		 * itself is a forward walk, but to truly go backwards
		 * would require backpointers in gang abds too. this is
		 * fine for now. -- robn, 2024-11-19
		 */
		c_abd = abd_init_abd_iter(abd, &aiter, off + size - 1);
		IMPLY(abd_is_gang(abd), c_abd != NULL);

		abd_iter_map(&aiter);

		/*
		 * XXX abd_iter_map() set iter_mapaddr =
		 * page addr + iter_offset, so we need to adjust back
		 */
		char *addr = (char *)aiter.iter_mapaddr - aiter.iter_offset;
		size_t len = aiter.iter_offset + 1;

		/*
		 * The chunk may begin before the start of the requested
		 * range (off is not necessarily chunk-aligned).  Trim the
		 * front so the callback never sees bytes below off and so
		 * the unsigned remaining count cannot wrap around.
		 */
		if (len > size) {
			addr += len - size;
			len = size;
		}

		ASSERT3U(len, >, 0);

		ret = func(addr, len, private);

		abd_iter_unmap(&aiter);

		if (ret != 0)
			break;

		size -= len;
	}

	return (ret);
}

int
abd_iterate_func_flags(abd_t *abd, size_t off, size_t size,
abd_iter_func_t *func, void *private, abd_iter_flags_t flags)
{
ASSERT0(flags & ~ABD_ITER_FLAGS_MASK);

if (size == 0)
return (0);

abd_verify(abd);
ASSERT3U(off + size, <=, abd->abd_size);

if (flags & ABD_ITER_REVERSE)
return (abd_iterate_func_reverse_flags(abd, off, size, func,
private, flags));

struct abd_iter aiter;
int ret = 0;

abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off);

while (size > 0) {
Expand Down Expand Up @@ -932,13 +985,17 @@ abd_zero_off(abd_t *abd, size_t off, size_t size)
* times during this iteration.
*/
int
abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff,
size_t size, abd_iter_func2_t *func, void *private)
abd_iterate_func2_flags(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff,
size_t size, abd_iter_func2_t *func, void *private, abd_iter_flags_t flags)
{
(void) flags;
int ret = 0;
struct abd_iter daiter, saiter;
abd_t *c_dabd, *c_sabd;

ASSERT0(flags & ~ABD_ITER_FLAGS_MASK);
ASSERT0(flags & ABD_ITER_REVERSE); /* XXX no support for reverse yet */

if (size == 0)
return (0);

Expand Down
5 changes: 4 additions & 1 deletion module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1785,11 +1785,12 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj)
*/
if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
!HDR_COMPRESSION_ENABLED(hdr)) {
abd = NULL;
abd = abd_alloc_sametype(hdr->b_l1hdr.b_pabd, lsize);
csize = zio_compress_data(HDR_GET_COMPRESS(hdr),
hdr->b_l1hdr.b_pabd, &abd, lsize, MIN(lsize, psize),
hdr->b_complevel);
if (csize >= lsize || csize > psize) {
abd_free(abd);
ret = SET_ERROR(EIO);
return (ret);
}
Expand Down Expand Up @@ -10537,6 +10538,8 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)

/* a log block is never entirely zero */
ASSERT(psize != 0);
/* LZ4 compress will always allocate a buffer */
ASSERT3P(abd, !=, NULL);
asize = vdev_psize_to_asize(dev->l2ad_vdev, psize);
ASSERT(asize <= sizeof (*lb));

Expand Down
Loading
Loading