Skip to content

Commit

Permalink
feat: support case-insensitive lookups (fixes gh #232)
Browse files Browse the repository at this point in the history
  • Loading branch information
mhx committed Nov 18, 2024
1 parent 4551ec4 commit d92a0c1
Show file tree
Hide file tree
Showing 12 changed files with 3,525 additions and 9 deletions.
38 changes: 38 additions & 0 deletions .maintainer-scripts/unicode-case-fold.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/perl -w
use strict;
use warnings;

# Input data: https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt

# Case-folding table built from the input: source code point => folded
# code point (both stored as integers).
my %map;

# Read CaseFolding.txt from the files named on the command line (or STDIN).
# Data lines have the form "<code>; <status>; <mapping>; # <name>".
LINE: while (my $line = <>) {
    chomp $line;

    # Skip comment lines and blank lines.
    next LINE if $line =~ /^(#|\s*$)/;

    my ($char, $status, $fold) = split /\s*;\s*/, $line;

    # Ignore malformed lines instead of warning on undef and recording a
    # bogus mapping to code point 0.
    next LINE unless defined $fold;

    # Only status C and S entries are single-code-point mappings (see the
    # header of CaseFolding.txt); other statuses are not used here.
    next LINE unless $status =~ /^[CS]$/;

    $map{hex($char)} = hex($fold);
}

# All code points except the surrogate range 0xD800..0xDFFF.
my @valid_code_points = (0..0xD7FF, 0xE000..0x10FFFF);

# Format a numeric code point as an escape sequence for the generated
# string literals: "\uXXXX" (4 hex digits) for values below 0x10000,
# "\UXXXXXXXX" (8 hex digits) otherwise. Hex digits are upper-case.
sub cp_to_str {
    my ($code_point) = @_;

    return sprintf("\\u%04X", $code_point) if $code_point < 0x10000;
    return sprintf("\\U%08X", $code_point);
}

# Consume @valid_code_points in chunks of up to 256 entries. For each chunk,
# build the escaped original string and the escaped folded string, and print
# a {u8"..."sv, u8"..."sv} row only when the chunk contains at least one
# code point whose folding differs from itself.
while (my @chunk = splice @valid_code_points, 0, 256) {
    my $orig   = join '', map { cp_to_str($_) } @chunk;
    my $folded = join '', map { cp_to_str($map{$_} // $_) } @chunk;

    # Chunks without any mapping produce identical strings; omit them.
    next if $orig eq $folded;

    print " {u8\"$orig\"sv, u8\"$folded\"sv},\n";
}
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ if(WITH_TESTS)
pcmaudio_categorizer_test
speedometer_test
terminal_test
unicode_test
utils_test
file_utils_test
worker_group_test
Expand Down
1 change: 1 addition & 0 deletions cmake/libdwarfs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ add_library(
src/internal/fs_section.cpp
src/internal/glob_to_regex.cpp
src/internal/string_table.cpp
src/internal/unicode_case_folding.cpp
src/internal/wcwidth.c
src/internal/worker_group.cpp

Expand Down
11 changes: 11 additions & 0 deletions doc/dwarfs.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,17 @@ options:
overlays and want the file system to reflect its read-only
state, you can set this option.

- `-o case_insensitive`:
Perform case-insensitive lookups in the mounted file system,
i.e. an entry originally named `ReadMe.txt` can be accessed as
`readme.txt`, `README.TXT`, or `rEaDmE.tXt`. This works across
all platforms. When mounting a file system with many files, this
may be slightly slower and consume slightly more memory as
case-insensitive lookup requires an additional mapping table that is
built on-demand. Note that this is not supported if the file
system contains directories with entries that only differ in
case.

- `-o (no_)cache_image`:
By default, `dwarfs` tries to ensure that the compressed file
system image will not be cached by the kernel (i.e. the default
Expand Down
32 changes: 32 additions & 0 deletions include/dwarfs/internal/unicode_case_folding.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz ([email protected])
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/

#pragma once

#include <string>
#include <string_view>

namespace dwarfs::internal {

// NOTE(review): the definitions of these functions are not visible from this
// header; the descriptions below are inferred from the names and signatures
// only. Confirm against src/internal/unicode_case_folding.cpp.

// Takes a string view and returns a new std::string by value; presumably the
// case-folded form of the UTF-8 text in `in`.
std::string utf8_case_fold(std::string_view in);

// Same signature as utf8_case_fold(). Presumably skips validation of the
// input encoding -- TODO confirm what "unchecked" implies for malformed UTF-8.
std::string utf8_case_fold_unchecked(std::string_view in);

} // namespace dwarfs::internal
1 change: 1 addition & 0 deletions include/dwarfs/reader/metadata_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct metadata_options {
bool enable_nlink{false};
bool readonly{false};
bool check_consistency{false};
bool case_insensitive_lookup{false};
size_t block_size{512};
std::optional<file_stat::uid_type> fs_uid{};
std::optional<file_stat::gid_type> fs_gid{};
Expand Down
Loading

0 comments on commit d92a0c1

Please sign in to comment.