Skip to content

Commit

Permalink
Consider wide unicode characters to be size of 2 (#1237)
Browse files (browse the repository at this point in the history)
closes #260
  • Loading branch information
belav authored May 3, 2024
1 parent c0f15ea commit ed295a5
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 9 deletions.
24 changes: 24 additions & 0 deletions Src/CSharpier.Tests/CodeFormatterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

namespace CSharpier.Tests;

using CSharpier.Utilities;

// TODO xml move these around
[TestFixture]
[Parallelizable(ParallelScope.All)]
Expand Down Expand Up @@ -62,6 +64,28 @@ public void Format_Should_Use_Width()
result.Code.Should().Be("var someVariable =\n someValue;\n");
}

[Test]
public void Format_Should_Measure_Regular_Characters()
{
    // The statement is 17 ordinary characters long, which fits inside the
    // 20-column limit, so the formatter is expected to leave it on one line.
    var options = new CodeFormatterOptions { Width = 20 };
    var source = """
        var x = "123456";
        """;
    const string expected = "var x = \"123456\";\n";

    var formatted = CodeFormatter.Format(source, options);

    formatted.Code.Should().Be(expected);
}

[Test]
public void Format_Should_Measure_Wide_Characters()
{
    // Same statement shape as the regular-character test (17 chars), but the six
    // Hangul syllables each count as two columns: 8 + 1 + 12 + 1 + 1 = 23 columns.
    // That exceeds the 20-column limit, so the initializer must move to its own line.
    var code = """
        var x = "가가가가가가";
        """;
    var result = CodeFormatter.Format(code, new CodeFormatterOptions { Width = 20 });

    // The wrapped initializer is indented by one level; no IndentSize is set here,
    // so the formatter's default of four spaces applies.
    result.Code.Should().Be("var x =\n    \"가가가가가가\";\n");
}

[Test]
public void Format_Should_Use_IndentStyle()
{
Expand Down
33 changes: 33 additions & 0 deletions Src/CSharpier/Utilities/CharacterSizeCalculator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/* Copyright (c) Microsoft Corporation.
* Under MIT License
* From https://github.com/PowerShell/PowerShell/tree/master
*/

namespace CSharpier.Utilities;

internal static class CharacterSizeCalculator
{
    /// <summary>
    /// Returns how many columns a single UTF-16 code unit is counted as:
    /// 2 for code units inside the wide ranges listed below, otherwise 1.
    /// </summary>
    /// <param name="c">The UTF-16 code unit to measure.</param>
    /// <returns>2 when <paramref name="c"/> is in a wide range; 1 otherwise.</returns>
    // csharpier-ignore
    public static int CalculateWidth(char c)
    {
        // The ranges are based on http://www.cl.cam.ac.uk/~mgk25/c/wcwidth.c
        // which is derived from https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
        int codeUnit = c;

        var occupiesTwoColumns = codeUnit switch
        {
            < 0x1100                 => false, // everything below the first wide range
            <= 0x115f                => true,  // Hangul Jamo init. consonants
            0x2329 or 0x232a         => true,
            0x303f                   => false, // excluded from the CJK ... Yi range below
            >= 0x2e80 and <= 0xa4cf  => true,  // CJK ... Yi
            >= 0xac00 and <= 0xd7a3  => true,  // Hangul Syllables
            >= 0xf900 and <= 0xfaff  => true,  // CJK Compatibility Ideographs
            >= 0xfe10 and <= 0xfe19  => true,  // Vertical forms
            >= 0xfe30 and <= 0xfe6f  => true,  // CJK Compatibility Forms
            >= 0xff00 and <= 0xff60  => true,  // Fullwidth Forms
            >= 0xffe0 and <= 0xffe6  => true,
            _                        => false,
        };

        // The ranges 0x20000-0x2fffd and 0x30000-0x3fffd are deliberately not
        // checked: .NET strings represent them as surrogate pairs, and surrogate
        // pairs are not handled here.
        return occupiesTwoColumns ? 2 : 1;
    }
}
11 changes: 2 additions & 9 deletions Src/CSharpier/Utilities/StringExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,6 @@ namespace CSharpier.Utilities;

internal static class StringExtensions
{
/// <summary>
/// Computes the MD5 digest of the UTF-8 encoding of <paramref name="value"/> and
/// returns it as a lowercase hexadecimal string without separators.
/// </summary>
/// <param name="value">The text to hash.</param>
/// <returns>The 32-character lowercase hex representation of the digest.</returns>
public static string CalculateHash(this string value)
{
    using var hasher = MD5.Create();
    var hashedBytes = hasher.ComputeHash(Encoding.UTF8.GetBytes(value));

    // BitConverter yields "AB-CD-..."; strip the dashes and lower-case the digits.
    // The output is machine-readable, so use the invariant culture rather than
    // whatever culture the current thread happens to have.
    return BitConverter.ToString(hashedBytes).Replace("-", string.Empty).ToLowerInvariant();
}

public static bool EqualsIgnoreCase(this string value, string otherValue)
{
return string.Compare(value, otherValue, StringComparison.OrdinalIgnoreCase) == 0;
Expand All @@ -37,10 +30,10 @@ public static bool IsBlank(this string? value)
return value == null || string.IsNullOrEmpty(value.Trim());
}

/// <summary>
/// Returns the width of <paramref name="value"/> when printed, which can differ from
/// <c>value.Length</c>: some unicode characters are counted as size 2.
/// See https://github.com/belav/csharpier/issues/260
/// </summary>
/// <param name="value">The text to measure.</param>
/// <returns>
/// The sum of <c>CharacterSizeCalculator.CalculateWidth</c> over every char in
/// <paramref name="value"/>; 0 for an empty string.
/// </returns>
public static int GetPrintedWidth(this string value)
{
    var width = 0;

    // Plain loop rather than LINQ: same total, no delegate or enumerator overhead.
    foreach (var character in value)
    {
        width += CharacterSizeCalculator.CalculateWidth(character);
    }

    return width;
}

public static int CalculateCurrentLeadingIndentation(this string line, int indentSize)
Expand Down

0 comments on commit ed295a5

Please sign in to comment.