Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'UTF8Array' to the standard library #1062

Merged
merged 9 commits into from
Oct 5, 2023
8 changes: 4 additions & 4 deletions Library/Hylo/Core/CollectionOfOne.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ public type CollectionOfOne<Element: Movable & Deinitializable>: Deinitializable
&self.contents = contents
}

public fun start_index() -> Bool { true }
public fun start_index() -> Bool { false }

public fun end_index() -> Bool { false }
public fun end_index() -> Bool { true }

public fun index(after i: Bool) -> Bool { false }
public fun index(after i: Bool) -> Bool { true }

public subscript(_ position: Bool): Element {
let {
// TODO: uncomment when #1046 is implemented
// precondition(position, "index is out of bounds")
// precondition(!position, "index is out of bounds")
yield contents
}
}
Expand Down
3 changes: 3 additions & 0 deletions Library/Hylo/Core/Numbers/Integers/FixedWidthInteger.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
/// catch overflows, or access the minimum or maximum representable values of an integer type.
public trait FixedWidthInteger: BinaryInteger {

/// Returns `true` if the bits set in `mask` are also set in `self`.
///
/// `Int` and `UInt` implement this requirement as `(self & mask) == mask`: bits that are set in
/// `self` but not in `mask` do not affect the result.
fun matches(_ mask: Self) -> Bool

/// Returns the sum of `self` and `other` along with a flag indicating whether overflow occurred
/// in the operation.
fun adding_reporting_overflow(_ other: Self) -> {partial_value: Self, overflow: Bool}
Expand Down
4 changes: 4 additions & 0 deletions Library/Hylo/Core/Numbers/Integers/Int.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,10 @@ public conformance Int: BinaryInteger {

public conformance Int: FixedWidthInteger {

/// Returns `true` iff every bit set in `mask` is also set in `self`.
public fun matches(_ mask: Self) -> Bool {
  // Keep only the bits that `self` and `mask` have in common.
  let shared_bits = self & mask
  return shared_bits == mask
}

public fun adding_reporting_overflow(_ other: Self) -> {partial_value: Self, overflow: Bool} {
let r = Builtin.sadd_with_overflow_word(value, other.value)
return (partial_value: Int(value: r.0), overflow: Bool(value: r.1))
Expand Down
4 changes: 4 additions & 0 deletions Library/Hylo/Core/Numbers/Integers/UInt.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,10 @@ public conformance UInt: BinaryInteger {

public conformance UInt: FixedWidthInteger {

/// Returns `true` iff every bit set in `mask` is also set in `self`.
public fun matches(_ mask: Self) -> Bool {
  // Keep only the bits that `self` and `mask` have in common.
  let shared_bits = self & mask
  return shared_bits == mask
}

public fun adding_reporting_overflow(_ other: Self) -> {partial_value: Self, overflow: Bool} {
let r = Builtin.uadd_with_overflow_word(value, other.value)
return (partial_value: UInt(value: r.0), overflow: Bool(value: r.1))
Expand Down
3 changes: 3 additions & 0 deletions Library/Hylo/Core/Operators.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,11 @@ public operator infix/= : assignment
public operator infix%= : assignment
public operator infix+= : assignment
public operator infix-= : assignment
public operator infix^= : assignment
public operator infix&= : assignment
public operator infix&&= : assignment
public operator infix|= : assignment
public operator infix||= : assignment

public operator infix** : exponentiation

Expand Down
12 changes: 12 additions & 0 deletions Library/Hylo/Core/Pointer.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ public type Pointer<Pointee>: Regular {
yield base as* (remote let Pointee)
}

/// Creates an instance whose memory representation is the bit pattern of `address`.
public init(bit_pattern address: UInt) {
  // Reinterpret the machine word held by `address` as a built-in pointer.
  &base = Builtin.inttoptr_word(address.value)
}

/// Creates an instance representing the same address as `p`.
public init(_ p: PointerToMutable<Pointee>) {
&base = p.base
Expand All @@ -27,6 +32,13 @@ public type Pointer<Pointee>: Regular {
&base = p.base
}

/// Returns a pointer `n` strides of `Pointee` past `self`.
///
/// The offset is computed in bytes as `MemoryLayout<Pointee>.stride() * n`.
public fun advance(by n: Int) -> Self {
  let stride = MemoryLayout<Pointee>.stride()
  let byte_offset = stride * n
  return .new(base: Builtin.advanced_by_bytes_word(base, byte_offset.value))
}

/// Creates an instance that does not address any usable storage.
public static fun null() -> Self {
.new(base: Builtin.zeroinitializer_ptr())
Expand Down
6 changes: 6 additions & 0 deletions Library/Hylo/Core/PointerToMutable.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ public extension PointerToMutable where Pointee: Movable {

}

/// Projects a mutable pointer to `x`.
///
/// The pointer wraps the address computed by `Builtin.address(of:)`.
public subscript mutable_pointer<T>(to x: inout T): PointerToMutable<T> {
  let {
    yield PointerToMutable(base: Builtin.address(of: x))
  }
}


/// Initializes `x` to `y`.
///
/// - Note: This function is a workaround for the lack of `set` bindings (see #925).
Expand Down
25 changes: 25 additions & 0 deletions Library/Hylo/LibC.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,28 @@ public fun fdopen(_ descriptor: Int, _ mode: CVoidPointer) -> CVoidPointer
/// returning the number of elements written.
@ffi("fwrite")
public fun fwrite(_ data: CVoidPointer, _ size: Int, _ count: Int, _ stream: CVoidPointer) -> Int

/// Copies `count` bytes from the object pointed to by `source` to the object pointed to by
/// `destination` and returns `destination`.
///
/// Both objects are reinterpreted as buffers of `Int8`, so `count` is a number of bytes rather
/// than a number of elements of the objects' original types.
///
/// If the objects overlap, the behavior is undefined; use `memmove` for possibly overlapping
/// buffers. If either `source` or `destination` is invalid or null, the behavior is undefined.
@ffi("memcpy")
public fun memcpy(
_ destination: CVoidPointer, _ source: CVoidPointer, _ count: Int
) -> CVoidPointer

/// Copies `count` bytes from the object pointed to by `source` to the object pointed to by
/// `destination` and returns `destination`.
///
/// Both objects are reinterpreted as buffers of `Int8`, so `count` is a number of bytes rather
/// than a number of elements of the objects' original types.
///
/// Objects may overlap: copying takes place as if the bytes from `source` were copied to a
/// temporary buffer and then copied to `destination`. If either `source` or `destination` is
/// invalid or null, the behavior is undefined.
@ffi("memmove")
public fun memmove(
_ destination: CVoidPointer, _ source: CVoidPointer, _ count: Int
) -> CVoidPointer
211 changes: 211 additions & 0 deletions Library/Hylo/UTF8Array.hylo
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
/// A collection of UTF-8 code units.
public type UTF8Array {

// TODO: Remove when `UInt64` is implemented
typealias UInt64 = UInt

/// The units in the collection.
///
/// The two highest bits of `units`, b63 and b62, encode the representation discriminator:
///
///     ┌──────────────────────╥─────┬─────┐
///     │ Form                 ║ b63 │ b62 │
///     ├──────────────────────╫─────┼─────┤
///     │ inline, owned        ║  0  │  0  │
///     │ out-of-line, owned   ║  1  │  0  │
///     │ out-of-line, unowned ║  1  │  1  │
///     └──────────────────────╨─────┴─────┘
///
/// b63 indicates whether the payload of the view is stored out-of-line. If it is, `units` with
/// b63 and b62 unset stores a pointer to the out-of-line payload, which is a buffer storing an
/// `Int`, which is the number of units in the view, followed by a contiguous array of bytes,
/// which contains the units themselves, and finally a null terminator.
///
/// If the payload is inline, the number of units in the view is stored in the 6 lowest bits of
/// `units`'s most significant byte and the units themselves are stored in the following bytes.
/// For example, the inline UTF-8 view of "Salut" is as follows:
///
///       most significant byte
///       ↓
///     ┌────┬────┬────┬────┬────┬────┬────┬────┐
///     │ 05 │ 53 │ 61 │ 6C │ 75 │ 74 │ 00 │ 00 │
///     └────┴────┴────┴────┴────┴────┴────┴────┘
///
/// b62 indicates whether the view owns its storage and is responsible for its deallocation if
/// it is out-of-line. Unowned, out-of-line storage typically corresponds to static allocations.
let units: UInt64

/// Creates an instance with the given representation.
memberwise init

}

/// Construction, representation queries, and raw payload access for `UTF8Array`.
public extension UTF8Array {

  /// Creates a view taking ownership of the out-of-line payload referred by `p`.
  ///
  /// The address of `p` is stored in `units` with b63 set and b62 unset, which is the
  /// "out-of-line, owned" form.
  init(taking_ownership_of p: MemoryAddress) {
    var u = UInt64(truncating_or_extending: UInt(bit_pattern: p))
    // `0b10 << 62` sets b63 (out-of-line) and leaves b62 unset (owned).
    &u |= (0b10 as UInt64) << 62
    &self = .new(units: u)
  }

  /// Creates an empty view.
  ///
  /// The all-zero representation is the inline form with a count of 0.
  public init() {
    &self = .new(units: 0)
  }

  /// Projects the units in `self` as a null-terminated buffer.
  ///
  /// Use this method to read the contents of the view as a C-style null-terminated string. The
  /// returned buffer has a size `count() + 1`. It is alive only for the duration of the projection
  /// and shall not be mutated.
  public property nullterminated: Pointer<Int8> {
    let {
      if is_inline() {
        // Zero-initialized scratch storage: bytes that are not overwritten below remain null, so
        // the eighth byte always terminates the string.
        var storage: UInt = 0
        let buffer = PointerToMutable<Int8>(type_punning: mutable_pointer[to: &storage])

        // Note: The copy could be optimized away if we stored the units in memory the same way
        // they would be stored in an array, i.e., in reverse order on big-endian machines.
        var i = 0
        while i < 7 {
          // Unit `i` lives `i + 1` bytes below the most significant byte of `units`.
          let s = 8 * (6 - i)
          let v = Int8(truncating_or_extending: units >> s)
          // Write unit `i` at offset `i`; writing through `buffer` itself would overwrite the
          // first byte at each iteration and leave the rest of the buffer empty.
          buffer.advance(by: i).unsafe_initialize_pointee(v)
          &i += 1
        }

        yield Pointer<Int8>(buffer)
      } else {
        yield unsafe_heap_payload.0
      }
    }
  }

  /// Returns `true` if the payload of `self` is stored inline.
  fun is_inline() -> Bool {
    // Note: the flag is stored inversed so that `0` is an empty string.
    (units & ((1 as UInt64) << 63)) == 0
  }

  /// Returns `true` if `self` owns its payload.
  fun is_owned() -> Bool {
    // Note: like b63, b62 is stored inversed; an unset bit means "owned".
    (units & ((1 as UInt64) << 62)) == 0
  }

  /// Projects the address and size of `self`'s payload, assuming it is allocated out-of-line.
  ///
  /// `start` is the address of the first unit, i.e., one `Int` past the beginning of the
  /// out-of-line buffer; `count` is the value of the `Int` stored at the beginning of that buffer.
  ///
  /// - Requires: `!is_inline()`.
  property unsafe_heap_payload: {start: Pointer<Int8>, count: Int} {
    let {
      // TODO: uncomment when #1046 is implemented
      // assert(!is_inline())

      // Clear the whole most significant byte (which includes b63 and b62) to recover the
      // address of the buffer.
      let buffer = Pointer<Int>(
        bit_pattern: UInt(truncating_or_extending: units & ~((0xff as UInt64) << 56)))
      yield (
        start: Pointer<Int8>(type_punning: buffer.advance(by: 1)),
        count: buffer.unsafe[].copy())
    }
  }

}

public conformance UTF8Array: Deinitializable {

  /// Deinitializes `self`, deallocating its out-of-line payload if `self` owns one.
  public fun deinit() sink {
    // Inline payloads have nothing to free, and unowned out-of-line payloads (e.g., static
    // allocations) are not ours to free.
    if !is_inline() && is_owned() {
      // The allocation begins at the `Int` header that precedes the units, not at the first unit
      // exposed by `unsafe_heap_payload.0`. Recover that address by clearing the most significant
      // byte of `units`, which holds the representation discriminator.
      let header = Pointer<Int>(
        bit_pattern: UInt(truncating_or_extending: units & ~((0xff as UInt64) << 56)))
      PointerToMutable(adding_mutation_to: header).deallocate()
    }
  }

}

public conformance UTF8Array: Copyable {

  /// Returns a copy of `self`.
  ///
  /// Inline and unowned payloads are copied bitwise. An owned out-of-line payload is cloned into
  /// a new allocation that the returned instance owns.
  public fun copy() -> Self {
    if is_inline() || !is_owned() {
      return .new(units: units.copy())
    } else {
      let payload = unsafe_heap_payload
      // Size of the header (the unit count), the units, and the null terminator.
      let payload_size = MemoryLayout<Int>.stride() + payload.1 + 1
      let payload_clone = MemoryAddress.allocate_bytes(
        count: payload_size,
        aligned_at: MemoryLayout<Int>.alignment())

      // The buffer to clone begins at the `Int` header, which precedes the first unit exposed by
      // `payload.0`. Copying from `payload.0` would skip the header and read past the end of the
      // source buffer.
      let header = Pointer<Int>(
        bit_pattern: UInt(truncating_or_extending: units & ~((0xff as UInt64) << 56)))

      // Note: copy the entire payload at once.
      let d = CVoidPointer(base: payload_clone.base)
      let s = CVoidPointer(base: header.copy().base)
      _ = memmove(d, s, payload_size)

      return .new(taking_ownership_of: payload_clone)
    }
  }

}

public conformance UTF8Array: Equatable {

  /// Returns `true` iff `self` and `other` contain the same units in the same order.
  public fun infix== (_ other: Self) -> Bool {
    // If both LHS and RHS are stored inline, they are equal iff their representations are
    // bitwise equal.
    if self.is_inline() && other.is_inline() {
      return self.units == other.units
    }

    // LHS and RHS are equal if they point to the same buffer. Distinct buffers may still hold
    // the same units (e.g., after `copy()`), so a mismatch falls through to the comparison below.
    if !self.is_inline() && !other.is_inline() {
      if self.unsafe_heap_payload.0 == other.unsafe_heap_payload.0 { return true }
    }

    // LHS and RHS are equal if they contain the same elements in the same order.
    // TODO: Rewrite as `self.elements_equal(other)`.
    let n = self.count()
    if n != other.count() { return false }
    var i = 0
    while i < n {
      if self[i] != other[i] { return false }
      &i += 1
    }
    return true
  }

}

// public conformance UTF8Array: Collection {
public extension UTF8Array {

  /// An index in an UTF8Array.
  public typealias Index = Int

  /// A single UTF-8 code unit.
  ///
  /// This is the type yielded by `subscript(_:)`.
  public typealias Element = Int8

  /// Returns the position of the first unit in `self`, which is always `0`.
  public fun start_index() -> Int {
    0
  }

  /// Returns the number of elements in `self`.
  public fun count() -> Int {
    if is_inline() {
      // The count of an inline payload is stored in the most significant byte of `units`, whose
      // two highest bits are unset in that form.
      Int(truncating_or_extending: units >> 56)
    } else {
      unsafe_heap_payload.1.copy()
    }
  }

  /// Accesses the unit at `position` in `self`.
  ///
  /// - Requires: `position` is in the range `0 ..< count()`. The requirement is not checked yet
  ///   (see the TODOs below).
  public subscript(_ position: Int): Int8 {
    // Exactly one unit is yielded per access: either from the inline representation or from the
    // out-of-line buffer.
    if is_inline() {
      // TODO: uncomment when #1046 is implemented
      // precondition((0 <= position) && (position < Int(units >> 56)))

      // Unit `position` lives `position + 1` bytes below the most significant byte of `units`.
      let s = 8 * (6 - position)
      yield Int8(truncating_or_extending: units >> s)
    } else {
      let p = unsafe_heap_payload
      // TODO: uncomment when #1046 is implemented
      // precondition((0 <= position) && (position < p.1))
      yield p.0.advance(by: position).unsafe[]
    }
  }

}
Loading