Skip to content

Commit

Permalink
Standardize on start/end, not offset/length for range requests (#71)
Browse files Browse the repository at this point in the history
* Standardize on start/end, not offset/length for range requests

* update changelog

* bump to 0.3.0-beta.2
  • Loading branch information
kylebarron authored Nov 1, 2024
1 parent ed72fb6 commit 034f172
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 35 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# Changelog

## [0.3.0] - Unreleased

### Breaking changes

- `get_range`, `get_range_async`, `get_ranges`, and `get_ranges_async` now use **start/end** instead of **offset/length**. This is for consistency with the `range` option of `obstore.get`.

## [0.2.0] - 2024-10-25

## What's Changed
### What's Changed

- Streaming list results. `list` now returns an async or sync generator. by @kylebarron in https://github.com/developmentseed/obstore/pull/35
- Optionally return list result as arrow. The `return_arrow` keyword argument returns chunks from `list` as Arrow RecordBatches, which is faster than materializing Python dicts/lists. by @kylebarron in https://github.com/developmentseed/obstore/pull/38
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion obstore/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "obstore"
version = "0.3.0-beta.1"
version = "0.3.0-beta.2"
authors = { workspace = true }
edition = { workspace = true }
description = "A Python interface to the Rust object_store crate, providing a uniform API for interacting with object storage services and local files."
Expand Down
16 changes: 8 additions & 8 deletions obstore/python/obstore/_get.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ async def get_async(
Refer to the documentation for [get][obstore.get].
"""

def get_range(store: ObjectStore, path: str, offset: int, length: int) -> Buffer:
def get_range(store: ObjectStore, path: str, start: int, end: int) -> Buffer:
"""
Return the bytes that are stored at the specified location in the given byte range.
Expand All @@ -247,24 +247,24 @@ def get_range(store: ObjectStore, path: str, offset: int, length: int) -> Buffer
Args:
store: The ObjectStore instance to use.
path: The path within ObjectStore to retrieve.
offset: The start of the byte range.
length: The number of bytes.
start: The start of the byte range (inclusive).
end: The end of the byte range (exclusive).
Returns:
A `Buffer` object implementing the Python buffer protocol, allowing
zero-copy access to the underlying memory provided by Rust.
"""

async def get_range_async(
store: ObjectStore, path: str, offset: int, length: int
store: ObjectStore, path: str, start: int, end: int
) -> Buffer:
"""Call `get_range` asynchronously.
Refer to the documentation for [get_range][obstore.get_range].
"""

def get_ranges(
store: ObjectStore, path: str, offsets: Sequence[int], lengths: Sequence[int]
store: ObjectStore, path: str, starts: Sequence[int], ends: Sequence[int]
) -> List[Buffer]:
"""
Return the bytes that are stored at the specified location in the given byte ranges
Expand All @@ -277,8 +277,8 @@ def get_ranges(
Args:
store: The ObjectStore instance to use.
path: The path within ObjectStore to retrieve.
offsets: A sequence of `int` where each offset starts.
lengths: A sequence of `int` representing the number of bytes within each range.
starts: A sequence of `int` giving the start of each byte range (inclusive).
ends: A sequence of `int` giving the end of each byte range (exclusive).
Returns:
A sequence of `Buffer`, one for each range. This `Buffer` object implements the
Expand All @@ -287,7 +287,7 @@ def get_ranges(
"""

async def get_ranges_async(
store: ObjectStore, path: str, offsets: Sequence[int], lengths: Sequence[int]
store: ObjectStore, path: str, starts: Sequence[int], ends: Sequence[int]
) -> List[Buffer]:
"""Call `get_ranges` asynchronously.
Expand Down
34 changes: 16 additions & 18 deletions obstore/src/get.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,13 +359,12 @@ pub(crate) fn get_range(
py: Python,
store: PyObjectStore,
path: String,
offset: usize,
length: usize,
start: usize,
end: usize,
) -> PyObjectStoreResult<PyArrowBuffer> {
let runtime = get_runtime(py)?;
let range = offset..offset + length;
py.allow_threads(|| {
let out = runtime.block_on(store.as_ref().get_range(&path.into(), range))?;
let out = runtime.block_on(store.as_ref().get_range(&path.into(), start..end))?;
Ok::<_, PyObjectStoreError>(PyArrowBuffer::new(Buffer::from_bytes(out.into())))
})
}
Expand All @@ -375,14 +374,13 @@ pub(crate) fn get_range_async(
py: Python,
store: PyObjectStore,
path: String,
offset: usize,
length: usize,
start: usize,
end: usize,
) -> PyResult<Bound<PyAny>> {
let range = offset..offset + length;
pyo3_async_runtimes::tokio::future_into_py(py, async move {
let out = store
.as_ref()
.get_range(&path.into(), range)
.get_range(&path.into(), start..end)
.await
.map_err(PyObjectStoreError::ObjectStoreError)?;
Ok(PyArrowBuffer::new(Buffer::from_bytes(out.into())))
Expand All @@ -394,14 +392,14 @@ pub(crate) fn get_ranges(
py: Python,
store: PyObjectStore,
path: String,
offsets: Vec<usize>,
lengths: Vec<usize>,
starts: Vec<usize>,
ends: Vec<usize>,
) -> PyObjectStoreResult<Vec<PyArrowBuffer>> {
let runtime = get_runtime(py)?;
let ranges = offsets
let ranges = starts
.into_iter()
.zip(lengths)
.map(|(offset, length)| offset..offset + length)
.zip(ends)
.map(|(start, end)| start..end)
.collect::<Vec<_>>();
py.allow_threads(|| {
let out = runtime.block_on(store.as_ref().get_ranges(&path.into(), &ranges))?;
Expand All @@ -418,13 +416,13 @@ pub(crate) fn get_ranges_async(
py: Python,
store: PyObjectStore,
path: String,
offsets: Vec<usize>,
lengths: Vec<usize>,
starts: Vec<usize>,
ends: Vec<usize>,
) -> PyResult<Bound<PyAny>> {
let ranges = offsets
let ranges = starts
.into_iter()
.zip(lengths)
.map(|(offset, length)| offset..offset + length)
.zip(ends)
.map(|(start, end)| start..end)
.collect::<Vec<_>>();
pyo3_async_runtimes::tokio::future_into_py(py, async move {
let out = store
Expand Down
12 changes: 6 additions & 6 deletions tests/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def test_get_range():
path = "big-data.txt"

obs.put(store, path, data)
buffer = obs.get_range(store, path, 5, 10)
buffer = obs.get_range(store, path, 5, 15)
view = memoryview(buffer)
assert view == data[5:15]

Expand All @@ -116,9 +116,9 @@ def test_get_ranges():
path = "big-data.txt"

obs.put(store, path, data)
offsets = [5, 10, 15, 20]
lengths = [10, 10, 10, 10]
buffers = obs.get_ranges(store, path, offsets, lengths)
starts = [5, 10, 15, 20]
ends = [15, 20, 25, 30]
buffers = obs.get_ranges(store, path, starts, ends)

for offset, length, buffer in zip(offsets, lengths, buffers):
assert memoryview(buffer) == data[offset : offset + length]
for start, end, buffer in zip(starts, ends, buffers):
assert memoryview(buffer) == data[start:end]

0 comments on commit 034f172

Please sign in to comment.