Merge branch 'main' into extend-read-parquet
jitingxu1 authored Sep 20, 2024
2 parents c3fba44 + c62efce commit 8b6b3c6
Showing 43 changed files with 281 additions and 414 deletions.
103 changes: 103 additions & 0 deletions .github/labeler.yml
@@ -0,0 +1,103 @@
# backends
bigquery:
- changed-files:
- any-glob-to-any-file: "ibis/backends/bigquery/**"

clickhouse:
- changed-files:
- any-glob-to-any-file: "ibis/backends/clickhouse/**"

datafusion:
- changed-files:
- any-glob-to-any-file: "ibis/backends/datafusion/**"

druid:
- changed-files:
- any-glob-to-any-file: "ibis/backends/druid/**"

duckdb:
- changed-files:
- any-glob-to-any-file: "ibis/backends/duckdb/**"

exasol:
- changed-files:
- any-glob-to-any-file: "ibis/backends/exasol/**"

flink:
- changed-files:
- any-glob-to-any-file: "ibis/backends/flink/**"

impala:
- changed-files:
- any-glob-to-any-file: "ibis/backends/impala/**"

mssql:
- changed-files:
- any-glob-to-any-file: "ibis/backends/mssql/**"

mysql:
- changed-files:
- any-glob-to-any-file: "ibis/backends/mysql/**"

oracle:
- changed-files:
- any-glob-to-any-file: "ibis/backends/oracle/**"

polars:
- changed-files:
- any-glob-to-any-file: "ibis/backends/polars/**"

postgres:
- changed-files:
- any-glob-to-any-file: "ibis/backends/postgres/**"

pyspark:
- changed-files:
- any-glob-to-any-file: "ibis/backends/pyspark/**"

risingwave:
- changed-files:
- any-glob-to-any-file: "ibis/backends/risingwave/**"

snowflake:
- changed-files:
- any-glob-to-any-file: "ibis/backends/snowflake/**"

sqlite:
- changed-files:
- any-glob-to-any-file: "ibis/backends/sqlite/**"

trino:
- changed-files:
- any-glob-to-any-file: "ibis/backends/trino/**"

# miscellaneous labels
tests:
- changed-files:
- any-glob-to-any-file: "**/tests/**"

nix:
- changed-files:
- any-glob-to-any-file: "**/*.nix"
- any-glob-to-any-file: "poetry.lock"

datatypes:
- changed-files:
- any-glob-to-any-file: "ibis/expr/datatypes/**"

ci:
- changed-files:
- any-glob-to-any-file: ".github/**"

dependencies:
- changed-files:
- any-glob-to-any-file: "**/*.nix"
- any-glob-to-any-file: "poetry.lock"
- any-glob-to-any-file: "flake.lock"
- any-glob-to-any-file: "requirements-dev.txt"
- any-glob-to-any-file: "conda/*.yml"

docs:
- changed-files:
- any-glob-to-any-file: "**/*.qmd"
- any-glob-to-any-file: "**/*.md"
2 changes: 1 addition & 1 deletion .github/workflows/docs-preview.yml
@@ -77,7 +77,7 @@ jobs:
path: docs/**/.jupyter_cache

- name: build docs
run: nix develop --ignore-environment --keep HOME -c just docs-build-all
run: nix develop '.#ibis311' --ignore-environment --keep HOME -c just docs-build-all

- name: install netlify cli
run: npm install -g netlify-cli
8 changes: 5 additions & 3 deletions .github/workflows/ibis-docs-main.yml
@@ -57,6 +57,8 @@ jobs:

- name: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: restore cache of the previously rendered notebooks
uses: actions/cache/restore@v4
@@ -67,10 +69,10 @@
path: docs/**/.jupyter_cache

- name: build api docs
run: nix develop --ignore-environment -c just docs-apigen --verbose
run: nix develop '.#ibis311' --ignore-environment -c just docs-apigen --verbose

- name: build docs
run: nix develop --ignore-environment --keep HOME -c just docs-render
run: nix develop '.#ibis311' --ignore-environment --keep HOME -c just docs-render

- name: cache rendered notebooks
uses: actions/cache/save@v4
@@ -79,7 +81,7 @@
path: docs/**/.jupyter_cache

- name: build jupyterlite
run: nix develop --ignore-environment --keep HOME -c just build-jupyterlite
run: nix develop '.#ibis311' --ignore-environment --keep HOME -c just build-jupyterlite

- name: check that all frozen computations were done before push
run: git diff --exit-code --stat
6 changes: 3 additions & 3 deletions .github/workflows/ibis-docs-pr.yml
@@ -67,13 +67,13 @@ jobs:
path: docs/**/.jupyter_cache

- name: generate api docs
run: nix develop --ignore-environment -c just docs-apigen --verbose
run: nix develop '.#ibis311' --ignore-environment -c just docs-apigen --verbose

- name: build docs
run: nix develop --ignore-environment --keep HOME -c just docs-render
run: nix develop '.#ibis311' --ignore-environment --keep HOME -c just docs-render

- name: build jupyterlite
run: nix develop --ignore-environment --keep HOME -c just build-jupyterlite
run: nix develop '.#ibis311' --ignore-environment --keep HOME -c just build-jupyterlite

- name: check that all frozen computations were done before push
run: git diff --exit-code --stat
12 changes: 12 additions & 0 deletions .github/workflows/labeler.yml
@@ -0,0 +1,12 @@
name: PR Labeler
on:
- pull_request_target

jobs:
labeler:
permissions:
contents: read
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: actions/labeler@v5
4 changes: 4 additions & 0 deletions README.md
@@ -218,3 +218,7 @@ Ibis is an open source project and welcomes contributions from anyone in the com
Join our community by interacting on GitHub or chatting with us on [Zulip](https://ibis-project.zulipchat.com/).

For more information visit https://ibis-project.org/.

## Governance

The Ibis project is an [independently governed](https://github.com/ibis-project/governance/blob/main/governance.md) open source community project to build and maintain the portable Python dataframe library. Ibis has contributors across a range of data companies and institutions.
5 changes: 1 addition & 4 deletions docs/backends/_utils.py
@@ -40,10 +40,7 @@ def find_member_with_docstring(member):
if base not in resolved_bases:
resolved_bases.append(base)

# Remove `CanCreateSchema` and `CanListSchema` since they are deprecated
# and we don't want to document their existence.
filtered_bases = filter(lambda x: "schema" not in x.name.lower(), resolved_bases)
for base in filtered_bases:
for base in resolved_bases:
try:
parent_member = get_callable(base, member.name)
except KeyError:
8 changes: 7 additions & 1 deletion docs/backends/bigquery.qmd
@@ -120,7 +120,7 @@ The simplest way to authenticate with the BigQuery backend is to use [Google's `
Once you have `gcloud` installed, you can authenticate to BigQuery (and other Google Cloud services) by running

```sh
gcloud auth login
gcloud auth login --update-adc
```

You will also likely want to configure a default project:

```sh
gcloud config set core/project <project_id>
```
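
With credentials and a default project in place, you can connect from Ibis directly. The following is a minimal sketch; the project and dataset names are placeholders:

```python
import ibis

# Placeholder project and dataset names -- substitute your own.
con = ibis.bigquery.connect(
    project_id="my-gcp-project",
    dataset_id="my_dataset",
)
print(con.list_tables())
```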

For any authentication problems, or information on other ways of authenticating,
10 changes: 10 additions & 0 deletions docs/reference/cursed_knowledge.qmd
@@ -14,3 +14,13 @@ execution engines.
* Impala's `LTRIM` and `RTRIM` functions accept a _set_ of whitespace (or other)
characters to remove from the left-, and right-hand-side sides of the input
string, but the `TRIM` function only removes _spaces_.

## ClickHouse

* [ClickHouse's random number generating
functions](https://clickhouse.com/docs/en/sql-reference/functions/random-functions)
are considered in [common subexpression
elimination](https://en.wikipedia.org/wiki/Common_subexpression_elimination),
so to get two unique random numbers, users must defeat that optimization.
This is done by passing **any** argument to those functions. It's left as an
exercise for the reader to figure out how to generate two unique inputs.
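
For illustration, a minimal sketch of that trick using the `clickhouse_connect` client (assuming a local ClickHouse server with default credentials):

```python
import clickhouse_connect

# Assumes a ClickHouse server reachable on localhost with default credentials.
client = clickhouse_connect.get_client(host="localhost")

# Two bare rand() calls are collapsed into one by common subexpression
# elimination; giving each call a distinct, otherwise-ignored argument keeps
# the two values independent.
row = client.query("SELECT rand(1) AS a, rand(2) AS b").result_rows[0]
print(row)
```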
9 changes: 6 additions & 3 deletions docs/tutorials/browser/repl.qmd
@@ -18,9 +18,12 @@ from urllib.parse import urlencode
lines = """
%pip install numpy pandas tzdata
import pyodide_js, pathlib, js
await pyodide_js.loadPackage("https://storage.googleapis.com/ibis-wasm-wheels/pyarrow-16.0.0.dev2661%2Bg9bddb87fd-cp311-cp311-emscripten_3_1_46_wasm32.whl")
pathlib.Path("penguins.csv").write_text(await (await js.fetch("https://storage.googleapis.com/ibis-tutorial-data/penguins.csv")).text())
del pyodide_js, pathlib, js
wheel_url = "https://storage.googleapis.com/ibis-wasm-wheels/pyarrow-17.0.0-cp311-cp311-emscripten_3_1_46_wasm32.whl"
await pyodide_js.loadPackage(wheel_url)
penguins_csv_url = "https://storage.googleapis.com/ibis-tutorial-data/penguins.csv"
penguins_text = await (await js.fetch(penguins_csv_url)).text()
pathlib.Path("penguins.csv").write_text(penguins_text)
del pyodide_js, pathlib, js, wheel_url, penguins_csv_url, penguins_text
%clear
%pip install 'ibis-framework[duckdb]'
from ibis.interactive import *
38 changes: 0 additions & 38 deletions ibis/backends/__init__.py
@@ -744,44 +744,6 @@ def drop_database(
"""


# TODO: remove this for 10.0
class CanListSchema:
@util.deprecated(
instead="Use `list_databases` instead`", as_of="9.0", removed_in="10.0"
)
def list_schemas(
self, like: str | None = None, database: str | None = None
) -> list[str]:
return self.list_databases(like=like, catalog=database)

@property
@util.deprecated(
instead="Use `Backend.current_database` instead.",
as_of="9.0",
removed_in="10.0",
)
def current_schema(self) -> str:
return self.current_database


class CanCreateSchema(CanListSchema):
@util.deprecated(
instead="Use `create_database` instead", as_of="9.0", removed_in="10.0"
)
def create_schema(
self, name: str, database: str | None = None, force: bool = False
) -> None:
self.create_database(name=name, catalog=database, force=force)

@util.deprecated(
instead="Use `drop_database` instead", as_of="9.0", removed_in="10.0"
)
def drop_schema(
self, name: str, database: str | None = None, force: bool = False
) -> None:
self.drop_database(name=name, catalog=database, force=force)


class CacheEntry(NamedTuple):
orig_op: ops.Relation
cached_op_ref: weakref.ref[ops.Relation]
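
The removed shims only forwarded to the database-oriented methods named in their deprecation messages. A minimal sketch of the replacement calls, using an in-memory DuckDB connection purely for illustration:

```python
import ibis

con = ibis.duckdb.connect()  # in-memory connection, for illustration only

# Replacements for the removed create_schema/drop_schema/list_schemas shims.
con.create_database("analytics", force=True)
print(con.list_databases())
con.drop_database("analytics", force=True)
```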
