diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml deleted file mode 100644 index a52ff5b..0000000 --- a/.github/workflows/deploy.yaml +++ /dev/null @@ -1,56 +0,0 @@ -name: Release - -permissions: - contents: write - -on: - push: - tags: - - v[0-9]+.* - -jobs: - create-release: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: taiki-e/create-gh-release-action@v1 - with: - # (required) GitHub token for creating GitHub Releases. - token: ${{ secrets.GITHUB_TOKEN }} - - upload-assets: - needs: create-release - strategy: - matrix: - include: - - name: aarch64-latest - target: aarch64-unknown-linux-gnu - os: ubuntu-latest - - name: x86_64-latest - target: x86_64-unknown-linux-gnu - os: ubuntu-latest - - name: aarch64-20.04 - target: aarch64-unknown-linux-gnu - os: ubuntu-20.04 - - name: x86_64-20.04 - target: x86_64-unknown-linux-gnu - os: ubuntu-20.04 - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - - uses: taiki-e/upload-rust-binary-action@v1 - with: - # (required) Comma-separated list of binary names (non-extension portion of filename) to build and upload. - # Note that glob pattern is not supported yet. - bin: daemon,parquet_file_service - # (optional) On which platform to distribute the `.tar.gz` file. - # [default value: unix] - # [possible values: all, unix, windows, none] - tar: unix - # (optional) On which platform to distribute the `.zip` file. - # [default value: windows] - # [possible values: all, unix, windows, none] - zip: windows - archive: app-$target-${{ matrix.name }} - # (required) GitHub token for uploading assets to GitHub Releases. 
- token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/install_duckdb.yml b/.github/workflows/install_duckdb.yml new file mode 100644 index 0000000..d835f0e --- /dev/null +++ b/.github/workflows/install_duckdb.yml @@ -0,0 +1,27 @@ +name: Duckdb_installer + +on: + # Defining workflow_call means that this workflow can be called from + # your main workflow job + workflow_call: + +jobs: + duckdb: + runs-on: ubuntu-latest + # This is optional; it exposes the plan to your job as an environment variable + env: + DUCKDB_LIB_DIR: ${{ inputs.plan }} + steps: + - name: Install duckdb + shell: bash + run: | + if ! command -v unzip &> /dev/null; then + apt-get update && apt-get install -y unzip + fi + + wget "https://github.com/duckdb/duckdb/releases/download/v1.0.0/libduckdb-linux-amd64.zip" + mkdir duckdb_lib + unzip libduckdb-linux-amd64.zip -d duckdb_lib + echo "DUCKDB_LIB_DIR=$(pwd)/duckdb_lib" >> $GITHUB_ENV + echo $DUCKDB_LIB_DIR + ls duckdb_lib \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..a41298a --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,281 @@ +# Copyright 2022-2024, axodotdev +# SPDX-License-Identifier: MIT or Apache-2.0 +# +# CI that: +# +# * checks for a Git Tag that looks like a release +# * builds artifacts with cargo-dist (archives, installers, hashes) +# * uploads those artifacts to temporary workflow zip +# * on success, uploads the artifacts to a GitHub Release +# +# Note that the GitHub Release will be created with a generated +# title/body based on your changelogs. + +name: Release +permissions: + "contents": "write" + +# This task will run whenever you push a git tag that looks like a version +# like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc. 
+# Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where +# PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION +# must be a Cargo-style SemVer Version (must have at least major.minor.patch). +# +# If PACKAGE_NAME is specified, then the announcement will be for that +# package (erroring out if it doesn't have the given version or isn't cargo-dist-able). +# +# If PACKAGE_NAME isn't specified, then the announcement will be for all +# (cargo-dist-able) packages in the workspace with that version (this mode is +# intended for workspaces with only one dist-able package, or with all dist-able +# packages versioned/released in lockstep). +# +# If you push multiple tags at once, separate instances of this workflow will +# spin up, creating an independent announcement for each one. However, GitHub +# will hard limit this to 3 tags per commit, as it will assume more tags is a +# mistake. +# +# If there's a prerelease-style suffix to the version, then the release(s) +# will be marked as a prerelease. +on: + pull_request: + push: + tags: + - '**[0-9]+.[0-9]+.[0-9]+*' + +jobs: + # Run 'cargo dist plan' (or host) to determine what tasks we need to do + plan: + runs-on: "ubuntu-20.04" + outputs: + val: ${{ steps.plan.outputs.manifest }} + tag: ${{ !github.event.pull_request && github.ref_name || '' }} + tag-flag: ${{ !github.event.pull_request && format('--tag={0}', github.ref_name) || '' }} + publishing: ${{ !github.event.pull_request }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install cargo-dist + # we specify bash to get pipefail; it guards against the `curl` command + # failing. 
otherwise `sh` won't catch that `curl` returned non-0 + shell: bash + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.18.0/cargo-dist-installer.sh | sh" + - name: Cache cargo-dist + uses: actions/upload-artifact@v4 + with: + name: cargo-dist-cache + path: ~/.cargo/bin/cargo-dist + # sure would be cool if github gave us proper conditionals... + # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible + # functionality based on whether this is a pull_request, and whether it's from a fork. + # (PRs run on the *source* but secrets are usually on the *target* -- that's *good* + # but also really annoying to build CI around when it needs secrets to work right.) + - id: plan + run: | + cargo dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json + echo "cargo dist ran successfully" + cat plan-dist-manifest.json + echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" + - name: "Upload dist-manifest.json" + uses: actions/upload-artifact@v4 + with: + name: artifacts-plan-dist-manifest + path: plan-dist-manifest.json + + # Build and packages all the platform-specific things + build-local-artifacts: + name: build-local-artifacts (${{ join(matrix.targets, ', ') }}) + # Let the initial task tell us to not run (currently very blunt) + needs: + - plan + if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }} + strategy: + fail-fast: false + # Target platforms/runners are computed by cargo-dist in create-release. 
+ # Each member of the matrix has the following arguments: + # + # - runner: the github runner + # - dist-args: cli flags to pass to cargo dist + # - install-dist: expression to run to install cargo-dist on the runner + # + # Typically there will be: + # - 1 "global" task that builds universal installers + # - N "local" tasks that build each platform's binaries and platform-specific installers + matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }} + runs-on: ${{ matrix.runner }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json + steps: + - name: enable windows longpaths + run: | + git config --global core.longpaths true + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install cargo-dist + run: ${{ matrix.install_dist }} + # Get the dist-manifest + - name: Fetch local artifacts + uses: actions/download-artifact@v4 + with: + pattern: artifacts-* + path: target/distrib/ + merge-multiple: true + - name: Install dependencies + run: | + ${{ matrix.packages_install }} + - name: Build artifacts + run: | + # Actually do builds and make zips and whatnot + cargo dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json + echo "cargo dist ran successfully" + - id: cargo-dist + name: Post-build + # We force bash here just because github makes it really hard to get values up + # to "real" actions without writing to env-vars, and writing to env-vars has + # inconsistent syntax between shell and powershell. 
+ shell: bash + run: | + # Parse out what we just built and upload it to scratch storage + echo "paths<<EOF" >> "$GITHUB_OUTPUT" + jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT" + echo "EOF" >> "$GITHUB_OUTPUT" + + cp dist-manifest.json "$BUILD_MANIFEST_NAME" + - name: "Upload artifacts" + uses: actions/upload-artifact@v4 + with: + name: artifacts-build-local-${{ join(matrix.targets, '_') }} + path: | + ${{ steps.cargo-dist.outputs.paths }} + ${{ env.BUILD_MANIFEST_NAME }} + + # Build and package all the platform-agnostic(ish) things + build-global-artifacts: + needs: + - plan + - build-local-artifacts + runs-on: "ubuntu-20.04" + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install cached cargo-dist + uses: actions/download-artifact@v4 + with: + name: cargo-dist-cache + path: ~/.cargo/bin/ + - run: chmod +x ~/.cargo/bin/cargo-dist + # Get all the local artifacts for the global tasks to use (for e.g.
checksums) + - name: Fetch local artifacts + uses: actions/download-artifact@v4 + with: + pattern: artifacts-* + path: target/distrib/ + merge-multiple: true + - id: cargo-dist + shell: bash + run: | + cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json + echo "cargo dist ran successfully" + + # Parse out what we just built and upload it to scratch storage + echo "paths<<EOF" >> "$GITHUB_OUTPUT" + jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT" + echo "EOF" >> "$GITHUB_OUTPUT" + + cp dist-manifest.json "$BUILD_MANIFEST_NAME" + - name: "Upload artifacts" + uses: actions/upload-artifact@v4 + with: + name: artifacts-build-global + path: | + ${{ steps.cargo-dist.outputs.paths }} + ${{ env.BUILD_MANIFEST_NAME }} + # Determines if we should publish/announce + host: + needs: + - plan + - build-local-artifacts + - build-global-artifacts + # Only run if we're "publishing", and only if local and global didn't fail (skipped is fine) + if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + runs-on: "ubuntu-20.04" + outputs: + val: ${{ steps.host.outputs.manifest }} + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install cached cargo-dist + uses: actions/download-artifact@v4 + with: + name: cargo-dist-cache + path: ~/.cargo/bin/ + - run: chmod +x ~/.cargo/bin/cargo-dist + # Fetch artifacts from scratch-storage + - name: Fetch artifacts + uses: actions/download-artifact@v4 + with: + pattern: artifacts-* + path: target/distrib/ + merge-multiple: true + - id: host + shell: bash + run: | + cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json >
dist-manifest.json + echo "artifacts uploaded and released successfully" + cat dist-manifest.json + echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" + - name: "Upload dist-manifest.json" + uses: actions/upload-artifact@v4 + with: + # Overwrite the previous copy + name: artifacts-dist-manifest + path: dist-manifest.json + # Create a GitHub Release while uploading all files to it + - name: "Download GitHub Artifacts" + uses: actions/download-artifact@v4 + with: + pattern: artifacts-* + path: artifacts + merge-multiple: true + - name: Cleanup + run: | + # Remove the granular manifests + rm -f artifacts/*-dist-manifest.json + - name: Create GitHub Release + env: + PRERELEASE_FLAG: "${{ fromJson(steps.host.outputs.manifest).announcement_is_prerelease && '--prerelease' || '' }}" + ANNOUNCEMENT_TITLE: "${{ fromJson(steps.host.outputs.manifest).announcement_title }}" + ANNOUNCEMENT_BODY: "${{ fromJson(steps.host.outputs.manifest).announcement_github_body }}" + RELEASE_COMMIT: "${{ github.sha }}" + run: | + # Write and read notes from a file to avoid quoting breaking things + echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt + + gh release create "${{ needs.plan.outputs.tag }}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/* + + announce: + needs: + - plan + - host + # use "always() && ..." to allow us to wait for all publish jobs while + # still allowing individual publish jobs to skip themselves (for prereleases). + # "host" however must run to completion, no skipping allowed! 
+ if: ${{ always() && needs.host.result == 'success' }} + runs-on: "ubuntu-20.04" + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive diff --git a/.gitignore b/.gitignore index e95eba5..8b27412 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,6 @@ stations.xml data/ weather_data/ */.env -.env \ No newline at end of file +.env +duckdb_lib +*/duckdb_lib \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index e967fae..bc6cadf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -583,20 +583,19 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "axum" -version = "0.7.5" +version = "0.6.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" +checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf" dependencies = [ "async-trait", "axum-core", "axum-macros", + "bitflags 1.3.2", "bytes", "futures-util", - "http", - "http-body", - "http-body-util", - "hyper", - "hyper-util", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.29", "itoa", "matchit", "memchr", @@ -609,7 +608,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", - "sync_wrapper 1.0.1", + "sync_wrapper 0.1.2", "tokio", "tower", "tower-layer", @@ -619,20 +618,17 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.4.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3" +checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" dependencies = [ "async-trait", "bytes", "futures-util", - "http", - "http-body", - "http-body-util", + "http 0.2.12", + "http-body 0.4.6", "mime", - "pin-project-lite", "rustversion", - "sync_wrapper 0.1.2", "tower-layer", "tower-service", "tracing", @@ -640,9 +636,9 @@ dependencies = [ 
[[package]] name = "axum-macros" -version = "0.4.1" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00c055ee2d014ae5981ce1016374e8213682aa14d9bf40e48ab48b5f3ef20eaa" +checksum = "cdca6a10ecad987bda04e95606ef85a5417dcaac1a78455242d72e031e2b6b62" dependencies = [ "heck", "proc-macro2", @@ -1382,6 +1378,25 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +[[package]] +name = "h2" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.5" @@ -1393,7 +1408,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http", + "http 1.1.0", "indexmap", "slab", "tokio", @@ -1458,6 +1473,17 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "379dada1584ad501b383485dd706b8afb7a70fcbc7f4da7d780638a5a6124a60" +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.1.0" @@ -1469,6 +1495,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.0" @@ -1476,7 +1513,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" dependencies = [ "bytes", - "http", + "http 1.1.0", ] [[package]] @@ -1487,16 +1524,16 @@ checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", "futures-util", - "http", - "http-body", + "http 1.1.0", + "http-body 1.0.0", "pin-project-lite", ] [[package]] name = "http-range-header" -version = "0.4.1" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08a397c49fec283e3d6211adbe480be95aae5f304cfb923e9970e08956d5168a" +checksum = "add0ab9360ddbd88cfeb3bd9574a1d85cfdfa14db10b3e21d3700dbc4328758f" [[package]] name = "httparse" @@ -1510,6 +1547,29 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "hyper" +version = "0.14.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f361cde2f109281a220d4307746cdfd5ee3f410da58a70377762396775634b33" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.4.0" @@ -1519,11 +1579,10 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "h2", - "http", - "http-body", + "h2 0.4.5", + "http 1.1.0", + "http-body 1.0.0", "httparse", - "httpdate", "itoa", "pin-project-lite", "smallvec", @@ -1538,8 +1597,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "908bb38696d7a037a01ebcc68a00634112ac2bbf8ca74e30a2c3d2f4f021302b" dependencies = [ "futures-util", - "http", - "hyper", + "http 1.1.0", + "hyper 1.4.0", "hyper-util", "rustls", "rustls-pki-types", @@ -1556,7 +1615,7 @@ checksum = 
"70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper", + "hyper 1.4.0", "hyper-util", "native-tls", "tokio", @@ -1573,9 +1632,9 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http", - "http-body", - "hyper", + "http 1.1.0", + "http-body 1.0.0", + "hyper 1.4.0", "pin-project-lite", "socket2", "tokio", @@ -1771,6 +1830,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b3f02cecc430f61561bde538d42af4be2d9d5a8b058f74883e460bc1055461" dependencies = [ "autocfg", + "cc", "flate2", "pkg-config", "serde", @@ -1889,15 +1949,16 @@ dependencies = [ [[package]] name = "multer" -version = "3.1.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b" +checksum = "01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" dependencies = [ "bytes", "encoding_rs", "futures-util", - "http", + "http 0.2.12", "httparse", + "log", "memchr", "mime", "spin", @@ -2215,8 +2276,8 @@ dependencies = [ "config", "duckdb", "futures", - "h2", - "hyper", + "h2 0.3.26", + "hyper 0.14.29", "log", "mime", "num_cpus", @@ -2534,11 +2595,11 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2", - "http", - "http-body", + "h2 0.4.5", + "http 1.1.0", + "http-body 1.0.0", "http-body-util", - "hyper", + "hyper 1.4.0", "hyper-rustls", "hyper-tls", "hyper-util", @@ -2577,7 +2638,7 @@ checksum = "39346a33ddfe6be00cbc17a34ce996818b97b230b87229f10114693becca1268" dependencies = [ "anyhow", "async-trait", - "http", + "http 1.1.0", "reqwest", "serde", "thiserror", @@ -2595,8 +2656,8 @@ dependencies = [ "chrono", "futures", "getrandom", - "http", - "hyper", + "http 1.1.0", + "hyper 1.4.0", "parking_lot 0.11.2", "reqwest", "reqwest-middleware", @@ -3385,16 +3446,16 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.5.2" +version = "0.4.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" +checksum = "61c5bb1d698276a2443e5ecfabc1008bf15a36c12e6a7176e7bf089ea9131140" dependencies = [ "bitflags 2.4.2", "bytes", + "futures-core", "futures-util", - "http", - "http-body", - "http-body-util", + "http 0.2.12", + "http-body 0.4.6", "http-range-header", "httpdate", "mime", diff --git a/Cargo.toml b/Cargo.toml index e37457e..f5ba9a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,16 +5,39 @@ members = ["daemon", "parquet_file_service"] [workspace.dependencies] anyhow = "1.0.75" -parquet = "50.0.0" +parquet = "52.1.0" log = "0.4.18" slog = "2.7.0" slog-term = "2.9.0" slog-async = "2.7.0" clap = { version = "4.3.23", features = ["derive", "env"] } -reqwest = {version = "0.11.22", features=["stream"]} +reqwest = {version = "0.12.5", features=["stream"]} tokio = { version = "1.35.1", features = ["full"] } tokio-util = "0.7.10" time = "0.3.31" openssl = { version = "0.10.60", features = ["vendored"] } config = "0.14.0" -rustix = "0.38.19" \ No newline at end of file +rustix = "0.38.19" + +# Config for 'cargo dist' +[workspace.metadata.dist] +# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax) +cargo-dist-version = "0.18.0" +# CI backends to support +ci = "github" +# The installers to generate for each app +installers = ["shell"] +# Target platforms to build apps for (Rust target-triple syntax) +targets = ["x86_64-unknown-linux-gnu"] +# Publish jobs to run in CI +pr-run-mode = "plan" +# Whether to install an updater program +install-updater = true + +[workspace.metadata.dist.dependencies.apt] +musl-tools = '*' + +# The profile that 'cargo dist' will build with +[profile.dist] +inherits = "release" +lto = "thin" diff --git a/daemon/Cargo.toml b/daemon/Cargo.toml index b91fc45..02746b8 100644 --- a/daemon/Cargo.toml +++ b/daemon/Cargo.toml @@ -2,6 +2,7 @@ name = "daemon" version = "0.3.0" edition = "2021" 
+repository = "https://github.com/tee8z/noaa-data-pipeline" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/daemon/src/domains/forecasts/download_forecast.rs b/daemon/src/domains/forecasts/download_forecast.rs index 3b11253..549a070 100644 --- a/daemon/src/domains/forecasts/download_forecast.rs +++ b/daemon/src/domains/forecasts/download_forecast.rs @@ -765,7 +765,7 @@ impl ForecastService { } } - info!(self.logger, "done waiting for data, contining"); + info!(self.logger, "done waiting for data, continuing"); let mut forecasts = vec![]; for all_forecasts in forecast_data.lock().await.values() { for weather_forecats in all_forecasts { diff --git a/daemon/src/domains/observations/download_observations.rs b/daemon/src/domains/observations/download_observations.rs index d58e692..360ff78 100644 --- a/daemon/src/domains/observations/download_observations.rs +++ b/daemon/src/domains/observations/download_observations.rs @@ -6,7 +6,7 @@ use parquet::{ use parquet_derive::ParquetRecordWriter; use slog::Logger; use std::sync::Arc; -use time::{format_description::well_known::Rfc2822, macros::format_description, OffsetDateTime}; +use time::{format_description::well_known::Rfc3339, macros::format_description, OffsetDateTime}; use crate::{CityWeather, Metar, ObservationData, Units, XmlFetcher}; @@ -31,21 +31,43 @@ impl TryFrom for CurrentWeather { fn try_from(val: Metar) -> Result { Ok(CurrentWeather { station_id: val.station_id.clone(), - latitude: val.latitude.parse::()?, - longitude: val.longitude.parse::()?, - generated_at: OffsetDateTime::parse(&&val.observation_time, &Rfc2822) - .map_err(|e| anyhow!("error parsing observation_time time: {}", e))?, - temperature_value: val.temp_c.parse::().map(Some).unwrap_or(None), + latitude: val.latitude.unwrap_or(String::from("")).parse::()?, + longitude: val.longitude.unwrap_or(String::from("")).parse::()?, + generated_at: OffsetDateTime::parse( + &&val + .observation_time 
+ .clone() + .unwrap_or(OffsetDateTime::now_utc().to_string()), + &Rfc3339, + ) + .map_err(|e| anyhow!("error parsing observation_time time: {} {:?}", e, val.observation_time))?, + temperature_value: val + .temp_c + .unwrap_or(String::from("")) + .parse::() + .map(Some) + .unwrap_or(None), temperature_unit_code: Units::Celcius.to_string(), wind_direction: val .wind_dir_degrees + .unwrap_or(String::from("")) .parse::() .map(Some) .unwrap_or(None), wind_direction_unit_code: Units::DegreesTrue.to_string(), - wind_speed: val.wind_speed_kt.parse::().map(Some).unwrap_or(None), + wind_speed: val + .wind_speed_kt + .unwrap_or(String::from("")) + .parse::() + .map(Some) + .unwrap_or(None), wind_speed_unit_code: Units::Knots.to_string(), - dewpoint_value: val.dewpoint_c.parse::().map(Some).unwrap_or(None), + dewpoint_value: val + .dewpoint_c + .unwrap_or(String::from("")) + .parse::() + .map(Some) + .unwrap_or(None), dewpoint_unit_code: Units::Celcius.to_string(), }) } @@ -136,18 +158,6 @@ pub fn create_observation_schema() -> Type { .build() .unwrap(); - let relative_humidity = Type::primitive_type_builder("relative_humidity", PhysicalType::INT64) - .with_repetition(Repetition::OPTIONAL) - .build() - .unwrap(); - - let relative_humidity_unit_code = - Type::primitive_type_builder("relative_humidity_unit_code", PhysicalType::BYTE_ARRAY) - .with_repetition(Repetition::REQUIRED) - .with_logical_type(Some(LogicalType::String)) - .build() - .unwrap(); - let wind_direction = Type::primitive_type_builder("wind_direction", PhysicalType::INT64) .with_repetition(Repetition::OPTIONAL) .build() @@ -193,8 +203,6 @@ pub fn create_observation_schema() -> Type { Arc::new(generated_at), Arc::new(temperature_value), Arc::new(temperature_unit_code), - Arc::new(relative_humidity), - Arc::new(relative_humidity_unit_code), Arc::new(wind_direction), Arc::new(wind_direction_unit_code), Arc::new(wind_speed), @@ -227,11 +235,22 @@ impl ObservationService { let mut observations = vec![]; for value in 
converted_xml.data.metar.iter() { + if value.temp_c.is_none() + || value.longitude.is_none() + || value.latitude.is_none() + || value.observation_time.is_none() + { + // skip reading if missing key values + continue; + } let current: CurrentWeather = value.clone().try_into()?; + let mut observation: Observation = current.try_into()?; - let city = city_weather.city_data.get(&observation.station_id).unwrap(); - observation.station_name = city.station_name.clone(); - observations.push(observation) + if let Some(city) = city_weather.city_data.get(&observation.station_id) { + // only add observation if we have a station_name with it + observation.station_name = city.station_name.clone(); + observations.push(observation) + } } Ok(observations) } diff --git a/daemon/src/domains/observations/xml_observation.rs b/daemon/src/domains/observations/xml_observation.rs index 3764ff8..56e7ec1 100644 --- a/daemon/src/domains/observations/xml_observation.rs +++ b/daemon/src/domains/observations/xml_observation.rs @@ -26,17 +26,6 @@ pub struct ObservationData { #[serde(rename = "data")] pub data: CurrentData, - #[serde(rename = "_xmlns:xsd")] - pub xmlns_xsd: String, - - #[serde(rename = "_xmlns:xsi")] - pub xmlns_xsi: String, - - #[serde(rename = "_version")] - pub version: String, - - #[serde(rename = "_xsi:noNamespaceSchemaLocation")] - pub xsi_no_namespace_schema_location: String, } #[derive(Serialize, Deserialize)] @@ -57,43 +46,25 @@ pub struct Metar { pub station_id: String, #[serde(rename = "observation_time")] - pub observation_time: String, + pub observation_time: Option, #[serde(rename = "latitude")] - pub latitude: String, + pub latitude: Option, #[serde(rename = "longitude")] - pub longitude: String, + pub longitude: Option, #[serde(rename = "temp_c")] - pub temp_c: String, + pub temp_c: Option, #[serde(rename = "dewpoint_c")] - pub dewpoint_c: String, + pub dewpoint_c: Option, #[serde(rename = "wind_dir_degrees")] - pub wind_dir_degrees: String, + pub wind_dir_degrees: 
Option, #[serde(rename = "wind_speed_kt")] - pub wind_speed_kt: String, - - #[serde(rename = "visibility_statute_mi")] - pub visibility_statute_mi: String, - - #[serde(rename = "altim_in_hg")] - pub altim_in_hg: String, - - #[serde(rename = "quality_control_flags")] - pub quality_control_flags: QualityControlFlags, - - #[serde(rename = "sky_condition")] - pub sky_condition: SkyConditionUnion, - - #[serde(rename = "flight_category")] - pub flight_category: String, - - #[serde(rename = "metar_type")] - pub metar_type: String, + pub wind_speed_kt: Option, #[serde(rename = "elevation_m")] pub elevation_m: String, @@ -112,24 +83,7 @@ pub struct QualityControlFlags { #[serde(rename = "auto_station")] pub auto_station: Option, - #[serde(rename = "no_signal")] pub no_signal: Option, } -#[derive(Clone, Serialize, Deserialize)] -pub struct SkyConditionElement { - #[serde(rename = "sky_cover")] - pub sky_cover: String, - - #[serde(rename = "cloud_base_ft_agl")] - pub cloud_base_ft_agl: Option, -} - -#[derive(Clone, Serialize, Deserialize)] -#[serde(untagged)] -pub enum SkyConditionUnion { - SkyConditionElement(SkyConditionElement), - - SkyConditionElementArray(Vec), -} diff --git a/parquet_file_service/Cargo.toml b/parquet_file_service/Cargo.toml index 05588f7..899c21b 100644 --- a/parquet_file_service/Cargo.toml +++ b/parquet_file_service/Cargo.toml @@ -2,6 +2,7 @@ name = "parquet_file_service" version = "0.3.0" edition = "2021" +repository = "https://github.com/tee8z/noaa-data-pipeline" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -12,10 +13,10 @@ time = { version = "0.3.25", features = ["parsing", "formatting"] } uuid = { version = "1.4.1", features = ["v4"] } anyhow = "1.0.72" mime = "0.3.17" -hyper = "1.4.0" -h2 = "0.4.5" -axum = { version = "0.7.5", features = ["macros", "tracing", "multipart"] } -tower-http = { version= "0.5.2", features = ["fs", "cors"] } +hyper = "0.14.27" +h2 = "0.3.24" +axum = { version = 
"0.6.19", features = ["macros", "tracing", "multipart"] } +tower-http = { version= "0.4.4", features = ["fs", "cors"] } futures = "0.3.28" serde = { version="1.0.188", features= ["derive"]} openssl = { version = "0.10.60", features = ["vendored"] } @@ -29,8 +30,9 @@ clap = { version = "4.3.23", features = ["derive", "env"] } toml = "0.8.10" rustix = "0.38.19" scooby = "0.5.0" -duckdb = { version = "0.10.2" } +duckdb = { version = "0.10.2", features = ["bundled"] } regex = "1.10.3" [dev-dependencies] +duckdb = { version = "0.10.2" } tower = "0.4.13" \ No newline at end of file diff --git a/parquet_file_service/build.sh b/parquet_file_service/build.sh new file mode 100755 index 0000000..40523c7 --- /dev/null +++ b/parquet_file_service/build.sh @@ -0,0 +1,22 @@ +#!/bin/bash +if ! command -v unzip &> /dev/null; then + apt-get update && apt-get install -y unzip +fi +if [ ! -d "duckdb_lib" ]; then + mkdir duckdb_lib +else + rm -rf duckdb_lib + mkdir duckdb_lib + echo "Directory $dir already exists." +fi + +if [ -f "libduckdb-linux-amd64.zip" ]; then + # File exists, remove it + rm "libduckdb-linux-amd64.zip" +fi + +wget "https://github.com/duckdb/duckdb/releases/download/v1.0.0/libduckdb-linux-amd64.zip" +unzip libduckdb-linux-amd64.zip -d duckdb_lib +rm libduckdb-linux-amd64.zip +echo "$(pwd)/duckdb_lib" +DUCKDB_LIB_DIR="$(pwd)/duckdb_lib" cargo build \ No newline at end of file