Skip to content

Commit

Permalink
Merge pull request #94 from ploomber/pipeline
Browse files Browse the repository at this point in the history
Setting up Ploomber pipeline components
  • Loading branch information
lfunderburk authored Aug 24, 2023
2 parents 6e7b0cd + 4f4599e commit 7ec1e26
Show file tree
Hide file tree
Showing 10 changed files with 2,789 additions and 107 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,7 @@ pipeline/products/eda-pipeline.ipynb

!docs/intro-to-dashboards-and-apps/data/database/car_data.*
!pipeline/data/database/car_data.*

# Mini project ploomber output
# (executed notebooks produced by mini-projects/movie-rec-system/pipeline.yaml)
mini-projects/movie-rec-system/movie_rec_system/products/eda-pipeline.ipynb
mini-projects/movie-rec-system/movie_rec_system/products/extract-pipeline.ipynb
4 changes: 4 additions & 0 deletions mini-projects/movie-rec-system/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Executed notebooks written by the Ploomber pipeline
movie_rec_system/products/extract-pipeline.ipynb
movie_rec_system/products/eda-pipeline.ipynb

# DuckDB database file and its accompanying .metadata file
movies_data.duckdb
.movies_data.duckdb.metadata
22 changes: 22 additions & 0 deletions mini-projects/movie-rec-system/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Use the official Python image as the base image
FROM python:3.10

# Set the working directory
WORKDIR /app

# Copy only the Poetry manifests first so the dependency layers below stay
# cached until pyproject.toml or poetry.lock changes
COPY pyproject.toml poetry.lock /app/

# Install Poetry; --no-cache-dir keeps pip's download cache out of the layer
# NOTE(review): Poetry is unpinned, so rebuilds may pick up a newer release —
# consider pinning it to the version that generated poetry.lock
RUN pip install --no-cache-dir poetry

# Install third-party dependencies only. The project's own source has not
# been copied yet, so --no-root stops Poetry from trying to install it here
RUN poetry install --no-interaction --no-root

# Copy the rest of the application code
COPY . .

# Install the project itself now that its source is present
RUN poetry install --no-interaction

# Documents the port the app is expected to listen on; EXPOSE does not
# publish it, and this file defines no CMD/ENTRYPOINT to start a server
EXPOSE 8000

# Run the Ploomber pipeline at image build time so its products are baked
# into the image
RUN poetry run ploomber build
4 changes: 2 additions & 2 deletions mini-projects/movie-rec-system/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ poetry lock
poetry install
```

5. Run the app
5. Run the app as a Ploomber pipeline

```
cd mini-projects/
poetry run python movie_rec_system/src/extract.py
poetry run ploomber build
```
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -19,28 +19,59 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<span style=\"None\">Found pyproject.toml from &#x27;C:\\Users\\jpjon\\Documents\\Ploomber\\sql\\mini-projects\\movie-rec-system&#x27;</span>"
],
"text/plain": [
"Found pyproject.toml from 'C:\\Users\\jpjon\\Documents\\Ploomber\\sql\\mini-projects\\movie-rec-system'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%reload_ext sql\n",
"%sql duckdb:///../../movies_data.duckdb"
"%sql duckdb:///movies_data.duckdb"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"TableNotFoundError: There is no table with name 'movies' in the default schema\n",
"If you need help solving this issue, send us a message: https://ploomber.io/community\n"
]
}
],
"source": [
"%sqlcmd columns -t movies"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"TableNotFoundError: There is no table with name 'genres' in the default schema\n",
"If you need help solving this issue, send us a message: https://ploomber.io/community\n"
]
}
],
"source": [
"%sqlcmd columns -t genres"
]
Expand Down Expand Up @@ -278,9 +309,8 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
},
"orig_nbformat": 4
"version": "3.10.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
Expand Down
Empty file.
9 changes: 9 additions & 0 deletions mini-projects/movie-rec-system/pipeline.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Ploomber pipeline definition: each task lists its source and its products
tasks:
  # Runs the extract script; produces an executed notebook and the DuckDB file
  - source: movie_rec_system/etl/extract.py
    product:
      nb: movie_rec_system/products/extract-pipeline.ipynb
      data: movies_data.duckdb

  # Runs the EDA notebook with static analysis turned off
  - source: movie_rec_system/etl/eda.ipynb
    static_analysis: disable
    product:
      nb: movie_rec_system/products/eda-pipeline.ipynb
Loading

0 comments on commit 7ec1e26

Please sign in to comment.