Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor entrypoints #175

Merged
merged 19 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ docs/
.tox/
.scripts/
.tests/
.venv/
minikube/
1 change: 1 addition & 0 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ jobs:
fetch-depth: 0
- name: Install uv
run: curl -LsSf https://astral.sh/uv/install.sh | sh

- name: "Set up Python"
run: uv python install
- run: |
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,5 @@ cov.xml
.DS_Store

data/

minikube/
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.9
3.10
22 changes: 22 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Ubuntu 24.04 base image; Python itself is installed later via uv.
FROM ubuntu:24.04

LABEL maintainer="vijay.vammi@astrazeneca.com"

# git, curl and CA certificates are needed to fetch uv and resolve dependencies.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    ca-certificates \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install the pinned uv release, then remove the installer script.
ADD https://astral.sh/uv/0.5.12/install.sh /uv-installer.sh
RUN sh /uv-installer.sh && rm /uv-installer.sh
ENV PATH="/root/.local/bin/:$PATH"

COPY . /app
WORKDIR /app

# Install the project's Python and sync all dependencies (frozen lockfile,
# all extras) from the internal package index.
RUN uv python install && \
    uv sync --index https://artifactory.astrazeneca.net/api/pypi/pypi-virtual/simple/ --frozen --all-extras

# Put the project's virtualenv first on PATH so its interpreter is the default.
ENV PATH="/app/.venv/bin:$PATH"
File renamed without changes.
1 change: 0 additions & 1 deletion examples/01-tasks/python_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def main():
pipeline = Pipeline(steps=[hello_task])

pipeline.execute()

return pipeline


Expand Down
23 changes: 23 additions & 0 deletions examples/11-jobs/catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Job example: cataloging artifacts produced by a task.

Builds a single-task Job whose task writes files and registers them in the
catalog under the configured put-paths.
"""

from examples.common.functions import write_files
from runnable import Catalog, Job, PythonTask

print("Running catalog.py")


def main():
    """Build and execute the catalog job; return the Job for inspection."""
    # The task generates files and publishes them to the catalog.
    generate_data = PythonTask(
        name="generate_data",
        function=write_files,
        catalog=Catalog(put=["df.csv", "data_folder/data.txt"]),
    )

    catalog_job = Job(name="catalog", task=generate_data)
    catalog_job.execute()
    return catalog_job


if __name__ == "__main__":
    main()
5 changes: 5 additions & 0 deletions examples/11-jobs/catalog.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
type: python
command: examples.common.functions.write_files
catalog:
- "*"
- data_folder/data.txt
41 changes: 41 additions & 0 deletions examples/11-jobs/k8s-job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
job-executor:
type: "k8s-job"
config:
jobSpec:
# activeDeadlineSeconds: Optional[int]
# selector: Optional[LabelSelector]
# ttlSecondsAfterFinished: Optional[int]
template:
# metadata:
# annotations: Optional[Dict[str, str]]
# generate_name: Optional[str] = run_id
# namespace: Optional[str] = "default"
spec:
# activeDeadlineSeconds: Optional[int]
# nodeSelector: Optional[Dict[str, str]]
# tolerations: Optional[List[Toleration]]
# volumes:
# - name: str
# hostPath:
# path: str
# serviceAccountName: Optional[str]
# restartPolicy: Optional[str] = Choose from [Always, OnFailure, Never]
container:
# command: List[str]
# env:
# - name: str
# value: str
image: runnable-m1
# imagePullPolicy: Optional[str] = choose from [Always, Never, IfNotPresent]
# resources:
# limits:
# cpu: str
# memory: str
# gpu: str
# requests:
# cpu: str
# memory: str
# gpu: str
# volumeMounts:
# - name: str
# mountPath: str
4 changes: 4 additions & 0 deletions examples/11-jobs/local-container.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
job-executor:
type: "local-container" # (1)
config:
docker_image: runnable-m1:latest # (2)
3 changes: 3 additions & 0 deletions examples/11-jobs/notebook.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# name: notebook job
type: notebook
command: examples/common/simple_notebook.ipynb # The path is relative to the root of the project.
47 changes: 47 additions & 0 deletions examples/11-jobs/passing_parameters_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
The below example shows how to set/get parameters in python
tasks of the pipeline.

The function, set_parameter, returns
- JSON serializable types
- pydantic models
- pandas dataframe, any "object" type

pydantic models are implicitly handled by runnable
but "object" types should be marked as "pickled".

Use pickled even for python data types is advised for
reasonably large collections.

Run the below example as:
python examples/03-parameters/passing_parameters_python.py

"""

from examples.common.functions import write_parameter
from runnable import Job, PythonTask, metric, pickled


def main():
write_parameters = PythonTask(
function=write_parameter,
returns=[
pickled("df"),
"integer",
"floater",
"stringer",
"pydantic_param",
metric("score"),
],
name="set_parameter",
)

job = Job(name="set_parameters", task=write_parameters)

job.execute()

return job


if __name__ == "__main__":
main()
11 changes: 11 additions & 0 deletions examples/11-jobs/python_parameters.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
type: python
command: examples.common.functions.write_parameter
returns:
- name: df
kind: object
- name: integer
- name: floater
- name: stringer
- name: pydantic_param
- name: score
kind: metric
44 changes: 44 additions & 0 deletions examples/11-jobs/python_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
You can execute this pipeline by:

python examples/01-tasks/python_tasks.py

The stdout of "Hello World!" would be captured as execution
log and stored in the catalog.

An example of the catalog structure:

.catalog
└── baked-heyrovsky-0602
└── hello.execution.log

2 directories, 1 file


The hello.execution.log has the captured stdout of "Hello World!".
"""

from examples.common.functions import hello
from runnable import Job, PythonTask


def main():
# Create a tasks which calls the function "hello"
# If this step executes successfully,
# the pipeline will terminate with success
hello_task = PythonTask(
name="hello",
function=hello,
terminate_with_success=True,
)

# The pipeline has only one step.
job = Job(name="hello", task=hello_task)

job.execute()

return job


if __name__ == "__main__":
main()
2 changes: 2 additions & 0 deletions examples/11-jobs/python_tasks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
type: python
command: examples.common.functions.hello # dotted path to the function.
20 changes: 20 additions & 0 deletions examples/11-jobs/scripts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
dag:
description: |
This is a sample pipeline with one step that
executes a shell command.

You can run this pipeline by:
runnable execute -f examples/11-jobs/scripts.yaml

For example:
.catalog
└── seasoned-perlman-1355
└── hello.execution.log

start_at: shell
steps:
shell:
type: task
command_type: shell
command: echo "hello world!!" # the command is run in a shell; no script file is needed.
next: success
14 changes: 7 additions & 7 deletions examples/common/simple_notebook_out.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "bd34d156",
"id": "215adf58",
"metadata": {
"ploomber": {
"timestamp_end": 1714453073.951735,
"timestamp_start": 1714453073.951505
"timestamp_end": 1735514269.76332,
"timestamp_start": 1735514269.76314
},
"tags": [
"injected-parameters"
Expand All @@ -24,8 +24,8 @@
"id": "3e98e89e-765c-42d4-81ea-c371c2eab14d",
"metadata": {
"ploomber": {
"timestamp_end": 1714453073.951955,
"timestamp_start": 1714453073.95176
"timestamp_end": 1735514269.763565,
"timestamp_start": 1735514269.763376
}
},
"outputs": [],
Expand All @@ -40,8 +40,8 @@
"id": "8eac7a3f",
"metadata": {
"ploomber": {
"timestamp_end": 1714453073.952089,
"timestamp_start": 1714453073.951969
"timestamp_end": 1735514269.763689,
"timestamp_start": 1735514269.763579
}
},
"outputs": [
Expand Down
12 changes: 6 additions & 6 deletions examples/configs/argo-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
executor:
pipeline-executor:
type: "argo" # (1)
config:
image: harbor.csis.astrazeneca.net/mlops/runnable:latest # (2)
Expand All @@ -7,12 +7,12 @@ executor:
- name: magnus-volume
mount_path: /mnt

run_log_store: # (4)
run-log-store: # (4)
type: chunked-fs
config:
log_folder: /mnt/run_log_store
# config:
# log_folder: /mnt/run_log_store

catalog:
type: file-system
config:
catalog_location: /mnt/catalog
# config:
# catalog_location: /mnt/catalog
2 changes: 1 addition & 1 deletion examples/configs/chunked-fs-run_log.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
run_log_store:
run-log-store:
type: chunked-fs
4 changes: 2 additions & 2 deletions examples/configs/default.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
pipeline-executor:
type: local # (1)

run_log_store:
run-log-store:
type: buffered # (2)

catalog:
Expand Down
6 changes: 3 additions & 3 deletions examples/configs/local-container.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
pipeline-executor:
type: "local-container" # (1)
config:
docker_image: runnable:latest # (2)
docker_image: runnable-m1:latest # (2)

run_log_store: # (4)
run-log-store: # (4)
type: chunked-fs
4 changes: 2 additions & 2 deletions examples/configs/mocked-config-debug.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
catalog:
type: file-system # (1)

run_log_store:
run-log-store:
type: file-system # (1)

executor:
pipeline-executor:
type: mocked
config:
patches:
Expand Down
4 changes: 2 additions & 2 deletions examples/configs/mocked-config-simple.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
catalog:
type: file-system # (1)

run_log_store:
run-log-store:
type: file-system # (1)

executor:
pipeline-executor:
type: mocked
4 changes: 2 additions & 2 deletions examples/configs/mocked-config-unittest.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
catalog:
type: file-system # (1)

run_log_store:
run-log-store:
type: file-system # (1)

executor:
pipeline-executor:
type: mocked
config:
patches:
Expand Down
4 changes: 2 additions & 2 deletions examples/configs/mocked-config.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
catalog:
type: file-system # (1)

run_log_store:
run-log-store:
type: file-system # (1)

executor:
pipeline-executor:
type: mocked
config:
patches:
Expand Down
Loading
Loading