Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor entrypoints #175

Merged
merged 19 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ docs/
.tox/
.scripts/
.tests/
.venv/
minikube/
1 change: 1 addition & 0 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ jobs:
fetch-depth: 0
- name: Install uv
run: curl -LsSf https://astral.sh/uv/install.sh | sh

- name: "Set up Python"
run: uv python install
- run: |
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,5 @@ cov.xml
.DS_Store

data/

minikube/
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.9
3.10
22 changes: 22 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Ubuntu 24.04 base image; Python itself is installed later via uv.
FROM ubuntu:24.04

LABEL maintainer="vijay.vammi@astrazeneca.com"

# git, curl and CA certificates are needed to fetch uv and resolve dependencies.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    ca-certificates \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install the pinned uv release, then remove the installer script.
ADD https://astral.sh/uv/0.5.12/install.sh /uv-installer.sh
RUN sh /uv-installer.sh && rm /uv-installer.sh
ENV PATH="/root/.local/bin/:$PATH"

COPY . /app
WORKDIR /app

# Install the project's Python and sync all dependencies (frozen lockfile,
# all extras) from the internal package index.
RUN uv python install && \
    uv sync --index https://artifactory.astrazeneca.net/api/pypi/pypi-virtual/simple/ --frozen --all-extras

# Put the project's virtualenv first on PATH so its interpreter is the default.
ENV PATH="/app/.venv/bin:$PATH"
File renamed without changes.
1 change: 0 additions & 1 deletion examples/01-tasks/python_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def main():
pipeline = Pipeline(steps=[hello_task])

pipeline.execute()

return pipeline


Expand Down
23 changes: 23 additions & 0 deletions examples/11-jobs/catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Job example: cataloging artifacts produced by a task.

Builds a single-task Job whose task writes files and registers them in the
catalog under the configured put-paths.
"""

from examples.common.functions import write_files
from runnable import Catalog, Job, PythonTask

print("Running catalog.py")


def main():
    """Build and execute the catalog job; return the Job for inspection."""
    # The task generates files and publishes them to the catalog.
    generate_data = PythonTask(
        name="generate_data",
        function=write_files,
        catalog=Catalog(put=["df.csv", "data_folder/data.txt"]),
    )

    catalog_job = Job(name="catalog", task=generate_data)
    catalog_job.execute()
    return catalog_job


if __name__ == "__main__":
    main()
5 changes: 5 additions & 0 deletions examples/11-jobs/catalog.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
type: python
command: examples.common.functions.write_files
catalog:
- "*"
- data_folder/data.txt
41 changes: 41 additions & 0 deletions examples/11-jobs/k8s-job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
job-executor:
type: "k8s-job"
config:
jobSpec:
# activeDeadlineSeconds: Optional[int]
# selector: Optional[LabelSelector]
# ttlSecondsAfterFinished: Optional[int]
template:
# metadata:
# annotations: Optional[Dict[str, str]]
# generate_name: Optional[str] = run_id
# namespace: Optional[str] = "default"
spec:
# activeDeadlineSeconds: Optional[int]
# nodeSelector: Optional[Dict[str, str]]
# tolerations: Optional[List[Toleration]]
# volumes:
# - name: str
# hostPath:
# path: str
# serviceAccountName: Optional[str]
# restartPolicy: Optional[str] = Choose from [Always, OnFailure, Never]
container:
# command: List[str]
# env:
# - name: str
# value: str
image: runnable-m1
# imagePullPolicy: Optional[str] = choose from [Always, Never, IfNotPresent]
# resources:
# limits:
# cpu: str
# memory: str
# gpu: str
# requests:
# cpu: str
# memory: str
# gpu: str
# volumeMounts:
# - name: str
# mountPath: str
4 changes: 4 additions & 0 deletions examples/11-jobs/local-container.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
job-executor:
type: "local-container" # (1)
config:
docker_image: runnable-m1:latest # (2)
3 changes: 3 additions & 0 deletions examples/11-jobs/notebook.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# name: notebook job
type: notebook
command: examples/common/simple_notebook.ipynb # The path is relative to the root of the project.
47 changes: 47 additions & 0 deletions examples/11-jobs/passing_parameters_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
The below example shows how to set/get parameters in python
tasks of the pipeline.

The function, set_parameter, returns
- JSON serializable types
- pydantic models
- pandas dataframe, any "object" type

pydantic models are implicitly handled by runnable
but "object" types should be marked as "pickled".

Use pickled even for python data types is advised for
reasonably large collections.

Run the below example as:
python examples/03-parameters/passing_parameters_python.py

"""

from examples.common.functions import write_parameter
from runnable import Job, PythonTask, metric, pickled


def main():
write_parameters = PythonTask(
function=write_parameter,
returns=[
pickled("df"),
"integer",
"floater",
"stringer",
"pydantic_param",
metric("score"),
],
name="set_parameter",
)

job = Job(name="set_parameters", task=write_parameters)

job.execute()

return job


if __name__ == "__main__":
main()
11 changes: 11 additions & 0 deletions examples/11-jobs/python_parameters.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
type: python
command: examples.common.functions.write_parameter
returns:
- name: df
kind: object
- name: integer
- name: floater
- name: stringer
- name: pydantic_param
- name: score
kind: metric
44 changes: 44 additions & 0 deletions examples/11-jobs/python_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
You can execute this pipeline by:

python examples/01-tasks/python_tasks.py

The stdout of "Hello World!" would be captured as execution
log and stored in the catalog.

An example of the catalog structure:

.catalog
└── baked-heyrovsky-0602
└── hello.execution.log

2 directories, 1 file


The hello.execution.log has the captured stdout of "Hello World!".
"""

from examples.common.functions import hello
from runnable import Job, PythonTask


def main():
# Create a tasks which calls the function "hello"
# If this step executes successfully,
# the pipeline will terminate with success
hello_task = PythonTask(
name="hello",
function=hello,
terminate_with_success=True,
)

# The pipeline has only one step.
job = Job(name="hello", task=hello_task)

job.execute()

return job


if __name__ == "__main__":
main()
2 changes: 2 additions & 0 deletions examples/11-jobs/python_tasks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
type: python
command: examples.common.functions.hello # dotted path to the function.
20 changes: 20 additions & 0 deletions examples/11-jobs/scripts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
dag:
description: |
This is a sample pipeline with one step that
executes a shell command.

You can run this pipeline by:
runnable execute -f examples/11-jobs/scripts.yaml

For example:
.catalog
└── seasoned-perlman-1355
└── hello.execution.log

start_at: shell
steps:
shell:
type: task
command_type: shell
command: echo "hello world!!" # the command is run in a shell; no script file is needed.
next: success
14 changes: 7 additions & 7 deletions examples/common/simple_notebook_out.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "bd34d156",
"id": "215adf58",
"metadata": {
"ploomber": {
"timestamp_end": 1714453073.951735,
"timestamp_start": 1714453073.951505
"timestamp_end": 1735514269.76332,
"timestamp_start": 1735514269.76314
},
"tags": [
"injected-parameters"
Expand All @@ -24,8 +24,8 @@
"id": "3e98e89e-765c-42d4-81ea-c371c2eab14d",
"metadata": {
"ploomber": {
"timestamp_end": 1714453073.951955,
"timestamp_start": 1714453073.95176
"timestamp_end": 1735514269.763565,
"timestamp_start": 1735514269.763376
}
},
"outputs": [],
Expand All @@ -40,8 +40,8 @@
"id": "8eac7a3f",
"metadata": {
"ploomber": {
"timestamp_end": 1714453073.952089,
"timestamp_start": 1714453073.951969
"timestamp_end": 1735514269.763689,
"timestamp_start": 1735514269.763579
}
},
"outputs": [
Expand Down
12 changes: 6 additions & 6 deletions examples/configs/argo-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
executor:
pipeline-executor:
type: "argo" # (1)
config:
image: harbor.csis.astrazeneca.net/mlops/runnable:latest # (2)
Expand All @@ -7,12 +7,12 @@ executor:
- name: magnus-volume
mount_path: /mnt

run_log_store: # (4)
run-log-store: # (4)
type: chunked-fs
config:
log_folder: /mnt/run_log_store
# config:
# log_folder: /mnt/run_log_store

catalog:
type: file-system
config:
catalog_location: /mnt/catalog
# config:
# catalog_location: /mnt/catalog
2 changes: 1 addition & 1 deletion examples/configs/chunked-fs-run_log.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
run_log_store:
run-log-store:
type: chunked-fs
4 changes: 2 additions & 2 deletions examples/configs/default.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
pipeline-executor:
type: local # (1)

run_log_store:
run-log-store:
type: buffered # (2)

catalog:
Expand Down
6 changes: 3 additions & 3 deletions examples/configs/local-container.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
pipeline-executor:
type: "local-container" # (1)
config:
docker_image: runnable:latest # (2)
docker_image: runnable-m1:latest # (2)

run_log_store: # (4)
run-log-store: # (4)
type: chunked-fs
4 changes: 2 additions & 2 deletions examples/configs/mocked-config-debug.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
catalog:
type: file-system # (1)

run_log_store:
run-log-store:
type: file-system # (1)

executor:
pipeline-executor:
type: mocked
config:
patches:
Expand Down
4 changes: 2 additions & 2 deletions examples/configs/mocked-config-simple.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
catalog:
type: file-system # (1)

run_log_store:
run-log-store:
type: file-system # (1)

executor:
pipeline-executor:
type: mocked
4 changes: 2 additions & 2 deletions examples/configs/mocked-config-unittest.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
catalog:
type: file-system # (1)

run_log_store:
run-log-store:
type: file-system # (1)

executor:
pipeline-executor:
type: mocked
config:
patches:
Expand Down
4 changes: 2 additions & 2 deletions examples/configs/mocked-config.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
catalog:
type: file-system # (1)

run_log_store:
run-log-store:
type: file-system # (1)

executor:
pipeline-executor:
type: mocked
config:
patches:
Expand Down
Loading
Loading