-
Notifications
You must be signed in to change notification settings - Fork 217
/
Copy pathDockerfile
75 lines (57 loc) · 2.64 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# syntax=docker/dockerfile:1
# check=skip=InvalidDefaultArgInFrom
# Build arguments that select the base image tag. Neither has a default, so
# both have to be supplied at build time (which is why the
# InvalidDefaultArgInFrom build check is skipped above).
# Cannot use $PYTHON_VERSION because that is overwritten by the base image
# with the patch version (we only care about major and minor version)
ARG CATALOG_PY_VERSION
ARG CATALOG_AIRFLOW_VERSION
# Single build stage, named `cat`, based on the slim Airflow image for the
# requested Airflow and Python versions
FROM apache/airflow:slim-${CATALOG_AIRFLOW_VERSION}-python${CATALOG_PY_VERSION} AS cat
# OCI annotation pointing at the repository this image is built from
LABEL org.opencontainers.image.source="https://github.com/WordPress/openverse"
# Build-time argument: the requirements file to install. Defaults to the
# production requirements.
ARG REQUIREMENTS_FILE=requirements-prod.txt

# Path configuration. AIRFLOW_HOME and DAGS_FOLDER each get their own ENV
# instruction because a value set by ENV is only visible to the instructions
# that come after it, and the variables below are derived from them.
ENV AIRFLOW_HOME=/opt/airflow
ENV DAGS_FOLDER=${AIRFLOW_HOME}/catalog/dags
ENV PYTHONPATH=${DAGS_FOLDER} \
    PATH=${AIRFLOW_HOME}/.local/bin:$PATH \
    IPYTHONDIR=/home/airflow/.cache/ipython

# Container optimizations
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_NO_COLOR=1

# Airflow/workflow configuration
ENV OUTPUT_DIR=/var/workflow_output/ \
    AIRFLOW__CORE__DAGS_FOLDER=${DAGS_FOLDER} \
    AIRFLOW__CORE__LOAD_EXAMPLES=False \
    AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False \
    AIRFLOW__CORE__ENABLE_XCOM_PICKLING=True \
    AIRFLOW__WEBSERVER__EXPOSE_STACKTRACE=True

# Remote logging
# TODO: Test if moving this to .env changes anything!
ENV AIRFLOW__LOGGING__REMOTE_LOGGING=True \
    AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID=aws_default \
    AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER=s3://openverse-airflow-logs
# Root is needed to install system packages and to create/chown directories
USER root

# Build toolchain plus the libffi and libpq development packages. The apt
# package lists are removed in the same layer so they do not end up in the
# image.
RUN apt-get update && \
    apt-get install -y -qq --no-install-recommends \
        build-essential \
        libffi-dev \
        libpq-dev && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/*

# Create the workflow output directory and the cache directory, and hand both
# to the airflow user so it can write to them. The cache directory is not
# needed in production, but it is simplest to set it up here alongside the
# output directory.
RUN mkdir -p ${OUTPUT_DIR} /home/airflow/.cache \
    && chown airflow ${OUTPUT_DIR} /home/airflow/.cache
# Drop privileges: every following instruction, and the container's default
# runtime user, is the unprivileged airflow user
USER airflow
WORKDIR ${AIRFLOW_HOME}

# Always add the prod reqs because the dev reqs depend on them for deduplication
COPY ${REQUIREMENTS_FILE} requirements-prod.txt ${AIRFLOW_HOME}/

# ARGs declared before FROM are only visible to FROM itself, so they must be
# redeclared inside the build stage before they can be used here
ARG CATALOG_PY_VERSION
ARG CATALOG_AIRFLOW_VERSION
# https://airflow.apache.org/docs/apache-airflow/stable/installation/installing-from-pypi.html#constraints-files
ARG CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-${CATALOG_AIRFLOW_VERSION}/constraints-${CATALOG_PY_VERSION}.txt"

# Both values can be overridden with --build-arg, so the expansions are quoted:
# an unquoted value containing spaces, `&`, `?` or `*` would be split, globbed
# or interpreted by the shell instead of being passed to pip as one argument.
RUN pip install -r "${REQUIREMENTS_FILE}" -c "${CONSTRAINTS_FILE}"

COPY entrypoint.sh /opt/airflow/entrypoint.sh
COPY variables.tsv /opt/airflow/variables.tsv

# Exec form (no shell wrapper): dumb-init is the process that gets started and
# it in turn launches the entrypoint script
ENTRYPOINT ["/usr/bin/dumb-init", "--", "/opt/airflow/entrypoint.sh"]