-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdefault.Dockerfile
209 lines (158 loc) · 7.95 KB
/
default.Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# Base image. The tag is exposed as a build argument so that a build can pin a
# specific release (`docker build --build-arg BASE_TAG=<tag> ...`) instead of
# tracking the moving `latest` tag. The default preserves the previous behaviour.
ARG BASE_TAG=latest
FROM jupyter/scipy-notebook:${BASE_TAG}
# System packages have to be installed as root; the notebook user is restored
# at the end of this section.
USER root
# DEBIAN_FRONTEND is exported so that it applies to every apt-get call in this
# layer. Written as a prefix on `apt-get update` only, it never reached
# `apt-get install`, which is the command that configures packages.
# The package index and apt caches are removed in the same layer.
RUN export DEBIAN_FRONTEND=noninteractive \
    && apt-get -qq update \
    && apt-get -qq install -y --no-install-recommends \
        git libav-tools cmake build-essential liblapacke-dev \
# needed by vowpal wabbit
        libboost-program-options-dev zlib1g-dev libboost-all-dev \
# needed by libhunspell (currently disabled)
# libhunspell-dev \
# needed by magenta
        libasound2-dev portaudio19-dev \
# presumably needed by torchaudio -- TODO confirm
        libjack-dev libsox-fmt-all libsox-dev sox \
    && apt-get -qq autoremove -y \
    && apt-get -qq clean \
    && rm -rf /var/lib/apt/lists/*
USER $NB_UID
# Turn off channel priority, then bring conda itself up to date.
# NOTE: newer conda releases call the default environment "base" rather than
# "root"; the name below will need changing when the base image moves on.
RUN conda config --set channel_priority false \
    && conda update --name root --yes conda
# Apply the environment file to the root conda environment, force-remove the
# qt / pyqt packages, and enable the widgetsnbextension and qgrid extensions
# (classic notebook and JupyterLab).
# conda's caches, the JupyterLab staging directory and the yarn cache are
# deleted in the same layer so that they do not end up in the image. The
# staging directory was previously removed twice; once is enough.
COPY files/environment.default.yaml environment.yaml
RUN conda env update -n root --file=environment.yaml -q \
    && conda remove qt pyqt --quiet --yes --force \
    && conda clean -tipsy \
    && jupyter nbextension enable --py --sys-prefix widgetsnbextension \
    && jupyter nbextension enable --py --sys-prefix qgrid \
    && jupyter labextension install qgrid \
    && rm -rf $CONDA_DIR/share/jupyter/lab/staging \
    && rm -rf /home/$NB_USER/.cache/yarn
#RUN jupyter labextension install @jupyterlab/google-drive
# RUN jupyter labextension install @jupyterlab/github
# RUN jupyter labextension install jupyterlab_bokeh
# IPython / matplotlib defaults for the notebook user.
# The directories are created first (as the current user) so that the COPY
# instructions below drop their files into existing directories.
# matplotlibrc selects the non-interactive 'agg' backend; the startup script and
# ipython_config.py presumably switch notebooks to the inline backend -- their
# contents live under files/ and are not visible here.
RUN mkdir -p $HOME/.ipython/profile_default/startup $HOME/.config/matplotlib \
    && echo 'backend: agg' > $HOME/.config/matplotlib/matplotlibrc
COPY files/mplimportnotebook.py $HOME/.ipython/profile_default/startup/
COPY files/ipython_config.py $HOME/.ipython/profile_default/ipython_config.py
# Per-user bin directory that receives the command-line tools built below.
# It is placed first on PATH so those tools are found without a full path.
# `key=value` is the current ENV syntax (the space-separated form is legacy),
# and `mkdir -p` keeps the build working if the directory already exists.
ENV PATH=$HOME/bin:$PATH
RUN mkdir -p $HOME/bin
# fastText is not available through conda: build the command-line binary from
# a shallow clone, move it into $HOME/bin, install the Python package from the
# same checkout, then delete the checkout.
# --no-cache-dir keeps pip's cache out of the image layer.
RUN git clone --depth 1 https://github.com/facebookresearch/fastText.git \
    && cd fastText \
    && make \
    && mv fasttext $HOME/bin \
    && pip install --no-cache-dir . \
    && cd .. \
    && rm -rf fastText
# Regularized Greedy Forests: fetch the rgf1.2 source archive, build it, keep
# only the `rgf` executable in $HOME/bin, and remove the archive and sources.
RUN wget https://github.com/fukatani/rgf_python/releases/download/0.2.0/rgf1.2.zip \
    && unzip -q rgf1.2.zip \
    && ( cd rgf1.2 && make && mv bin/rgf $HOME/bin ) \
    && rm -rf rgf*
# FTRL: build libftrl.so with cmake/make, place it inside the `ftrl` Python
# package directory, install that package with pip, then delete the checkout.
# --no-cache-dir keeps pip's cache out of the image layer.
RUN git clone --depth 1 https://github.com/alexeygrigorev/libftrl-python \
    && cd libftrl-python \
    && cmake . \
    && make \
    && mv libftrl.so ftrl/ \
    && pip install --no-cache-dir . \
    && cd .. \
    && rm -rf libftrl-python
# Vowpal Wabbit: build the `vw` and `spanning_tree` targets from a shallow
# clone and copy the vw, active_interactor and spanning_tree executables into
# $HOME/bin before discarding the sources.
RUN git clone --depth 1 https://github.com/JohnLangford/vowpal_wabbit.git \
    && ( cd vowpal_wabbit \
         && make vw \
         && make spanning_tree \
         && cp vowpalwabbit/vw vowpalwabbit/active_interactor cluster/spanning_tree $HOME/bin/ ) \
    && rm -rf vowpal_wabbit
# libFM: build every target and move everything the build leaves in bin/ into
# $HOME/bin, then remove the checkout.
RUN git clone --depth 1 https://github.com/srendle/libfm.git \
    && ( cd libfm && make all && mv bin/* $HOME/bin/ ) \
    && rm -rf libfm
# FastRGF: the sed call strips the leading '#' characters from line 10 of
# CMakeLists.txt (i.e. uncomments that line) before the cmake build; the
# resulting contents of bin/ are moved into $HOME/bin and the checkout removed.
RUN git clone --depth 1 https://github.com/baidu/fast_rgf.git \
    && ( cd fast_rgf \
         && sed -i '10 s/^##*//' CMakeLists.txt \
         && ( cd build && cmake .. && make && make install ) \
         && mv bin/* $HOME/bin ) \
    && rm -rf fast_rgf
# Instructions from here on run as the notebook user.
USER $NB_UID
# RUN git clone --depth 1 https://github.com/PAIR-code/facets.git && cd facets && jupyter nbextension install facets-dist/ --sys-prefix
# ENV PYTHONPATH $HOME/facets/facets_overview/python/:$PYTHONPATH
# libffm: build from a shallow clone and keep only the ffm-predict and
# ffm-train executables in $HOME/bin.
RUN git clone --depth 1 https://github.com/guestwalk/libffm.git \
    && ( cd libffm && make && cp ffm-predict ffm-train $HOME/bin/ ) \
    && rm -rf libffm
# RUN git clone https://github.com/alno/batch-learn.git && cd batch-learn && mkdir build && cd build && cmake .. && make && cp batch-learn $HOME/bin/ && cd ../.. && rm -rf batch-learn
# "Data Science at the Command Line" helper tools: move everything under
# tools/ of the repository into $HOME/bin and drop the rest of the checkout.
RUN git clone --depth 1 https://github.com/jeroenjanssens/data-science-at-the-command-line.git \
    && mv data-science-at-the-command-line/tools/* $HOME/bin/ \
    && rm -rf data-science-at-the-command-line
# NLP DATA
# RUN python -m nltk.downloader all \
# && find $HOME/nltk_data -type f -name "*.zip" -delete
# RUN python -m spacy download en
# RUN python -m textblob.download_corpora
# Run `disp.install()` once at build time through IPython.
# NOTE(review): `disp` is presumably provided by the conda environment file
# applied earlier, and install() presumably registers it with the IPython
# profile -- confirm against the package documentation.
RUN ipython -c 'import disp; disp.install()'
# R
# RUN R -e "install.packages('CausalImpact', '$CONDA_DIR/lib/R/library', repos = 'http://cran.us.r-project.org')"
# Rstudio Server
# ENV RSTUDIO_WHICH_R='$CONDA_DIR/bin/R'
# USER root
# RUN apt-get update \
# && apt-get install -y --no-install-recommends wget \
# && rstudio_version=$(wget --no-check-certificate -qO- https://s3.amazonaws.com/rstudio-server/current.ver) \
# && wget https://download2.rstudio.org/rstudio-server-${rstudio_version}-amd64.deb -O /rstudio-server.deb \
# && apt-get install -y --no-install-recommends /rstudio-server.deb \
# && rm /rstudio-server.deb
# RUN echo "rsession-which-r=$CONDA_DIR/bin/R" >> /etc/rstudio/rserver.conf
# Julia dependencies
# # install Julia packages in /opt/julia instead of $HOME
# ENV JULIA_PKGDIR=/opt/julia
# ENV JULIA_VERSION=0.6.2
# RUN mkdir /opt/julia-${JULIA_VERSION} && \
# cd /tmp && \
# wget -q https://julialang-s3.julialang.org/bin/linux/x64/`echo ${JULIA_VERSION} | cut -d. -f 1,2`/julia-${JULIA_VERSION}-linux-x86_64.tar.gz && \
# echo "dc6ec0b13551ce78083a5849268b20684421d46a7ec46b17ec1fab88a5078580 *julia-${JULIA_VERSION}-linux-x86_64.tar.gz" | sha256sum -c - && \
# tar xzf julia-${JULIA_VERSION}-linux-x86_64.tar.gz -C /opt/julia-${JULIA_VERSION} --strip-components=1 && \
# rm /tmp/julia-${JULIA_VERSION}-linux-x86_64.tar.gz
# RUN ln -fs /opt/julia-*/bin/julia /usr/local/bin/julia
# # Show Julia where conda libraries are \
# RUN mkdir /etc/julia && \
# echo "push!(Libdl.DL_LOAD_PATH, \"$CONDA_DIR/lib\")" >> /etc/julia/juliarc.jl && \
# # Create JULIA_PKGDIR \
# mkdir $JULIA_PKGDIR && \
# chown $NB_USER $JULIA_PKGDIR && \
# fix-permissions $JULIA_PKGDIR
# USER $NB_UID
# # Add Julia packages
# # Install IJulia as jovyan and then move the kernelspec out
# # to the system share location. Avoids problems with runtime UID change not
# # taking effect properly on the .local folder in the jovyan home dir.
# RUN julia -e 'Pkg.init()' && \
# julia -e 'Pkg.update()' && \
# # julia -e 'Pkg.add("HDF5")' && \
# julia -e 'Pkg.add("Gadfly")' && \
# julia -e 'Pkg.add("RDatasets")' && \
# julia -e 'Pkg.add("IJulia")' && \
# # Precompile Julia packages \
# # julia -e 'using HDF5' && \
# julia -e 'using Gadfly' && \
# julia -e 'using RDatasets' && \
# julia -e 'using IJulia'
# #COPY files/julia_packages.jl julia_packages.jl
# #RUN julia julia_packages.jl && \
# # move kernelspec out of home \
# RUN mv $HOME/.local/share/jupyter/kernels/julia* $CONDA_DIR/share/jupyter/kernels/ && \
# chmod -R go+rx $CONDA_DIR/share/jupyter && \
# rm -rf $HOME/.local && \
# fix-permissions $JULIA_PKGDIR $CONDA_DIR/share/jupyter
# Install the catboost-viewer CLI globally and purge the npm cache in the same
# layer so the cache is not stored in the image.
RUN npm install --global catboost-viewer \
    && npm cache clean --force
# Ports advertised by the image:
#   6006 - TensorBoard
#   8787 - RStudio Server
EXPOSE 6006 8787
# USER root
# LIBSVM: build from a shallow clone and keep the svm-predict, svm-train and
# svm-scale executables in $HOME/bin.
# TODO: install the Python interface, and maybe the Java one too.
RUN git clone --depth 1 https://github.com/cjlin1/libsvm \
    && ( cd libsvm && make && mv svm-predict svm-train svm-scale $HOME/bin/ ) \
    && rm -rf libsvm
# LIBLINEAR: build from a shallow clone; the generic `predict` / `train`
# executables are renamed with a `liblinear-` prefix when moved into $HOME/bin.
# TODO: install the Python interface.
RUN git clone --depth 1 https://github.com/cjlin1/liblinear \
    && ( cd liblinear \
         && make \
         && mv predict $HOME/bin/liblinear-predict \
         && mv train $HOME/bin/liblinear-train ) \
    && rm -rf liblinear
# for dataiku
# RUN DEBIAN_FRONTEND=noninteractive apt-get -qq update && apt-get -qq install -y --no-install-recommends acl curl nginx-full zip \
# && apt-get -qq autoremove -y && apt-get -qq clean \
# && rm -rf /var/lib/apt/lists/*
# USER $NB_UID
# RUN wget https://downloads.dataiku.com/public/studio/4.1.3/dataiku-dss-4.1.3.tar.gz && \
# tar xzf dataiku-dss-4.1.3.tar.gz && \
# dataiku-dss-4.1.3/installer.sh -d $HOME/dataiku -p 11000 -C
#CMD ["/usr/lib/rstudio-server/bin/rserver", "--server-daemonize=0", "--server-app-armor-enabled=0"]