-
-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathMakefile
212 lines (164 loc) · 6.73 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# The environment file stores the environment variables for the project.
FILE_NAME := .env

# --- Backend server ---
ENVIRONMENT := DEV
DEBUG := True
BACKEND_SERVER_HOST := 127.0.0.1
BACKEND_SERVER_PORT := 8000
BACKEND_SERVER_WORKERS := 4
BACKEND_SERVER_VERSION := v0.1.20
# NOTE: the double quotes are part of the Make value.
TIMEZONE := "UTC"
IS_ALLOWED_CREDENTIALS := True

# --- JWT token (placeholders; replace with real values) ---
JWT_SECRET_KEY := YOUR-JWT-SECRET-KEY
JWT_SUBJECT := YOUR-JWT-SUBJECT
JWT_TOKEN_PREFIX := YOUR-TOKEN-PREFIX
JWT_ALGORITHM := HS256
JWT_MIN := 60
JWT_HOUR := 23
JWT_DAY := 6

# --- Hash functions ---
HASHING_ALGORITHM_LAYER_1 := bcrypt
HASHING_ALGORITHM_LAYER_2 := argon2
HASHING_SALT := YOUR-RANDOM-SALTY-SALT

# --- Codecov (log in to Codecov to get your token) ---
# CODECOV_TOKEN:=CODECOV_TOKEN=

DOCKER_VOLUME_DIRECTORY :=

# --- CPU-accelerated inference engine ---
INFERENCE_ENG := llamacpp
INFERENCE_ENG_PORT := 8080
INFERENCE_ENG_VERSION := server--b1-2321a5e
NUM_CPU_CORES := 8.00
NUM_CPU_CORES_EMBEDDING := 4.00

# --- Embedding engine (uses the same version as the inference engine) ---
EMBEDDING_ENG := embedding_eng
EMBEDDING_ENG_PORT := 8080

# --- Language model; the default is Phi3-mini-4k-instruct-Q4.gguf ---
# https://github.com/SkywardAI/llama.cpp/blob/9b2f16f8055265c67e074025350736adc1ea0666/tests/test-chat-template.cpp#L91-L92
LANGUAGE_MODEL_NAME := Phi3-mini-4k-instruct-Q4.gguf
LANGUAGE_MODEL_URL := https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true
# NOTE: the double quotes are part of the Make value.
INSTRUCTION := "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the questions from human."

# --- Embedding model ---
EMBEDDING_MODEL_NAME := all-MiniLM-L6-v2-Q4_K_M-v2.gguf
EMBEDDING_MODEL_URL := https://huggingface.co/aisuko/all-MiniLM-L6-v2-gguf/resolve/main/all-MiniLM-L6-v2-Q4_K_M-v2.gguf?download=true

# --- Default administrator account ---
ADMIN_USERNAME := admin
ADMIN_EMAIL := admin@admin.com
ADMIN_PASS := admin

# yeager
METRICS_PATHS := runs

# Default RAG dataset name
DEFAULT_RAG_DS_NAME := aisuko/squad01-v2
# Generate $(FILE_NAME) with one KEY=value line per setting.
# All lines are emitted from a single shell command group, so the file is
# truncated and written through one redirection.
# API_TOKEN and AUTH_TOKEN are not assigned in the visible part of this
# Makefile; unless they come from the environment or the command line they
# expand to empty strings.
.PHONY: env
env:
	@{ \
	  echo "ENVIRONMENT=$(ENVIRONMENT)"; \
	  echo "DEBUG=$(DEBUG)"; \
	  echo "BACKEND_SERVER_HOST=$(BACKEND_SERVER_HOST)"; \
	  echo "BACKEND_SERVER_PORT=$(BACKEND_SERVER_PORT)"; \
	  echo "BACKEND_SERVER_WORKERS=$(BACKEND_SERVER_WORKERS)"; \
	  echo "BACKEND_SERVER_VERSION=$(BACKEND_SERVER_VERSION)"; \
	  echo "IS_ALLOWED_CREDENTIALS=$(IS_ALLOWED_CREDENTIALS)"; \
	  echo "API_TOKEN=$(API_TOKEN)"; \
	  echo "AUTH_TOKEN=$(AUTH_TOKEN)"; \
	  echo "JWT_SECRET_KEY=$(JWT_SECRET_KEY)"; \
	  echo "JWT_SUBJECT=$(JWT_SUBJECT)"; \
	  echo "JWT_TOKEN_PREFIX=$(JWT_TOKEN_PREFIX)"; \
	  echo "JWT_ALGORITHM=$(JWT_ALGORITHM)"; \
	  echo "JWT_MIN=$(JWT_MIN)"; \
	  echo "JWT_HOUR=$(JWT_HOUR)"; \
	  echo "JWT_DAY=$(JWT_DAY)"; \
	  echo "HASHING_ALGORITHM_LAYER_1=$(HASHING_ALGORITHM_LAYER_1)"; \
	  echo "HASHING_ALGORITHM_LAYER_2=$(HASHING_ALGORITHM_LAYER_2)"; \
	  echo "HASHING_SALT=$(HASHING_SALT)"; \
	  echo "DOCKER_VOLUME_DIRECTORY=$(DOCKER_VOLUME_DIRECTORY)"; \
	  echo "METRICS_PATHS=$(METRICS_PATHS)"; \
	  echo "INFERENCE_ENG=$(INFERENCE_ENG)"; \
	  echo "INFERENCE_ENG_PORT=$(INFERENCE_ENG_PORT)"; \
	  echo "INFERENCE_ENG_VERSION=$(INFERENCE_ENG_VERSION)"; \
	  echo "EMBEDDING_ENG=$(EMBEDDING_ENG)"; \
	  echo "EMBEDDING_ENG_PORT=$(EMBEDDING_ENG_PORT)"; \
	  echo "NUM_CPU_CORES=$(NUM_CPU_CORES)"; \
	  echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)"; \
	  echo "LANGUAGE_MODEL_NAME=$(LANGUAGE_MODEL_NAME)"; \
	  echo "ADMIN_USERNAME=$(ADMIN_USERNAME)"; \
	  echo "ADMIN_EMAIL=$(ADMIN_EMAIL)"; \
	  echo "ADMIN_PASS=$(ADMIN_PASS)"; \
	  echo "TIMEZONE=$(TIMEZONE)"; \
	  echo "INSTRUCTION"=$(INSTRUCTION); \
	  echo "EMBEDDING_MODEL_NAME"=$(EMBEDDING_MODEL_NAME); \
	  echo "DEFAULT_RAG_DS_NAME"=$(DEFAULT_RAG_DS_NAME); \
	} > $(FILE_NAME)
# Prepare a checkout: write the environment file, then download the models.
.PHONY: prepare
prepare: env lm
############################################################################################################
# For development, require Nvidia GPU

# Compose invocation shared by the development targets.
dev_compose := docker compose -f docker-compose.yaml

.PHONY: build up stop logs

# Build the images; the environment file is regenerated first.
build: env
	$(dev_compose) build

# Build, make sure the models are present, then start the stack detached.
up: build lm
	$(dev_compose) up -d

# Stop the development stack.
stop:
	$(dev_compose) stop

# Follow the logs of the development stack.
logs:
	@$(dev_compose) logs -f
############################################################################################################
# For demo, without GPU augmentation, but slow for inference. Might include some bugs.

# Compose invocation shared by the demo targets.
demo_compose := docker compose -f docker-compose.demo.yaml

.PHONY: demo demo-stop demo-logs

# Write the environment file, fetch the models, then start the demo stack detached.
demo: env lm
	$(demo_compose) up -d

# Stop the demo stack.
demo-stop:
	$(demo_compose) stop

# Follow the logs of the demo stack.
demo-logs:
	$(demo_compose) logs -f
############################################################################################################
# For gpu host.
#
# `gpu-up` starts the GPU stack; `gpu` is kept as an alias because that was
# the only name that actually worked before. All four names are phony so that
# a file or directory with the same name can never make them look up to date.
.PHONY: gpu gpu-up gpu-stop gpu-logs

# Write the environment file, fetch the models, then start the GPU stack detached.
gpu-up: env lm
	docker compose -f docker-compose.gpu.yaml up -d

# Backward-compatible alias for gpu-up.
gpu: gpu-up

# Stop the GPU stack.
gpu-stop:
	docker compose -f docker-compose.gpu.yaml stop

# Follow the logs of the GPU stack.
gpu-logs:
	docker compose -f docker-compose.gpu.yaml logs -f
############################################################################################################
# Linter
# Run ruff over the backend sources with the repository's ruff.toml, using
# the "github" output format.
.PHONY: ruff
ruff:
	@ruff check \
	  --output-format=github \
	  backend/src/ \
	  --config ruff.toml
############################################################################################################
# Download model from Hugging Face
#
# fetch_model: shell snippet that downloads one model unless it already exists.
#   $(1)  NAME of the Make variable holding the file name under volumes/models
#   $(2)  NAME of the Make variable holding the download URL
# Variable names (not values) are passed so that a comma in a URL cannot be
# mistaken for a $(call ...) argument separator.
# The download goes to "<file>.part" and is renamed only after wget succeeds.
# A failed or interrupted download therefore never leaves a truncated file
# that the existence check would accept as a valid model on the next run.
# Paths and URLs are quoted because the URLs contain "?", a shell glob character.
fetch_model = [ -f "volumes/models/$($(1))" ] || \
  { wget -O "volumes/models/$($(1)).part" "$($(2))" && \
    mv -f "volumes/models/$($(1)).part" "volumes/models/$($(1))"; } || \
  { rm -f "volumes/models/$($(1)).part"; exit 1; }

# Ensure the language model and the embedding model exist in volumes/models.
.PHONY: lm
lm:
	@mkdir -p volumes/models
	@$(call fetch_model,LANGUAGE_MODEL_NAME,LANGUAGE_MODEL_URL)
	@$(call fetch_model,EMBEDDING_MODEL_NAME,EMBEDDING_MODEL_URL)
# Run the gclub/llama.cpp image (tag $(INFERENCE_ENG_VERSION)) in the
# foreground after making sure the models are downloaded. Host port 8080 is
# published and ./volumes/models is mounted at /models in the container.
# NOTE(review): the port is hard-coded to 8080 rather than taken from
# INFERENCE_ENG_PORT; confirm whether that is intentional.
.PHONY: localinfer
localinfer: lm
	@docker run \
	  -p 8080:8080 \
	  -v ./volumes/models:/models \
	  gclub/llama.cpp:$(INFERENCE_ENG_VERSION) \
	  -m models/$(LANGUAGE_MODEL_NAME) \
	  -c 512 -cnv -i --metrics \
	  --host 0.0.0.0 --port 8080
############################################################################################################
# Poetry

# Poetry invocation that runs against the backend project directory.
backend_poetry := poetry -C backend

.PHONY: poetry lock install install-dev plugin expo

# Install the pinned Poetry release through pipx.
poetry:
	@pipx install poetry==1.8.2

# Run `poetry lock` for the backend project.
lock:
	@$(backend_poetry) lock

# Install the backend dependencies without installing the project itself.
install:
	@$(backend_poetry) install --no-root -vvv

# Install only the "dev" dependency group.
install-dev:
	@$(backend_poetry) install --only dev --no-root -vvv

# Add the export plugin to Poetry itself.
plugin:
	@$(backend_poetry) self add poetry-plugin-export

# Export the backend dependencies to backend/requirements.txt.
expo:
	@$(backend_poetry) export -f requirements.txt --output backend/requirements.txt
############################################################################################################
# Testing

# Location of the backend test suite handed to pytest.
backend_tests := backend/tests

# Run the backend tests with pytest.
.PHONY: test
test:
	@pytest $(backend_tests)