Skip to content

Commit

Permalink
Instantiate Emscripten Runtime for python workers separately.
Browse files Browse the repository at this point in the history
  • Loading branch information
danlapid committed Nov 28, 2024
1 parent 564c8a2 commit eb843c6
Show file tree
Hide file tree
Showing 12 changed files with 192 additions and 52 deletions.
18 changes: 9 additions & 9 deletions src/pyodide/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -211,27 +211,27 @@ INTERNAL_DATA_MODULES = glob([
"internal/*.py",
"internal/patches/*.py",
"internal/topLevelEntropy/*.py",
])
]) + [
"generated/python_stdlib.zip",
"generated/pyodide.asm.wasm",
"generated/emscriptenSetup.js",
]

wd_ts_bundle(
name = "pyodide",
eslintrc_json = "eslint.config.mjs",
import_name = "pyodide",
internal_data_modules = ["generated/python_stdlib.zip"] + INTERNAL_DATA_MODULES,
internal_data_modules = INTERNAL_DATA_MODULES,
internal_json_modules = [
"generated/pyodide-lock.json",
"generated/pyodide-bucket.json",
],
internal_modules = [
"generated/emscriptenSetup.js",
] + INTERNAL_MODULES,
internal_wasm_modules = ["generated/pyodide.asm.wasm"],
internal_modules = INTERNAL_MODULES,
js_deps = [
"generated/emscriptenSetup",
"pyodide.asm.js@rule",
"pyodide.asm.wasm@rule",
"pyodide-lock.js@rule",
"python_stdlib.zip@rule",
"pyodide-lock.js@rule",
"pyodide-bucket.json@rule",
],
lint = False,
Expand Down Expand Up @@ -264,7 +264,7 @@ genrule(
for m in INTERNAL_DATA_MODULES
if m.endswith(".py")
] + [
":pyodide-internal_generated_emscriptenSetup",
":pyodide-internal_generated_emscriptenSetup.js",
":pyodide-internal_generated_pyodide.asm.wasm",
":pyodide-internal_generated_python_stdlib.zip",
":pyodide-internal_generated_pyodide-lock.json",
Expand Down
10 changes: 6 additions & 4 deletions src/pyodide/internal/pool/emscriptenSetup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { reportError } from 'pyodide-internal:util';
*/
import { _createPyodideModule } from 'pyodide-internal:generated/pyodide.asm';

export {
import {
setUnsafeEval,
setGetRandomValues,
} from 'pyodide-internal:pool/builtin_wrappers';
Expand Down Expand Up @@ -56,7 +56,7 @@ function getWaitForDynlibs(resolveReadyPromise: PreRunHook): PreRunHook {
* This is a simplified version of the `prepareFileSystem` function here:
* https://github.com/pyodide/pyodide/blob/main/src/js/module.ts
*/
function getPrepareFileSystem(pythonStdlib: Uint8Array): PreRunHook {
function getPrepareFileSystem(pythonStdlib: ArrayBuffer): PreRunHook {
return function prepareFileSystem(Module: Module): void {
try {
const pymajor = Module._py_version_major();
Expand Down Expand Up @@ -118,7 +118,7 @@ function getInstantiateWasm(
*/
function getEmscriptenSettings(
isWorkerd: boolean,
pythonStdlib: Uint8Array,
pythonStdlib: ArrayBuffer,
pyodideWasmModule: WebAssembly.Module
): EmscriptenSettings {
const config: PyodideConfig = {
Expand Down Expand Up @@ -193,7 +193,7 @@ function* featureDetectionMonkeyPatchesContextManager() {
*/
export async function instantiateEmscriptenModule(
isWorkerd: boolean,
pythonStdlib: Uint8Array,
pythonStdlib: ArrayBuffer,
wasmModule: WebAssembly.Module
): Promise<Module> {
const emscriptenSettings = getEmscriptenSettings(
Expand All @@ -210,6 +210,8 @@ export async function instantiateEmscriptenModule(

// Wait until we've executed all the preRun hooks before proceeding
const emscriptenModule = await emscriptenSettings.readyPromise;
emscriptenModule.setUnsafeEval = setUnsafeEval;
emscriptenModule.setGetRandomValues = setGetRandomValues;
return emscriptenModule;
} catch (e) {
console.warn('Error in instantiateEmscriptenModule');
Expand Down
50 changes: 14 additions & 36 deletions src/pyodide/internal/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,38 +19,15 @@ import {
entropyBeforeTopLevel,
getRandomValues,
} from 'pyodide-internal:topLevelEntropy/lib';
import { default as UnsafeEval } from 'internal:unsafe-eval';
import { simpleRunPython } from 'pyodide-internal:util';

/**
* This file is a simplified version of the Pyodide loader:
* https://github.com/pyodide/pyodide/blob/main/src/js/pyodide.ts
*
* In particular, it drops the package lock, which disables
* `pyodide.loadPackage`. In trade we add memory snapshots here.
*/

/**
* _createPyodideModule and pyodideWasmModule together are produced by the
* Emscripten linker
* SetupEmscripten is an internal module defined in setup-emscripten.h. The module instantiates
* Emscripten separately from this code, in another context.
* The underlying code for it can be found in pool/emscriptenSetup.ts.
*/
import pyodideWasmModule from 'pyodide-internal:generated/pyodide.asm.wasm';
import { default as SetupEmscripten } from 'internal:setup-emscripten';

/**
* The Python and Pyodide stdlib zipped together. The zip format is convenient
* because Python has a "ziploader" that allows one to import directly from a
* zip file.
*
* The ziploader solves bootstrapping problems around unpacking: Python comes
* with a bunch of C libs to unpack various archive formats, but they need stuff
* in this zip file to initialize their runtime state.
*/
import pythonStdlib from 'pyodide-internal:generated/python_stdlib.zip';
import {
instantiateEmscriptenModule,
setUnsafeEval,
setGetRandomValues,
} from 'pyodide-internal:generated/emscriptenSetup';
import { default as UnsafeEval } from 'internal:unsafe-eval';
import { simpleRunPython } from 'pyodide-internal:util';
import { loadPackages } from 'pyodide-internal:loadPackage';

/**
Expand All @@ -59,7 +36,7 @@ import { loadPackages } from 'pyodide-internal:loadPackage';
* `noInitialRun: true` and so the C runtime is in an incoherent state until we
* restore the linear memory from the snapshot.
*/
async function prepareWasmLinearMemory(Module: Module): Promise<void> {
function prepareWasmLinearMemory(Module: Module): void {
// Note: if we are restoring from a snapshot, runtime is not initialized yet.
Module.noInitialRun = !SHOULD_RESTORE_SNAPSHOT;

Expand Down Expand Up @@ -92,15 +69,16 @@ export async function loadPyodide(
lockfile: PackageLock,
indexURL: string
): Promise<Pyodide> {
const Module = await enterJaegerSpan('instantiate_emscripten', () =>
instantiateEmscriptenModule(isWorkerd, pythonStdlib, pyodideWasmModule)
const Module = enterJaegerSpan('instantiate_emscripten', () =>
SetupEmscripten.getModule()
);
Module.API.config.jsglobals = globalThis;
if (isWorkerd) {
Module.API.config.indexURL = indexURL;
Module.API.config.resolveLockFilePromise!(lockfile);
}
setUnsafeEval(UnsafeEval);
setGetRandomValues(getRandomValues);
Module.setUnsafeEval(UnsafeEval);
Module.setGetRandomValues(getRandomValues);

mountSitePackages(Module, SITE_PACKAGES.rootInfo);
entropyMountFiles(Module);
Expand All @@ -110,7 +88,7 @@ export async function loadPyodide(
loadPackages(Module, TRANSITIVE_REQUIREMENTS)
);

await enterJaegerSpan('prepare_wasm_linear_memory', () =>
enterJaegerSpan('prepare_wasm_linear_memory', () =>
prepareWasmLinearMemory(Module)
);

Expand All @@ -120,7 +98,7 @@ export async function loadPyodide(
mountWorkerFiles(Module);

// Finish setting up Pyodide's ffi so we can use the nice Python interface
await enterJaegerSpan('finalize_bootstrap', Module.API.finalizeBootstrap);
enterJaegerSpan('finalize_bootstrap', Module.API.finalizeBootstrap);
const pyodide = Module.API.public_api;

finishSnapshotSetup(pyodide);
Expand Down
4 changes: 4 additions & 0 deletions src/pyodide/types/emscripten.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,8 @@ interface Module {
addRunDependency(x: string): void;
removeRunDependency(x: string): void;
noInitialRun: boolean;
setUnsafeEval(mod: typeof import('internal:unsafe-eval').default): void;
setGetRandomValues(
func: typeof import('pyodide-internal:topLevelEntropy/lib').getRandomValues
): void;
}
5 changes: 5 additions & 0 deletions src/pyodide/types/setup-emscripten.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/**
 * Type declaration for the `SetupEmscripten` binding.
 *
 * NOTE(review): presumably this types the default export of the
 * `internal:setup-emscripten` module — confirm against the module registration.
 */
declare namespace SetupEmscripten {
/**
 * Returns the Emscripten `Module` object. The signature is synchronous: the
 * return type is `Module`, not `Promise<Module>`.
 */
const getModule: () => Module;
}

export default SetupEmscripten;
4 changes: 4 additions & 0 deletions src/workerd/api/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ filegroup(
"html-rewriter.c++",
"hyperdrive.c++",
"pyodide/pyodide.c++",
"pyodide/setup-emscripten.c++",
"memory-cache.c++",
"r2*.c++",
"rtti.c++",
Expand All @@ -37,6 +38,7 @@ filegroup(
"hyperdrive.h",
"memory-cache.h",
"pyodide/pyodide.h",
"pyodide/setup-emscripten.h",
"modules.h",
"r2*.h",
"rtti.h",
Expand Down Expand Up @@ -126,9 +128,11 @@ wd_cc_library(
name = "pyodide",
srcs = [
"pyodide/pyodide.c++",
"pyodide/setup-emscripten.c++",
],
hdrs = [
"pyodide/pyodide.h",
"pyodide/setup-emscripten.h",
"//src/pyodide:generated/pyodide_extra.capnp.h",
],
implementation_deps = ["//src/workerd/util:string-buffer"],
Expand Down
10 changes: 10 additions & 0 deletions src/workerd/api/pyodide/pyodide.c++
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// https://opensource.org/licenses/Apache-2.0
#include "pyodide.h"

#include <workerd/api/pyodide/setup-emscripten.h>
#include <workerd/util/string-buffer.h>
#include <workerd/util/strings.h>

Expand Down Expand Up @@ -483,6 +484,15 @@ void DiskCache::put(jsg::Lock& js, kj::String key, kj::Array<kj::byte> data) {
}
}

// Returns the stored Emscripten module handle to the calling context.
//
// Before handing the module out, the current context's security token is replaced with the token
// stored alongside the runtime.
// NOTE(review): presumably the matching token is what lets the calling context use objects that
// were created in the context where the runtime was instantiated — confirm against V8 docs.
jsg::JsValue SetupEmscripten::getModule(jsg::Lock& js) {
  auto storedToken = emscriptenRuntime.contextToken.getHandle(js);
  js.v8Context()->SetSecurityToken(storedToken);

  return emscriptenRuntime.emscriptenRuntime.getHandle(js);
}

// GC tracing hook: reports the stored Emscripten module reference to the visitor.
// NOTE(review): emscriptenRuntime.contextToken is not visited here — confirm that is intentional.
void SetupEmscripten::visitForGc(jsg::GcVisitor& visitor) {
visitor.visit(emscriptenRuntime.emscriptenRuntime);
}

bool hasPythonModules(capnp::List<server::config::Worker::Module>::Reader modules) {
for (auto module: modules) {
if (module.isPythonModule()) {
Expand Down
19 changes: 18 additions & 1 deletion src/workerd/api/pyodide/pyodide.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "workerd/util/wait-list.h"

#include <workerd/api/pyodide/setup-emscripten.h>
#include <workerd/io/io-context.h>
#include <workerd/jsg/jsg.h>
#include <workerd/jsg/modules-new.h>
Expand Down Expand Up @@ -408,6 +409,22 @@ class SimplePythonLimiter: public jsg::Object {
}
};

// JSG resource type that owns an EmscriptenRuntime and exposes a single JS-callable method,
// getModule().
class SetupEmscripten: public jsg::Object {
public:
// Takes ownership of an EmscriptenRuntime (moved into the member).
SetupEmscripten(EmscriptenRuntime emscriptenRuntime)
: emscriptenRuntime(kj::mv(emscriptenRuntime)) {};

// Returns the stored Emscripten module value; see the definition for the security-token handling.
jsg::JsValue getModule(jsg::Lock& js);

JSG_RESOURCE_TYPE(SetupEmscripten) {
JSG_METHOD(getModule);
}

private:
// The context token / module pair this object hands out through getModule().
EmscriptenRuntime emscriptenRuntime;
// GC tracing hook for the references held in emscriptenRuntime.
void visitForGc(jsg::GcVisitor& visitor);
};

using Worker = server::config::Worker;

jsg::Ref<PyodideMetadataReader> makePyodideMetadataReader(
Expand All @@ -419,6 +436,6 @@ bool hasPythonModules(capnp::List<server::config::Worker::Module>::Reader module
api::pyodide::ReadOnlyBuffer, api::pyodide::PyodideMetadataReader, \
api::pyodide::ArtifactBundler, api::pyodide::DiskCache, \
api::pyodide::DisabledInternalJaeger, api::pyodide::SimplePythonLimiter, \
api::pyodide::MemorySnapshotResult
api::pyodide::MemorySnapshotResult, api::pyodide::SetupEmscripten

} // namespace workerd::api::pyodide
94 changes: 94 additions & 0 deletions src/workerd/api/pyodide/setup-emscripten.c++
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#include "setup-emscripten.h"

#include <workerd/io/trace.h>
#include <workerd/io/worker.h>

namespace workerd::api::pyodide {

// Compiles the emscriptenSetup JavaScript source held in `emsciptenSetupJsReader` into a v8::Module.
//
// The module is compiled under the resource name "pyodide-internal:generated/emscriptenSetup".
// Compilation failures surface through jsg::check. The returned module is only compiled; it has
// not been instantiated or evaluated.
v8::Local<v8::Module> loadEmscriptenSetupModule(
    jsg::Lock& js, capnp::Data::Reader emsciptenSetupJsReader) {
  auto isolate = js.v8Isolate;

  auto sourceText = jsg::v8Str(isolate, emsciptenSetupJsReader.asChars());
  auto resourceName = jsg::v8StrIntern(isolate, "pyodide-internal:generated/emscriptenSetup");

  // NOTE(review): the trailing `true` is presumably ScriptOrigin's is_module flag — confirm
  // against the V8 version in use.
  v8::ScriptOrigin moduleOrigin(resourceName, 0, 0, false, -1, {}, false, false, true);
  v8::ScriptCompiler::Source compilerSource(sourceText, moduleOrigin);

  return jsg::check(v8::ScriptCompiler::CompileModule(isolate, &compilerSource));
}

// Synchronously unwraps a JS promise and returns its result.
//
// Asserts that `prom` is a promise. If it is still pending, js.runMicrotasks() is called once to
// give it a chance to settle. The promise must then be FULFILLED; a promise that is rejected, or
// still pending after that single run, fails the KJ_ASSERT.
jsg::JsValue resolvePromise(jsg::Lock& js, jsg::JsValue prom) {
auto promise = KJ_ASSERT_NONNULL(prom.tryCast<jsg::JsPromise>());
if (promise.state() == jsg::PromiseState::PENDING) {
js.runMicrotasks();
}
KJ_ASSERT(promise.state() == jsg::PromiseState::FULFILLED);
return promise.result();
}

// Instantiates and evaluates `module`, blocking until evaluation has completed.
//
// Evaluate() must return a promise (asserted). That promise is driven to fulfillment with
// resolvePromise, and the module is then asserted to be in the kEvaluated state.
void instantiateEmscriptenSetupModule(jsg::Lock& js, v8::Local<v8::Module>& module) {
jsg::instantiateModule(js, module);
auto evalPromise = KJ_ASSERT_NONNULL(
jsg::JsValue(jsg::check(module->Evaluate(js.v8Context()))).tryCast<jsg::JsPromise>());
// The result value is unused; the call is made to settle the evaluation promise.
resolvePromise(js, evalPromise);
KJ_ASSERT(module->GetStatus() == v8::Module::kEvaluated);
}

// Looks up the `instantiateEmscriptenModule` property on the module's namespace object and
// returns it as a function. Asserts that the property is a function.
v8::Local<v8::Function> getInstantiateEmscriptenModule(
jsg::Lock& js, v8::Local<v8::Module>& module) {
auto instantiateEmscriptenModule =
js.v8Get(module->GetModuleNamespace().As<v8::Object>(), "instantiateEmscriptenModule"_kj);
KJ_ASSERT(instantiateEmscriptenModule->IsFunction());
return instantiateEmscriptenModule.As<v8::Function>();
}

// Calls `func` with `args...` as its arguments and a null receiver, in the current context.
//
// Each argument must be convertible to v8::Local<v8::Value>. A failed call surfaces through
// jsg::check; otherwise the call's return value is wrapped in a jsg::JsValue.
template <typename... Args>
jsg::JsValue callFunction(jsg::Lock& js, v8::Local<v8::Function>& func, Args... args) {
  std::initializer_list<v8::Local<v8::Value>> packedArgs{args...};
  v8::LocalVector<v8::Value> callArgs(js.v8Isolate, packedArgs);

  auto callResult = func->Call(js.v8Context(), js.v8Null(), callArgs.size(), callArgs.data());
  return jsg::JsValue(jsg::check(callResult));
}

// Invokes the JS `instantiateEmscriptenModule` function (`func`) and returns its resolved value.
//
// The arguments passed to `func` are, in order: `isWorkerd` as a JS boolean, a fresh ArrayBuffer
// holding a copy of the python stdlib zip, and a WebAssembly module compiled from the
// pyodide.asm.wasm bytes.
//
// Eval is enabled for the duration of the call and disabled again on scope exit. An
// AllowV8BackgroundThreadsScope is held for the whole function.
// The promise returned by `func` is unwrapped synchronously with resolvePromise.
jsg::JsValue callInstantiateEmscriptenModule(jsg::Lock& js,
    v8::Local<v8::Function>& func,
    bool isWorkerd,
    capnp::Data::Reader pythonStdlibZipReader,
    capnp::Data::Reader pyodideAsmWasmReader) {
  AllowV8BackgroundThreadsScope scope;
  js.setAllowEval(true);
  KJ_DEFER(js.setAllowEval(false));

  // Copy the stdlib zip into a buffer owned by V8; the buffer is left uninitialized because every
  // byte is overwritten by the memcpy below.
  auto stdlibBuffer = v8::ArrayBuffer::New(js.v8Isolate, pythonStdlibZipReader.size(),
      v8::BackingStoreInitializationMode::kUninitialized);
  memcpy(stdlibBuffer->Data(), pythonStdlibZipReader.begin(), pythonStdlibZipReader.size());

  v8::MemorySpan<const uint8_t> wasmBytes(
      pyodideAsmWasmReader.begin(), pyodideAsmWasmReader.size());
  auto compiledWasm = jsg::check(v8::WasmModuleObject::Compile(js.v8Isolate, wasmBytes));

  auto pendingModule = callFunction(
      js, func, js.boolean(isWorkerd), kj::mv(stdlibBuffer), kj::mv(compiledWasm));
  return resolvePromise(js, pendingModule);
}

// Builds an EmscriptenRuntime from the modules in `bundle`, using the current V8 context.
//
// Steps:
//   1. Find the emscriptenSetup.js, python_stdlib.zip and pyodide.asm.wasm modules by name suffix.
//   2. Set up the current context via Worker::setupContext.
//   3. Compile and evaluate the emscriptenSetup module.
//   4. Call its `instantiateEmscriptenModule` export and synchronously unwrap the result.
//   5. Capture the context's security token alongside the resulting module.
//
// Any of the three modules missing from the bundle fails a KJ_ASSERT_NONNULL.
EmscriptenRuntime EmscriptenRuntime::initialize(
jsg::Lock& js, bool isWorkerd, jsg::Bundle::Reader bundle) {
kj::Maybe<capnp::Data::Reader> emsciptenSetupJsReader;
kj::Maybe<capnp::Data::Reader> pythonStdlibZipReader;
kj::Maybe<capnp::Data::Reader> pyodideAsmWasmReader;
// Matching is by suffix and the loop does not stop early, so if several module names share a
// suffix the last one in the bundle wins.
for (auto module: bundle.getModules()) {
if (module.getName().endsWith("emscriptenSetup.js")) {
emsciptenSetupJsReader = module.getData();
} else if (module.getName().endsWith("python_stdlib.zip")) {
pythonStdlibZipReader = module.getData();
} else if (module.getName().endsWith("pyodide.asm.wasm")) {
pyodideAsmWasmReader = module.getData();
}
}
auto context = js.v8Context();
Worker::setupContext(js, context, Worker::ConsoleMode::INSPECTOR_ONLY);
auto module = loadEmscriptenSetupModule(js, KJ_ASSERT_NONNULL(emsciptenSetupJsReader));
instantiateEmscriptenSetupModule(js, module);
auto instantiateEmscriptenModule = getInstantiateEmscriptenModule(js, module);
auto emscriptenModule = callInstantiateEmscriptenModule(js, instantiateEmscriptenModule,
isWorkerd, KJ_ASSERT_NONNULL(pythonStdlibZipReader), KJ_ASSERT_NONNULL(pyodideAsmWasmReader));
// The token is read after instantiation and stored alongside the module in the returned struct.
auto contextToken = jsg::JsValue(context->GetSecurityToken());
return EmscriptenRuntime{contextToken.addRef(js), emscriptenModule.addRef(js)};
}
} // namespace workerd::api::pyodide
12 changes: 12 additions & 0 deletions src/workerd/api/pyodide/setup-emscripten.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#pragma once

#include <workerd/io/compatibility-date.capnp.h>
#include <workerd/jsg/jsg.h>

namespace workerd::api::pyodide {
// Holds the result of instantiating the Emscripten module, together with the security token of
// the context in which it was instantiated.
struct EmscriptenRuntime {
// Security token read from the V8 context used by initialize().
jsg::JsRef<jsg::JsValue> contextToken;
// The resolved value of the JS `instantiateEmscriptenModule` call, i.e. the Emscripten module.
jsg::JsRef<jsg::JsValue> emscriptenRuntime;
// Instantiates the runtime from the emscriptenSetup.js, python_stdlib.zip and pyodide.asm.wasm
// modules found in `bundle`; `isWorkerd` is forwarded to the JS `instantiateEmscriptenModule`.
static EmscriptenRuntime initialize(jsg::Lock& js, bool isWorkerd, jsg::Bundle::Reader bundle);
};
} // namespace workerd::api::pyodide
Loading

0 comments on commit eb843c6

Please sign in to comment.