From eb843c646b4f4470b8f28b29e02145f174cab649 Mon Sep 17 00:00:00 2001 From: Dan Lapid Date: Thu, 17 Oct 2024 15:58:58 +0000 Subject: [PATCH] Instantiate Emscripten Runtime for python workers separately. --- src/pyodide/BUILD.bazel | 18 ++-- src/pyodide/internal/pool/emscriptenSetup.ts | 10 ++- src/pyodide/internal/python.ts | 50 +++-------- src/pyodide/types/emscripten.d.ts | 4 + src/pyodide/types/setup-emscripten.d.ts | 5 ++ src/workerd/api/BUILD.bazel | 4 + src/workerd/api/pyodide/pyodide.c++ | 10 +++ src/workerd/api/pyodide/pyodide.h | 19 +++- src/workerd/api/pyodide/setup-emscripten.c++ | 94 ++++++++++++++++++++ src/workerd/api/pyodide/setup-emscripten.h | 12 +++ src/workerd/io/compatibility-date.capnp | 2 +- src/workerd/server/workerd-api.c++ | 16 +++- 12 files changed, 192 insertions(+), 52 deletions(-) create mode 100644 src/pyodide/types/setup-emscripten.d.ts create mode 100644 src/workerd/api/pyodide/setup-emscripten.c++ create mode 100644 src/workerd/api/pyodide/setup-emscripten.h diff --git a/src/pyodide/BUILD.bazel b/src/pyodide/BUILD.bazel index 0eabada074bb..ea1c76548d94 100644 --- a/src/pyodide/BUILD.bazel +++ b/src/pyodide/BUILD.bazel @@ -211,27 +211,27 @@ INTERNAL_DATA_MODULES = glob([ "internal/*.py", "internal/patches/*.py", "internal/topLevelEntropy/*.py", -]) +]) + [ + "generated/python_stdlib.zip", + "generated/pyodide.asm.wasm", + "generated/emscriptenSetup.js", +] wd_ts_bundle( name = "pyodide", eslintrc_json = "eslint.config.mjs", import_name = "pyodide", - internal_data_modules = ["generated/python_stdlib.zip"] + INTERNAL_DATA_MODULES, + internal_data_modules = INTERNAL_DATA_MODULES, internal_json_modules = [ "generated/pyodide-lock.json", "generated/pyodide-bucket.json", ], - internal_modules = [ - "generated/emscriptenSetup.js", - ] + INTERNAL_MODULES, - internal_wasm_modules = ["generated/pyodide.asm.wasm"], + internal_modules = INTERNAL_MODULES, js_deps = [ "generated/emscriptenSetup", - "pyodide.asm.js@rule", "pyodide.asm.wasm@rule", 
- "pyodide-lock.js@rule", "python_stdlib.zip@rule", + "pyodide-lock.js@rule", "pyodide-bucket.json@rule", ], lint = False, @@ -264,7 +264,7 @@ genrule( for m in INTERNAL_DATA_MODULES if m.endswith(".py") ] + [ - ":pyodide-internal_generated_emscriptenSetup", + ":pyodide-internal_generated_emscriptenSetup.js", ":pyodide-internal_generated_pyodide.asm.wasm", ":pyodide-internal_generated_python_stdlib.zip", ":pyodide-internal_generated_pyodide-lock.json", diff --git a/src/pyodide/internal/pool/emscriptenSetup.ts b/src/pyodide/internal/pool/emscriptenSetup.ts index b9b37c0ee993..d72946898c63 100644 --- a/src/pyodide/internal/pool/emscriptenSetup.ts +++ b/src/pyodide/internal/pool/emscriptenSetup.ts @@ -13,7 +13,7 @@ import { reportError } from 'pyodide-internal:util'; */ import { _createPyodideModule } from 'pyodide-internal:generated/pyodide.asm'; -export { +import { setUnsafeEval, setGetRandomValues, } from 'pyodide-internal:pool/builtin_wrappers'; @@ -56,7 +56,7 @@ function getWaitForDynlibs(resolveReadyPromise: PreRunHook): PreRunHook { * This is a simplified version of the `prepareFileSystem` function here: * https://github.com/pyodide/pyodide/blob/main/src/js/module.ts */ -function getPrepareFileSystem(pythonStdlib: Uint8Array): PreRunHook { +function getPrepareFileSystem(pythonStdlib: ArrayBuffer): PreRunHook { return function prepareFileSystem(Module: Module): void { try { const pymajor = Module._py_version_major(); @@ -118,7 +118,7 @@ function getInstantiateWasm( */ function getEmscriptenSettings( isWorkerd: boolean, - pythonStdlib: Uint8Array, + pythonStdlib: ArrayBuffer, pyodideWasmModule: WebAssembly.Module ): EmscriptenSettings { const config: PyodideConfig = { @@ -193,7 +193,7 @@ function* featureDetectionMonkeyPatchesContextManager() { */ export async function instantiateEmscriptenModule( isWorkerd: boolean, - pythonStdlib: Uint8Array, + pythonStdlib: ArrayBuffer, wasmModule: WebAssembly.Module ): Promise { const emscriptenSettings = 
getEmscriptenSettings( @@ -210,6 +210,8 @@ export async function instantiateEmscriptenModule( // Wait until we've executed all the preRun hooks before proceeding const emscriptenModule = await emscriptenSettings.readyPromise; + emscriptenModule.setUnsafeEval = setUnsafeEval; + emscriptenModule.setGetRandomValues = setGetRandomValues; return emscriptenModule; } catch (e) { console.warn('Error in instantiateEmscriptenModule'); diff --git a/src/pyodide/internal/python.ts b/src/pyodide/internal/python.ts index 96824c790799..baf8c1923a3d 100644 --- a/src/pyodide/internal/python.ts +++ b/src/pyodide/internal/python.ts @@ -19,38 +19,15 @@ import { entropyBeforeTopLevel, getRandomValues, } from 'pyodide-internal:topLevelEntropy/lib'; -import { default as UnsafeEval } from 'internal:unsafe-eval'; -import { simpleRunPython } from 'pyodide-internal:util'; - -/** - * This file is a simplified version of the Pyodide loader: - * https://github.com/pyodide/pyodide/blob/main/src/js/pyodide.ts - * - * In particular, it drops the package lock, which disables - * `pyodide.loadPackage`. In trade we add memory snapshots here. - */ - /** - * _createPyodideModule and pyodideWasmModule together are produced by the - * Emscripten linker + * SetupEmscripten is an internal module defined in setup-emscripten.h the module instantiates + * emscripten separately from this code in another context. + * The underlying code for it can be found in pool/emscriptenSetup.ts. */ -import pyodideWasmModule from 'pyodide-internal:generated/pyodide.asm.wasm'; +import { default as SetupEmscripten } from 'internal:setup-emscripten'; -/** - * The Python and Pyodide stdlib zipped together. The zip format is convenient - * because Python has a "ziploader" that allows one to import directly from a - * zip file.
- * - * The ziploader solves bootstrapping problems around unpacking: Python comes - * with a bunch of C libs to unpack various archive formats, but they need stuff - * in this zip file to initialize their runtime state. - */ -import pythonStdlib from 'pyodide-internal:generated/python_stdlib.zip'; -import { - instantiateEmscriptenModule, - setUnsafeEval, - setGetRandomValues, -} from 'pyodide-internal:generated/emscriptenSetup'; +import { default as UnsafeEval } from 'internal:unsafe-eval'; +import { simpleRunPython } from 'pyodide-internal:util'; import { loadPackages } from 'pyodide-internal:loadPackage'; /** @@ -59,7 +36,7 @@ import { loadPackages } from 'pyodide-internal:loadPackage'; * `noInitialRun: true` and so the C runtime is in an incoherent state until we * restore the linear memory from the snapshot. */ -async function prepareWasmLinearMemory(Module: Module): Promise { +function prepareWasmLinearMemory(Module: Module): void { // Note: if we are restoring from a snapshot, runtime is not initialized yet. 
Module.noInitialRun = !SHOULD_RESTORE_SNAPSHOT; @@ -92,15 +69,16 @@ export async function loadPyodide( lockfile: PackageLock, indexURL: string ): Promise { - const Module = await enterJaegerSpan('instantiate_emscripten', () => - instantiateEmscriptenModule(isWorkerd, pythonStdlib, pyodideWasmModule) + const Module = enterJaegerSpan('instantiate_emscripten', () => + SetupEmscripten.getModule() ); + Module.API.config.jsglobals = globalThis; if (isWorkerd) { Module.API.config.indexURL = indexURL; Module.API.config.resolveLockFilePromise!(lockfile); } - setUnsafeEval(UnsafeEval); - setGetRandomValues(getRandomValues); + Module.setUnsafeEval(UnsafeEval); + Module.setGetRandomValues(getRandomValues); mountSitePackages(Module, SITE_PACKAGES.rootInfo); entropyMountFiles(Module); @@ -110,7 +88,7 @@ export async function loadPyodide( loadPackages(Module, TRANSITIVE_REQUIREMENTS) ); - await enterJaegerSpan('prepare_wasm_linear_memory', () => + enterJaegerSpan('prepare_wasm_linear_memory', () => prepareWasmLinearMemory(Module) ); @@ -120,7 +98,7 @@ export async function loadPyodide( mountWorkerFiles(Module); // Finish setting up Pyodide's ffi so we can use the nice Python interface - await enterJaegerSpan('finalize_bootstrap', Module.API.finalizeBootstrap); + enterJaegerSpan('finalize_bootstrap', Module.API.finalizeBootstrap); const pyodide = Module.API.public_api; finishSnapshotSetup(pyodide); diff --git a/src/pyodide/types/emscripten.d.ts b/src/pyodide/types/emscripten.d.ts index e465da6c3771..5b2ccba9a6fa 100644 --- a/src/pyodide/types/emscripten.d.ts +++ b/src/pyodide/types/emscripten.d.ts @@ -68,4 +68,8 @@ interface Module { addRunDependency(x: string): void; removeRunDependency(x: string): void; noInitialRun: boolean; + setUnsafeEval(mod: typeof import('internal:unsafe-eval').default): void; + setGetRandomValues( + func: typeof import('pyodide-internal:topLevelEntropy/lib').getRandomValues + ): void; } diff --git a/src/pyodide/types/setup-emscripten.d.ts 
b/src/pyodide/types/setup-emscripten.d.ts new file mode 100644 index 000000000000..191e2ed3c5db --- /dev/null +++ b/src/pyodide/types/setup-emscripten.d.ts @@ -0,0 +1,5 @@ +declare namespace SetupEmscripten { + const getModule: () => Module; +} + +export default SetupEmscripten; diff --git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel index 3c3528d0865c..f6ee880cdb46 100644 --- a/src/workerd/api/BUILD.bazel +++ b/src/workerd/api/BUILD.bazel @@ -14,6 +14,7 @@ filegroup( "html-rewriter.c++", "hyperdrive.c++", "pyodide/pyodide.c++", + "pyodide/setup-emscripten.c++", "memory-cache.c++", "r2*.c++", "rtti.c++", @@ -37,6 +38,7 @@ filegroup( "hyperdrive.h", "memory-cache.h", "pyodide/pyodide.h", + "pyodide/setup-emscripten.h", "modules.h", "r2*.h", "rtti.h", @@ -126,9 +128,11 @@ wd_cc_library( name = "pyodide", srcs = [ "pyodide/pyodide.c++", + "pyodide/setup-emscripten.c++", ], hdrs = [ "pyodide/pyodide.h", + "pyodide/setup-emscripten.h", "//src/pyodide:generated/pyodide_extra.capnp.h", ], implementation_deps = ["//src/workerd/util:string-buffer"], diff --git a/src/workerd/api/pyodide/pyodide.c++ b/src/workerd/api/pyodide/pyodide.c++ index e87ffcc09eb4..cbeeed5bf99d 100644 --- a/src/workerd/api/pyodide/pyodide.c++ +++ b/src/workerd/api/pyodide/pyodide.c++ @@ -3,6 +3,7 @@ // https://opensource.org/licenses/Apache-2.0 #include "pyodide.h" +#include #include #include @@ -483,6 +484,15 @@ void DiskCache::put(jsg::Lock& js, kj::String key, kj::Array data) { } } +jsg::JsValue SetupEmscripten::getModule(jsg::Lock& js) { + js.v8Context()->SetSecurityToken(emscriptenRuntime.contextToken.getHandle(js)); + return emscriptenRuntime.emscriptenRuntime.getHandle(js); +} + +void SetupEmscripten::visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(emscriptenRuntime.emscriptenRuntime); +} + bool hasPythonModules(capnp::List::Reader modules) { for (auto module: modules) { if (module.isPythonModule()) { diff --git a/src/workerd/api/pyodide/pyodide.h 
b/src/workerd/api/pyodide/pyodide.h index 57774143b08f..b4b419664c7e 100644 --- a/src/workerd/api/pyodide/pyodide.h +++ b/src/workerd/api/pyodide/pyodide.h @@ -5,6 +5,7 @@ #include "workerd/util/wait-list.h" +#include #include #include #include @@ -408,6 +409,22 @@ class SimplePythonLimiter: public jsg::Object { } }; +class SetupEmscripten: public jsg::Object { + public: + SetupEmscripten(EmscriptenRuntime emscriptenRuntime) + : emscriptenRuntime(kj::mv(emscriptenRuntime)) {}; + + jsg::JsValue getModule(jsg::Lock& js); + + JSG_RESOURCE_TYPE(SetupEmscripten) { + JSG_METHOD(getModule); + } + + private: + EmscriptenRuntime emscriptenRuntime; + void visitForGc(jsg::GcVisitor& visitor); +}; + using Worker = server::config::Worker; jsg::Ref makePyodideMetadataReader( @@ -419,6 +436,6 @@ bool hasPythonModules(capnp::List::Reader module api::pyodide::ReadOnlyBuffer, api::pyodide::PyodideMetadataReader, \ api::pyodide::ArtifactBundler, api::pyodide::DiskCache, \ api::pyodide::DisabledInternalJaeger, api::pyodide::SimplePythonLimiter, \ - api::pyodide::MemorySnapshotResult + api::pyodide::MemorySnapshotResult, api::pyodide::SetupEmscripten } // namespace workerd::api::pyodide diff --git a/src/workerd/api/pyodide/setup-emscripten.c++ b/src/workerd/api/pyodide/setup-emscripten.c++ new file mode 100644 index 000000000000..b0ae7aed0391 --- /dev/null +++ b/src/workerd/api/pyodide/setup-emscripten.c++ @@ -0,0 +1,94 @@ +#include "setup-emscripten.h" + +#include +#include + +namespace workerd::api::pyodide { + +v8::Local loadEmscriptenSetupModule( + jsg::Lock& js, capnp::Data::Reader emsciptenSetupJsReader) { + v8::Local contentStr = jsg::v8Str(js.v8Isolate, emsciptenSetupJsReader.asChars()); + v8::ScriptOrigin origin( + jsg::v8StrIntern(js.v8Isolate, "pyodide-internal:generated/emscriptenSetup"), 0, 0, false, -1, + {}, false, false, true); + v8::ScriptCompiler::Source source(contentStr, origin); + return jsg::check(v8::ScriptCompiler::CompileModule(js.v8Isolate, &source)); +} + 
+jsg::JsValue resolvePromise(jsg::Lock& js, jsg::JsValue prom) { + auto promise = KJ_ASSERT_NONNULL(prom.tryCast()); + if (promise.state() == jsg::PromiseState::PENDING) { + js.runMicrotasks(); + } + KJ_ASSERT(promise.state() == jsg::PromiseState::FULFILLED); + return promise.result(); +} + +void instantiateEmscriptenSetupModule(jsg::Lock& js, v8::Local& module) { + jsg::instantiateModule(js, module); + auto evalPromise = KJ_ASSERT_NONNULL( + jsg::JsValue(jsg::check(module->Evaluate(js.v8Context()))).tryCast()); + resolvePromise(js, evalPromise); + KJ_ASSERT(module->GetStatus() == v8::Module::kEvaluated); +} + +v8::Local getInstantiateEmscriptenModule( + jsg::Lock& js, v8::Local& module) { + auto instantiateEmscriptenModule = + js.v8Get(module->GetModuleNamespace().As(), "instantiateEmscriptenModule"_kj); + KJ_ASSERT(instantiateEmscriptenModule->IsFunction()); + return instantiateEmscriptenModule.As(); +} + +template +jsg::JsValue callFunction(jsg::Lock& js, v8::Local& func, Args... args) { + v8::LocalVector argv( + js.v8Isolate, std::initializer_list>{args...}); + return jsg::JsValue( + jsg::check(func->Call(js.v8Context(), js.v8Null(), argv.size(), argv.data()))); +} + +jsg::JsValue callInstantiateEmscriptenModule(jsg::Lock& js, + v8::Local& func, + bool isWorkerd, + capnp::Data::Reader pythonStdlibZipReader, + capnp::Data::Reader pyodideAsmWasmReader) { + AllowV8BackgroundThreadsScope scope; + js.setAllowEval(true); + KJ_DEFER(js.setAllowEval(false)); + + auto pythonStdlibZip = v8::ArrayBuffer::New(js.v8Isolate, pythonStdlibZipReader.size(), + v8::BackingStoreInitializationMode::kUninitialized); + memcpy(pythonStdlibZip->Data(), pythonStdlibZipReader.begin(), pythonStdlibZipReader.size()); + auto pyodideAsmWasm = jsg::check(v8::WasmModuleObject::Compile(js.v8Isolate, + v8::MemorySpan(pyodideAsmWasmReader.begin(), pyodideAsmWasmReader.size()))); + return resolvePromise(js, + callFunction( + js, func, js.boolean(isWorkerd), kj::mv(pythonStdlibZip), 
kj::mv(pyodideAsmWasm))); +} + +EmscriptenRuntime EmscriptenRuntime::initialize( + jsg::Lock& js, bool isWorkerd, jsg::Bundle::Reader bundle) { + kj::Maybe emsciptenSetupJsReader; + kj::Maybe pythonStdlibZipReader; + kj::Maybe pyodideAsmWasmReader; + for (auto module: bundle.getModules()) { + if (module.getName().endsWith("emscriptenSetup.js")) { + emsciptenSetupJsReader = module.getData(); + } else if (module.getName().endsWith("python_stdlib.zip")) { + pythonStdlibZipReader = module.getData(); + } else if (module.getName().endsWith("pyodide.asm.wasm")) { + pyodideAsmWasmReader = module.getData(); + } + } + auto context = js.v8Context(); + Worker::setupContext(js, context, Worker::ConsoleMode::INSPECTOR_ONLY); + auto module = loadEmscriptenSetupModule(js, KJ_ASSERT_NONNULL(emsciptenSetupJsReader)); + instantiateEmscriptenSetupModule(js, module); + auto instantiateEmscriptenModule = getInstantiateEmscriptenModule(js, module); + auto emscriptenModule = callInstantiateEmscriptenModule(js, instantiateEmscriptenModule, + isWorkerd, KJ_ASSERT_NONNULL(pythonStdlibZipReader), KJ_ASSERT_NONNULL(pyodideAsmWasmReader)); + auto contextToken = jsg::JsValue(context->GetSecurityToken()); + return EmscriptenRuntime{contextToken.addRef(js), emscriptenModule.addRef(js)}; +} +} // namespace workerd::api::pyodide diff --git a/src/workerd/api/pyodide/setup-emscripten.h b/src/workerd/api/pyodide/setup-emscripten.h new file mode 100644 index 000000000000..3e788feba30f --- /dev/null +++ b/src/workerd/api/pyodide/setup-emscripten.h @@ -0,0 +1,12 @@ +#pragma once + +#include +#include + +namespace workerd::api::pyodide { +struct EmscriptenRuntime { + jsg::JsRef contextToken; + jsg::JsRef emscriptenRuntime; + static EmscriptenRuntime initialize(jsg::Lock& js, bool isWorkerd, jsg::Bundle::Reader bundle); +}; +} // namespace workerd::api::pyodide diff --git a/src/workerd/io/compatibility-date.capnp b/src/workerd/io/compatibility-date.capnp index d4e4e6f0608d..4b41f4622eb5 100644 --- 
a/src/workerd/io/compatibility-date.capnp +++ b/src/workerd/io/compatibility-date.capnp @@ -430,7 +430,7 @@ struct CompatibilityFlags @0x8f8c1b68151b6cef { pythonWorkers @43 :Bool $compatEnableFlag("python_workers") $pythonSnapshotRelease(pyodide = "0.26.0a2", pyodideRevision = "2024-03-01", - packages = "2024-03-01", backport = 3, + packages = "2024-03-01", backport = 9, baselineSnapshotHash = "d13ce2f4a0ade2e09047b469874dacf4d071ed3558fec4c26f8d0b99d95f77b5") $impliedByAfterDate(name = "pythonWorkersDevPyodide", date = "2000-01-01"); # Enables Python Workers. Access to this flag is not restricted, instead bundles containing diff --git a/src/workerd/server/workerd-api.c++ b/src/workerd/server/workerd-api.c++ index 60d4cb048e81..b11b81f537a5 100644 --- a/src/workerd/server/workerd-api.c++ +++ b/src/workerd/server/workerd-api.c++ @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -536,11 +537,24 @@ void WorkerdApi::compileModules(jsg::Lock& lockParam, if (hasPythonModules(confModules)) { KJ_REQUIRE(featureFlags.getPythonWorkers(), "The python_workers compatibility flag is required to use Python."); - // Inject Pyodide bundle auto pythonRelease = KJ_ASSERT_NONNULL(getPythonSnapshotRelease(featureFlags)); auto version = getPythonBundleName(pythonRelease); auto bundle = KJ_ASSERT_NONNULL( fetchPyodideBundle(impl->pythonConfig, version), "Failed to get Pyodide bundle"); + // Inject SetupEmscripten module + { + auto& lock = kj::downcast(lockParam); + auto context = lock.newContext({}, lock.v8Isolate); + v8::Context::Scope scope(context.getHandle(lock)); + // Init emscripten synchronously, the python script will import setup-emscripten and + // call getModule + auto emscriptenRuntime = api::pyodide::EmscriptenRuntime::initialize(lock, true, bundle); + modules->addBuiltinModule("internal:setup-emscripten", + jsg::alloc(kj::mv(emscriptenRuntime)), + workerd::jsg::ModuleRegistry::Type::INTERNAL); + } + + // Inject Pyodide bundle
modules->addBuiltinBundle(bundle, kj::none); // Inject pyodide bootstrap module (TODO: load this from the capnproto bundle?) {