diff --git a/src/pyodide/BUILD.bazel b/src/pyodide/BUILD.bazel index 0eabada074b..ea1c76548d9 100644 --- a/src/pyodide/BUILD.bazel +++ b/src/pyodide/BUILD.bazel @@ -211,27 +211,27 @@ INTERNAL_DATA_MODULES = glob([ "internal/*.py", "internal/patches/*.py", "internal/topLevelEntropy/*.py", -]) +]) + [ + "generated/python_stdlib.zip", + "generated/pyodide.asm.wasm", + "generated/emscriptenSetup.js", +] wd_ts_bundle( name = "pyodide", eslintrc_json = "eslint.config.mjs", import_name = "pyodide", - internal_data_modules = ["generated/python_stdlib.zip"] + INTERNAL_DATA_MODULES, + internal_data_modules = INTERNAL_DATA_MODULES, internal_json_modules = [ "generated/pyodide-lock.json", "generated/pyodide-bucket.json", ], - internal_modules = [ - "generated/emscriptenSetup.js", - ] + INTERNAL_MODULES, - internal_wasm_modules = ["generated/pyodide.asm.wasm"], + internal_modules = INTERNAL_MODULES, js_deps = [ "generated/emscriptenSetup", - "pyodide.asm.js@rule", "pyodide.asm.wasm@rule", - "pyodide-lock.js@rule", "python_stdlib.zip@rule", + "pyodide-lock.js@rule", "pyodide-bucket.json@rule", ], lint = False, @@ -264,7 +264,7 @@ genrule( for m in INTERNAL_DATA_MODULES if m.endswith(".py") ] + [ - ":pyodide-internal_generated_emscriptenSetup", + ":pyodide-internal_generated_emscriptenSetup.js", ":pyodide-internal_generated_pyodide.asm.wasm", ":pyodide-internal_generated_python_stdlib.zip", ":pyodide-internal_generated_pyodide-lock.json", diff --git a/src/pyodide/internal/pool/emscriptenSetup.ts b/src/pyodide/internal/pool/emscriptenSetup.ts index b9b37c0ee99..d72946898c6 100644 --- a/src/pyodide/internal/pool/emscriptenSetup.ts +++ b/src/pyodide/internal/pool/emscriptenSetup.ts @@ -13,7 +13,7 @@ import { reportError } from 'pyodide-internal:util'; */ import { _createPyodideModule } from 'pyodide-internal:generated/pyodide.asm'; -export { +import { setUnsafeEval, setGetRandomValues, } from 'pyodide-internal:pool/builtin_wrappers'; @@ -56,7 +56,7 @@ function 
getWaitForDynlibs(resolveReadyPromise: PreRunHook): PreRunHook { * This is a simplified version of the `prepareFileSystem` function here: * https://github.com/pyodide/pyodide/blob/main/src/js/module.ts */ -function getPrepareFileSystem(pythonStdlib: Uint8Array): PreRunHook { +function getPrepareFileSystem(pythonStdlib: ArrayBuffer): PreRunHook { return function prepareFileSystem(Module: Module): void { try { const pymajor = Module._py_version_major(); @@ -118,7 +118,7 @@ function getInstantiateWasm( */ function getEmscriptenSettings( isWorkerd: boolean, - pythonStdlib: Uint8Array, + pythonStdlib: ArrayBuffer, pyodideWasmModule: WebAssembly.Module ): EmscriptenSettings { const config: PyodideConfig = { @@ -193,7 +193,7 @@ function* featureDetectionMonkeyPatchesContextManager() { */ export async function instantiateEmscriptenModule( isWorkerd: boolean, - pythonStdlib: Uint8Array, + pythonStdlib: ArrayBuffer, wasmModule: WebAssembly.Module ): Promise { const emscriptenSettings = getEmscriptenSettings( @@ -210,6 +210,8 @@ export async function instantiateEmscriptenModule( // Wait until we've executed all the preRun hooks before proceeding const emscriptenModule = await emscriptenSettings.readyPromise; + emscriptenModule.setUnsafeEval = setUnsafeEval; + emscriptenModule.setGetRandomValues = setGetRandomValues; return emscriptenModule; } catch (e) { console.warn('Error in instantiateEmscriptenModule'); diff --git a/src/pyodide/internal/python.ts b/src/pyodide/internal/python.ts index b036b5f2886..04c0a1c85ae 100644 --- a/src/pyodide/internal/python.ts +++ b/src/pyodide/internal/python.ts @@ -18,38 +18,15 @@ import { entropyBeforeTopLevel, getRandomValues, } from 'pyodide-internal:topLevelEntropy/lib'; -import { default as UnsafeEval } from 'internal:unsafe-eval'; -import { simpleRunPython } from 'pyodide-internal:util'; - -/** - * This file is a simplified version of the Pyodide loader: - * https://github.com/pyodide/pyodide/blob/main/src/js/pyodide.ts - * - * In 
particular, it drops the package lock, which disables - * `pyodide.loadPackage`. In trade we add memory snapshots here. - */ - /** - * _createPyodideModule and pyodideWasmModule together are produced by the - * Emscripten linker + * SetupEmscripten is an internal module defined in setup-emscripten.h. The module instantiates + * emscripten separately from this code in another context. + * The underlying code for it can be found in pool/emscriptenSetup.ts. */ -import pyodideWasmModule from 'pyodide-internal:generated/pyodide.asm.wasm'; +import { default as SetupEmscripten } from 'internal:setup-emscripten'; -/** - * The Python and Pyodide stdlib zipped together. The zip format is convenient - * because Python has a "ziploader" that allows one to import directly from a - * zip file. - * - * The ziploader solves bootstrapping problems around unpacking: Python comes - * with a bunch of C libs to unpack various archive formats, but they need stuff - * in this zip file to initialize their runtime state. - */ -import pythonStdlib from 'pyodide-internal:generated/python_stdlib.zip'; -import { - instantiateEmscriptenModule, - setUnsafeEval, - setGetRandomValues, -} from 'pyodide-internal:generated/emscriptenSetup'; +import { default as UnsafeEval } from 'internal:unsafe-eval'; +import { simpleRunPython } from 'pyodide-internal:util'; /** * After running `instantiateEmscriptenModule` but before calling into any C @@ -57,7 +34,7 @@ import { * `noInitialRun: true` and so the C runtime is in an incoherent state until we * restore the linear memory from the snapshot. */ -async function prepareWasmLinearMemory(Module: Module): Promise { +function prepareWasmLinearMemory(Module: Module): void { // Note: if we are restoring from a snapshot, runtime is not initialized yet.
mountSitePackages(Module, SITE_PACKAGES.rootInfo); entropyMountFiles(Module); @@ -86,21 +63,22 @@ async function prepareWasmLinearMemory(Module: Module): Promise { adjustSysPath(Module); } -export async function loadPyodide( +export function loadPyodide( isWorkerd: boolean, lockfile: PackageLock, indexURL: string -): Promise { - const Module = await enterJaegerSpan('instantiate_emscripten', () => - instantiateEmscriptenModule(isWorkerd, pythonStdlib, pyodideWasmModule) +): Pyodide { + const Module = enterJaegerSpan('instantiate_emscripten', () => + SetupEmscripten.getModule() ); + Module.API.config.jsglobals = globalThis; if (isWorkerd) { Module.API.config.indexURL = indexURL; Module.API.config.resolveLockFilePromise!(lockfile); } - setUnsafeEval(UnsafeEval); - setGetRandomValues(getRandomValues); - await enterJaegerSpan('prepare_wasm_linear_memory', () => + Module.setUnsafeEval(UnsafeEval); + Module.setGetRandomValues(getRandomValues); + enterJaegerSpan('prepare_wasm_linear_memory', () => prepareWasmLinearMemory(Module) ); maybeSetupSnapshotUpload(Module); @@ -109,7 +87,7 @@ export async function loadPyodide( mountWorkerFiles(Module); // Finish setting up Pyodide's ffi so we can use the nice Python interface - await enterJaegerSpan('finalize_bootstrap', Module.API.finalizeBootstrap); + enterJaegerSpan('finalize_bootstrap', Module.API.finalizeBootstrap); const pyodide = Module.API.public_api; finishSnapshotSetup(pyodide); return pyodide; diff --git a/src/pyodide/python-entrypoint-helper.ts b/src/pyodide/python-entrypoint-helper.ts index 188613106d4..94d4e881db9 100644 --- a/src/pyodide/python-entrypoint-helper.ts +++ b/src/pyodide/python-entrypoint-helper.ts @@ -31,14 +31,14 @@ function pyimportMainModule(pyodide: Pyodide): PyModule { return pyodide.pyimport(mainModuleName); } -let pyodidePromise: Promise | undefined; -function getPyodide(): Promise { +let pyodideInstance: Pyodide | undefined; +function getPyodide(): Pyodide { return enterJaegerSpan('get_pyodide', 
() => { - if (pyodidePromise) { - return pyodidePromise; + if (pyodideInstance) { + return pyodideInstance; } - pyodidePromise = loadPyodide(IS_WORKERD, LOCKFILE, WORKERD_INDEX_URL); - return pyodidePromise; + pyodideInstance = loadPyodide(IS_WORKERD, LOCKFILE, WORKERD_INDEX_URL); + return pyodideInstance!; }); } @@ -118,7 +118,7 @@ function getMainModule(): Promise { return mainModulePromise; } mainModulePromise = (async function () { - const pyodide = await getPyodide(); + const pyodide = getPyodide(); await setupPackages(pyodide); Limiter.beginStartup(); try { @@ -134,7 +134,7 @@ function getMainModule(): Promise { } async function preparePython(): Promise { - const pyodide = await getPyodide(); + const pyodide = getPyodide(); const mainModule = await getMainModule(); entropyBeforeRequest(pyodide._module); return mainModule; @@ -177,7 +177,7 @@ try { if (IS_WORKERD) { // If we're in workerd, we have to do setupPackages in the IoContext, so don't start it yet. // TODO: fix this. - await getPyodide(); + getPyodide(); } else { // If we're not in workerd, setupPackages doesn't require IO so we can do it all here. 
await getMainModule(); diff --git a/src/pyodide/types/emscripten.d.ts b/src/pyodide/types/emscripten.d.ts index e465da6c377..5b2ccba9a6f 100644 --- a/src/pyodide/types/emscripten.d.ts +++ b/src/pyodide/types/emscripten.d.ts @@ -68,4 +68,8 @@ interface Module { addRunDependency(x: string): void; removeRunDependency(x: string): void; noInitialRun: boolean; + setUnsafeEval(mod: typeof import('internal:unsafe-eval').default): void; + setGetRandomValues( + func: typeof import('pyodide-internal:topLevelEntropy/lib').getRandomValues + ): void; } diff --git a/src/pyodide/types/setup-emscripten.d.ts b/src/pyodide/types/setup-emscripten.d.ts new file mode 100644 index 00000000000..191e2ed3c5d --- /dev/null +++ b/src/pyodide/types/setup-emscripten.d.ts @@ -0,0 +1,5 @@ +declare namespace SetupEmscripten { + const getModule: () => Module; +} + +export default SetupEmscripten; diff --git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel index 9cba4624afc..62584250280 100644 --- a/src/workerd/api/BUILD.bazel +++ b/src/workerd/api/BUILD.bazel @@ -14,6 +14,7 @@ filegroup( "html-rewriter.c++", "hyperdrive.c++", "pyodide/pyodide.c++", + "pyodide/setup-emscripten.c++", "memory-cache.c++", "r2*.c++", "rtti.c++", @@ -37,6 +38,7 @@ filegroup( "hyperdrive.h", "memory-cache.h", "pyodide/pyodide.h", + "pyodide/setup-emscripten.h", "modules.h", "r2*.h", "rtti.h", @@ -126,9 +128,11 @@ wd_cc_library( name = "pyodide", srcs = [ "pyodide/pyodide.c++", + "pyodide/setup-emscripten.c++", ], hdrs = [ "pyodide/pyodide.h", + "pyodide/setup-emscripten.h", "//src/pyodide:generated/pyodide_extra.capnp.h", ], implementation_deps = ["//src/workerd/util:string-buffer"], diff --git a/src/workerd/api/pyodide/pyodide.c++ b/src/workerd/api/pyodide/pyodide.c++ index 9bde1f3e0bc..8125785af20 100644 --- a/src/workerd/api/pyodide/pyodide.c++ +++ b/src/workerd/api/pyodide/pyodide.c++ @@ -3,6 +3,7 @@ // https://opensource.org/licenses/Apache-2.0 #include "pyodide.h" +#include #include #include @@ -484,6 
+485,15 @@ void DiskCache::put(jsg::Lock& js, kj::String key, kj::Array data) { } } +jsg::JsValue SetupEmscripten::getModule(jsg::Lock& js) { + js.v8Context()->SetSecurityToken(emscriptenRuntime.contextToken.getHandle(js)); + return emscriptenRuntime.emscriptenRuntime.getHandle(js); +} + +void SetupEmscripten::visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(emscriptenRuntime.emscriptenRuntime); +} + bool hasPythonModules(capnp::List::Reader modules) { for (auto module: modules) { if (module.isPythonModule()) { diff --git a/src/workerd/api/pyodide/pyodide.h b/src/workerd/api/pyodide/pyodide.h index c964f490a33..ec102c76cdd 100644 --- a/src/workerd/api/pyodide/pyodide.h +++ b/src/workerd/api/pyodide/pyodide.h @@ -5,6 +5,7 @@ #include "workerd/util/wait-list.h" +#include #include #include #include @@ -408,6 +409,22 @@ class SimplePythonLimiter: public jsg::Object { } }; +class SetupEmscripten: public jsg::Object { +public: + SetupEmscripten(EmscriptenRuntime emscriptenRuntime) + : emscriptenRuntime(kj::mv(emscriptenRuntime)) {}; + + jsg::JsValue getModule(jsg::Lock& js); + + JSG_RESOURCE_TYPE(SetupEmscripten) { + JSG_METHOD(getModule); + } + +private: + EmscriptenRuntime emscriptenRuntime; + void visitForGc(jsg::GcVisitor& visitor); +}; + using Worker = server::config::Worker; jsg::Ref makePyodideMetadataReader( @@ -419,6 +436,6 @@ bool hasPythonModules(capnp::List::Reader module api::pyodide::ReadOnlyBuffer, api::pyodide::PyodideMetadataReader, \ api::pyodide::ArtifactBundler, api::pyodide::DiskCache, \ api::pyodide::DisabledInternalJaeger, api::pyodide::SimplePythonLimiter, \ - api::pyodide::MemorySnapshotResult + api::pyodide::MemorySnapshotResult, api::pyodide::SetupEmscripten } // namespace workerd::api::pyodide diff --git a/src/workerd/api/pyodide/setup-emscripten.c++ b/src/workerd/api/pyodide/setup-emscripten.c++ new file mode 100644 index 00000000000..b0ae7aed039 --- /dev/null +++ b/src/workerd/api/pyodide/setup-emscripten.c++ @@ -0,0 +1,94 @@ +#include 
"setup-emscripten.h" + +#include +#include + +namespace workerd::api::pyodide { + +v8::Local loadEmscriptenSetupModule( + jsg::Lock& js, capnp::Data::Reader emsciptenSetupJsReader) { + v8::Local contentStr = jsg::v8Str(js.v8Isolate, emsciptenSetupJsReader.asChars()); + v8::ScriptOrigin origin( + jsg::v8StrIntern(js.v8Isolate, "pyodide-internal:generated/emscriptenSetup"), 0, 0, false, -1, + {}, false, false, true); + v8::ScriptCompiler::Source source(contentStr, origin); + return jsg::check(v8::ScriptCompiler::CompileModule(js.v8Isolate, &source)); +} + +jsg::JsValue resolvePromise(jsg::Lock& js, jsg::JsValue prom) { + auto promise = KJ_ASSERT_NONNULL(prom.tryCast()); + if (promise.state() == jsg::PromiseState::PENDING) { + js.runMicrotasks(); + } + KJ_ASSERT(promise.state() == jsg::PromiseState::FULFILLED); + return promise.result(); +} + +void instantiateEmscriptenSetupModule(jsg::Lock& js, v8::Local& module) { + jsg::instantiateModule(js, module); + auto evalPromise = KJ_ASSERT_NONNULL( + jsg::JsValue(jsg::check(module->Evaluate(js.v8Context()))).tryCast()); + resolvePromise(js, evalPromise); + KJ_ASSERT(module->GetStatus() == v8::Module::kEvaluated); +} + +v8::Local getInstantiateEmscriptenModule( + jsg::Lock& js, v8::Local& module) { + auto instantiateEmscriptenModule = + js.v8Get(module->GetModuleNamespace().As(), "instantiateEmscriptenModule"_kj); + KJ_ASSERT(instantiateEmscriptenModule->IsFunction()); + return instantiateEmscriptenModule.As(); +} + +template +jsg::JsValue callFunction(jsg::Lock& js, v8::Local& func, Args... 
args) { + v8::LocalVector argv( + js.v8Isolate, std::initializer_list>{args...}); + return jsg::JsValue( + jsg::check(func->Call(js.v8Context(), js.v8Null(), argv.size(), argv.data()))); +} + +jsg::JsValue callInstantiateEmscriptenModule(jsg::Lock& js, + v8::Local& func, + bool isWorkerd, + capnp::Data::Reader pythonStdlibZipReader, + capnp::Data::Reader pyodideAsmWasmReader) { + AllowV8BackgroundThreadsScope scope; + js.setAllowEval(true); + KJ_DEFER(js.setAllowEval(false)); + + auto pythonStdlibZip = v8::ArrayBuffer::New(js.v8Isolate, pythonStdlibZipReader.size(), + v8::BackingStoreInitializationMode::kUninitialized); + memcpy(pythonStdlibZip->Data(), pythonStdlibZipReader.begin(), pythonStdlibZipReader.size()); + auto pyodideAsmWasm = jsg::check(v8::WasmModuleObject::Compile(js.v8Isolate, + v8::MemorySpan(pyodideAsmWasmReader.begin(), pyodideAsmWasmReader.size()))); + return resolvePromise(js, + callFunction( + js, func, js.boolean(isWorkerd), kj::mv(pythonStdlibZip), kj::mv(pyodideAsmWasm))); +} + +EmscriptenRuntime EmscriptenRuntime::initialize( + jsg::Lock& js, bool isWorkerd, jsg::Bundle::Reader bundle) { + kj::Maybe emsciptenSetupJsReader; + kj::Maybe pythonStdlibZipReader; + kj::Maybe pyodideAsmWasmReader; + for (auto module: bundle.getModules()) { + if (module.getName().endsWith("emscriptenSetup.js")) { + emsciptenSetupJsReader = module.getData(); + } else if (module.getName().endsWith("python_stdlib.zip")) { + pythonStdlibZipReader = module.getData(); + } else if (module.getName().endsWith("pyodide.asm.wasm")) { + pyodideAsmWasmReader = module.getData(); + } + } + auto context = js.v8Context(); + Worker::setupContext(js, context, Worker::ConsoleMode::INSPECTOR_ONLY); + auto module = loadEmscriptenSetupModule(js, KJ_ASSERT_NONNULL(emsciptenSetupJsReader)); + instantiateEmscriptenSetupModule(js, module); + auto instantiateEmscriptenModule = getInstantiateEmscriptenModule(js, module); + auto emscriptenModule = callInstantiateEmscriptenModule(js, 
instantiateEmscriptenModule, + isWorkerd, KJ_ASSERT_NONNULL(pythonStdlibZipReader), KJ_ASSERT_NONNULL(pyodideAsmWasmReader)); + auto contextToken = jsg::JsValue(context->GetSecurityToken()); + return EmscriptenRuntime{contextToken.addRef(js), emscriptenModule.addRef(js)}; +} +} // namespace workerd::api::pyodide diff --git a/src/workerd/api/pyodide/setup-emscripten.h b/src/workerd/api/pyodide/setup-emscripten.h new file mode 100644 index 00000000000..3e788feba30 --- /dev/null +++ b/src/workerd/api/pyodide/setup-emscripten.h @@ -0,0 +1,12 @@ +#pragma once + +#include +#include + +namespace workerd::api::pyodide { +struct EmscriptenRuntime { + jsg::JsRef contextToken; + jsg::JsRef emscriptenRuntime; + static EmscriptenRuntime initialize(jsg::Lock& js, bool isWorkerd, jsg::Bundle::Reader bundle); +}; +} // namespace workerd::api::pyodide diff --git a/src/workerd/server/workerd-api.c++ b/src/workerd/server/workerd-api.c++ index 327ec5eec7e..2bbbf8d7b9c 100644 --- a/src/workerd/server/workerd-api.c++ +++ b/src/workerd/server/workerd-api.c++ @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -536,11 +537,24 @@ void WorkerdApi::compileModules(jsg::Lock& lockParam, if (hasPythonModules(confModules)) { KJ_REQUIRE(featureFlags.getPythonWorkers(), "The python_workers compatibility flag is required to use Python."); - // Inject Pyodide bundle auto pythonRelease = KJ_ASSERT_NONNULL(getPythonSnapshotRelease(featureFlags)); auto version = getPythonBundleName(pythonRelease); auto bundle = KJ_ASSERT_NONNULL( fetchPyodideBundle(impl->pythonConfig, version), "Failed to get Pyodide bundle"); + // Inject SetupEmscripten module + { + auto& lock = kj::downcast(lockParam); + auto context = lock.newContext({}, lock.v8Isolate); + v8::Context::Scope scope(context.getHandle(lock)); + // Init emscripten synchronously, the python script will import setup-emscripten and + // call getModule + auto emscriptenRuntime =
api::pyodide::EmscriptenRuntime::initialize(lock, true, bundle); + modules->addBuiltinModule("internal:setup-emscripten", + jsg::alloc(kj::mv(emscriptenRuntime)), + workerd::jsg::ModuleRegistry::Type::INTERNAL); + } + + // Inject Pyodide bundle modules->addBuiltinBundle(bundle, kj::none); // Inject pyodide bootstrap module (TODO: load this from the capnproto bundle?) {