Skip to content

Commit

Permalink
hmm
Browse files Browse the repository at this point in the history
  • Loading branch information
Phil Cummins committed Sep 27, 2024
1 parent c189e6a commit 0e6cf53
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 146 deletions.
8 changes: 5 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,16 @@ switch.
### Prerequisites

- Tools needed to build [CPython](https://github.com/python/cpython) (Make, Clang, etc.)
- [Rust](https://rustup.rs/) stable 1.81.0 or later, including the `wasm32-wasip1` and `wasm32-unknown-unknown` targets
- Note that we currently use the `-Z build-std` Cargo option to build the `componentize-py` runtime with position-independent code (which is not the default for `wasm32-wasip1`) and this requires using a recent nightly build of Rust.
- [Rust](https://rustup.rs/) stable 1.71 or later *and* nightly 2023-07-27 or later, including the `wasm32-wasi` and `wasm32-unknown-unknown` targets
- Note that we currently use the `-Z build-std` Cargo option to build the `componentize-py` runtime with position-independent code (which is not the default for `wasm32-wasi`) and this requires using a recent nightly build of Rust.

For Rust, something like this should work once you have `rustup`:

```shell
rustup update
rustup target add wasm32-wasip1 wasm32-unknown-unknown
rustup install nightly
rustup component add rust-src --toolchain nightly
rustup target add wasm32-wasi wasm32-unknown-unknown
```

### Building and Running
Expand Down
143 changes: 4 additions & 139 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use {
prelink::{embedded_helper_utils, embedded_python_standard_library},
serde::Deserialize,
std::{
collections::{HashMap, HashSet},
collections::HashMap,
env, fs,
io::Cursor,
iter,
Expand Down Expand Up @@ -49,7 +49,6 @@ mod summary;
mod test;
mod util;

static NATIVE_EXTENSION_SUFFIX: &str = ".cpython-312-wasm32-wasi.so";

wasmtime::component::bindgen!({
path: "wit",
Expand Down Expand Up @@ -213,73 +212,14 @@ pub async fn componentize(
.filter_map(|&s| Path::new(s).exists().then_some(s))
.collect::<Vec<_>>();

let embedded_python_standard_lib = embedded_python_standard_library().unwrap();
let embedded_helper_utils = embedded_helper_utils().unwrap();

// Search `python_path` for native extension libraries and/or componentize-py.toml files. Packages containing
// the latter may contain their own WIT files defining their own worlds (in addition to what the caller
// specified as parameters), which we'll try to match up with `module_worlds` in the next step.
let mut raw_configs: Vec<crate::ConfigContext<crate::RawComponentizePyConfig>> = Vec::new();
let mut library_path: Vec<(&str, Vec<std::path::PathBuf>)> =
Vec::with_capacity(python_path.len());
for path in python_path {
let mut libraries = Vec::new();
search_directory(
Path::new(path),
Path::new(path),
&mut libraries,
&mut raw_configs,
&mut HashSet::new(),
)?;
library_path.push((*path, libraries));
}

let mut libraries = prelink::bundle_libraries(library_path)?;

// Validate the paths parsed from any componentize-py.toml files discovered above and match them up with
// `module_worlds` entries. Note that we use an `IndexMap` to preserve the order specified in `module_worlds`,
// which is required to be topologically sorted with respect to package dependencies.
//
// For any packages which contain componentize-py.toml files but no corresponding `module_worlds` entry, we use
// the `world` parameter as a default.
let configs = {
let mut configs = raw_configs
.into_iter()
.map(|raw_config| {
let config =
ComponentizePyConfig::try_from((raw_config.path.deref(), raw_config.config))?;

Ok((
raw_config.module.clone(),
ConfigContext {
module: raw_config.module,
root: raw_config.root,
path: raw_config.path,
config,
},
))
})
.collect::<Result<HashMap<_, _>>>()?;

let mut ordered = IndexMap::new();
for (module, world) in module_worlds {
if let Some(config) = configs.remove(*module) {
ordered.insert((*module).to_owned(), (config, Some(*world)));
} else {
bail!("no `componentize-py.toml` file found for module `{module}`");
}
}
let embedded_python_standard_lib = embedded_python_standard_library()?;
let embedded_helper_utils = embedded_helper_utils()?;

for (module, config) in configs {
ordered.insert(module, (config, world));
}
let (configs, mut libraries) = prelink::search_for_libraries_and_configs(python_path, module_worlds)?;

ordered
};

// Next, iterate over all the WIT directories, merging them into a single `Resolve`, and matching Python
// packages to `WorldId`s.

let (mut resolve, mut main_world) = if let Some(path) = wit_path {
let (resolve, world) = parse_wit(path, world)?;
(Some(resolve), Some(world))
Expand Down Expand Up @@ -735,83 +675,8 @@ fn add_wasi_and_stubs(
Ok(())
}

/// Recursively walks `path`, collecting native extension libraries and `componentize-py.toml`
/// configuration files.
///
/// * `root` - the search root this walk started from; passed unchanged through the recursion and
///   used (after canonicalization) to derive the owning module name of any config file found.
/// * `path` - the directory or file currently being examined.
/// * `libraries` - receives the path of every file whose name ends with `NATIVE_EXTENSION_SUFFIX`.
/// * `configs` - receives one entry per accepted `componentize-py.toml`; an existing entry may be
///   updated in place when the same file is reached again through a different root.
/// * `modules_seen` - module names already encountered while scanning for duplicate config files.
///
/// Files whose names are not valid UTF-8 are skipped, as are files matching neither pattern.
///
/// # Errors
///
/// Returns an error if a directory cannot be read, a path cannot be canonicalized, the module
/// name cannot be derived, the duplicate check below fires, or the TOML file cannot be read or
/// parsed.
///
/// NOTE(review): the `modules_seen` check runs once per entry of `configs`, not once per file. It
/// looks like a config for a new module would hit the `bail!` on the second non-matching entry,
/// because the first iteration has already inserted `module` — confirm this is intended.
fn search_directory(
root: &Path,
path: &Path,
libraries: &mut Vec<PathBuf>,
configs: &mut Vec<ConfigContext<RawComponentizePyConfig>>,
modules_seen: &mut HashSet<String>,
) -> Result<()> {
if path.is_dir() {
// Directory: descend into every entry, keeping the same `root`.
for entry in fs::read_dir(path)? {
search_directory(root, &entry?.path(), libraries, configs, modules_seen)?;
}
} else if let Some(name) = path.file_name().and_then(|name| name.to_str()) {
if name.ends_with(NATIVE_EXTENSION_SUFFIX) {
libraries.push(path.to_owned());
} else if name == "componentize-py.toml" {
// Canonicalize both paths so the comparisons against previously recorded configs below
// operate on canonical paths.
let root = root.canonicalize()?;
let path = path.canonicalize()?;

let module = module_name(&root, &path)
.ok_or_else(|| anyhow!("unable to determine module name for {}", path.display()))?;

// `push` stays true only if no existing config claims this file or this module.
let mut push = true;
for existing in &mut *configs {
// `existing.path` holds the directory containing the config file, hence the `join`.
if path == existing.path.join("componentize-py.toml") {
// When one directory in `PYTHON_PATH` is a subdirectory of the other, we consider the
// subdirectory to be the true owner of the file. This is important later, when we derive a
// package name by stripping the root directory from the file path.
if root > existing.root {
module.clone_into(&mut existing.module);
root.clone_into(&mut existing.root);
path.parent().unwrap().clone_into(&mut existing.path);
}
push = false;
break;
} else {
// If we find a componentize-py.toml file under a Python module which will not be used because
// we already found a version of that module in an earlier `PYTHON_PATH` directory, we'll
// ignore the latest one.
//
// For example, if the module `foo_sdk` appears twice in `PYTHON_PATH`, and both versions have
// a componentize-py.toml file, we'll ignore the second one just as Python will ignore the
// second module.

// NOTE(review): the comment above says the later file is ignored, but this branch returns
// an error when the module name has already been recorded — confirm which is intended.
if modules_seen.contains(&module) {
bail!("multiple `componentize-py.toml` files found in module `{module}`");
}

modules_seen.insert(module.clone());

if module == existing.module {
push = false;
break;
}
}
}

if push {
// First config seen for this module: parse the file and record it together with its
// containing directory.
configs.push(ConfigContext {
module,
root: root.to_owned(),
path: path.parent().unwrap().to_owned(),
config: toml::from_str::<RawComponentizePyConfig>(&fs::read_to_string(path)?)?,
});
}
}
}

Ok(())
}

/// Derives the top-level module name for `path`, interpreted relative to `root`.
///
/// Returns the first path component below `root`, but only when at least one further component
/// follows it (i.e. the file lives inside a directory under `root`). Returns `None` when `path`
/// is not located under `root`, when it sits directly inside `root`, or when the first component
/// is not valid UTF-8.
fn module_name(root: &Path, path: &Path) -> Option<String> {
    let mut components = path.strip_prefix(root).ok()?.iter();
    let top_level = components.next()?;
    // A file directly under `root` has no enclosing module, so require a second component.
    components.next()?;
    top_level.to_str().map(str::to_owned)
}

fn add_wasi_imports<'a>(
module: &'a [u8],
Expand Down
151 changes: 147 additions & 4 deletions src/prelink.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
#![deny(warnings)]

use std::{
fs::{self},
io::{self, Cursor},
collections::{HashMap, HashSet}, fs::{self}, io::{self, Cursor}, ops::Deref, path::{Path, PathBuf}
};

use anyhow::Context;
use anyhow::{anyhow, bail, Context, Result};
use indexmap::IndexMap;
use tar::Archive;
use tempfile::TempDir;
use zstd::Decoder;

use crate::Library;
use crate::{ComponentizePyConfig, ConfigContext, Library, RawComponentizePyConfig};

static NATIVE_EXTENSION_SUFFIX: &str = ".cpython-312-wasm32-wasi.so";

pub fn embedded_python_standard_library() -> Result<TempDir, io::Error> {
// Untar the embedded copy of the Python standard library into a temporary directory
Expand Down Expand Up @@ -87,3 +89,144 @@ fn library_from_so(library_name: &str) -> Result<Library, io::Error> {
dl_openable: false,
})
}

pub fn search_for_libraries_and_configs<'a>(python_path: &'a Vec<&'a str>, module_worlds: &'a [(&'a str, &'a str)])
-> Result<(IndexMap<String, (ConfigContext<ComponentizePyConfig>, Option<&'a str>)>, Vec<Library>), anyhow::Error> {
let mut raw_configs: Vec<crate::ConfigContext<crate::RawComponentizePyConfig>> = Vec::new();
let mut library_path: Vec<(&str, Vec<std::path::PathBuf>)> =
Vec::with_capacity(python_path.len());
for path in python_path {
let mut libraries = Vec::new();
search_directory(
Path::new(path),
Path::new(path),
&mut libraries,
&mut raw_configs,
&mut HashSet::new(),
)?;
library_path.push((*path, libraries));
}

let mut libraries = bundle_libraries(library_path)?;

// Validate the paths parsed from any componentize-py.toml files discovered above and match them up with
// `module_worlds` entries. Note that we use an `IndexMap` to preserve the order specified in `module_worlds`,
// which is required to be topologically sorted with respect to package dependencies.
//
// For any packages which contain componentize-py.toml files but no corresponding `module_worlds` entry, we use
// the `world` parameter as a default.
let configs: IndexMap<String, (ConfigContext<ComponentizePyConfig>, Option<&str>)> = {
let mut configs = raw_configs
.into_iter()
.map(|raw_config| {
let config =
ComponentizePyConfig::try_from((raw_config.path.deref(), raw_config.config))?;

Ok((
raw_config.module.clone(),
ConfigContext {
module: raw_config.module,
root: raw_config.root,
path: raw_config.path,
config,
},
))
})
.collect::<Result<HashMap<_, _>>>()?;

let mut ordered = IndexMap::new();
for (module, world) in module_worlds {
if let Some(config) = configs.remove(*module) {
ordered.insert((*module).to_owned(), (config, Some(*world)));
} else {
bail!("no `componentize-py.toml` file found for module `{module}`");
}
}

for (module, config) in configs {
ordered.insert(module, (config, world));
}

ordered
};

Ok((configs, libraries))
}

/// Recursively walks `path`, collecting native extension libraries and `componentize-py.toml`
/// configuration files.
///
/// * `root` - the search root this walk started from; passed unchanged through the recursion and
///   used (after canonicalization) to derive the owning module name of any config file found.
/// * `path` - the directory or file currently being examined.
/// * `libraries` - receives the path of every file whose name ends with `NATIVE_EXTENSION_SUFFIX`.
/// * `configs` - receives one entry per accepted `componentize-py.toml`; an existing entry may be
///   updated in place when the same file is reached again through a different root.
/// * `modules_seen` - module names already encountered while scanning for duplicate config files.
///
/// Files whose names are not valid UTF-8 are skipped, as are files matching neither pattern.
///
/// # Errors
///
/// Returns an error if a directory cannot be read, a path cannot be canonicalized, the module
/// name cannot be derived, the duplicate check below fires, or the TOML file cannot be read or
/// parsed.
///
/// NOTE(review): the `modules_seen` check runs once per entry of `configs`, not once per file. It
/// looks like a config for a new module would hit the `bail!` on the second non-matching entry,
/// because the first iteration has already inserted `module` — confirm this is intended.
fn search_directory(
root: &Path,
path: &Path,
libraries: &mut Vec<PathBuf>,
configs: &mut Vec<ConfigContext<RawComponentizePyConfig>>,
modules_seen: &mut HashSet<String>,
) -> Result<(), anyhow::Error> {
if path.is_dir() {
// Directory: descend into every entry, keeping the same `root`.
for entry in fs::read_dir(path)? {
search_directory(root, &entry?.path(), libraries, configs, modules_seen)?;
}
} else if let Some(name) = path.file_name().and_then(|name| name.to_str()) {
if name.ends_with(NATIVE_EXTENSION_SUFFIX) {
libraries.push(path.to_owned());
} else if name == "componentize-py.toml" {
// Canonicalize both paths so the comparisons against previously recorded configs below
// operate on canonical paths.
let root = root.canonicalize()?;
let path = path.canonicalize()?;

let module = module_name(&root, &path)
.ok_or_else(|| anyhow!("unable to determine module name for {}", path.display()))?;

// `push` stays true only if no existing config claims this file or this module.
let mut push = true;
for existing in &mut *configs {
// `existing.path` holds the directory containing the config file, hence the `join`.
if path == existing.path.join("componentize-py.toml") {
// When one directory in `PYTHON_PATH` is a subdirectory of the other, we consider the
// subdirectory to be the true owner of the file. This is important later, when we derive a
// package name by stripping the root directory from the file path.
if root > existing.root {
module.clone_into(&mut existing.module);
root.clone_into(&mut existing.root);
path.parent().unwrap().clone_into(&mut existing.path);
}
push = false;
break;
} else {
// If we find a componentize-py.toml file under a Python module which will not be used because
// we already found a version of that module in an earlier `PYTHON_PATH` directory, we'll
// ignore the latest one.
//
// For example, if the module `foo_sdk` appears twice in `PYTHON_PATH`, and both versions have
// a componentize-py.toml file, we'll ignore the second one just as Python will ignore the
// second module.

// NOTE(review): the comment above says the later file is ignored, but this branch returns
// an error when the module name has already been recorded — confirm which is intended.
if modules_seen.contains(&module) {
bail!("multiple `componentize-py.toml` files found in module `{module}`");
}

modules_seen.insert(module.clone());

if module == existing.module {
push = false;
break;
}
}
}

if push {
// First config seen for this module: parse the file and record it together with its
// containing directory.
configs.push(ConfigContext {
module,
root: root.to_owned(),
path: path.parent().unwrap().to_owned(),
config: toml::from_str::<RawComponentizePyConfig>(&fs::read_to_string(path)?)?,
});
}
}
}

Ok(())
}

/// Derives the top-level module name for `path`, interpreted relative to `root`.
///
/// Returns the first path component below `root`, but only when at least one further component
/// follows it (i.e. the file lives inside a directory under `root`). Returns `None` when `path`
/// is not located under `root`, when it sits directly inside `root`, or when the first component
/// is not valid UTF-8.
fn module_name(root: &Path, path: &Path) -> Option<String> {
    let mut components = path.strip_prefix(root).ok()?.iter();
    let top_level = components.next()?;
    // A file directly under `root` has no enclosing module, so require a second component.
    components.next()?;
    top_level.to_str().map(str::to_owned)
}

0 comments on commit 0e6cf53

Please sign in to comment.