Winch atomic loads x64 #9970

Merged · 24 commits · Jan 10, 2025
Changes from 19 commits
2 changes: 1 addition & 1 deletion crates/wasmtime/src/config.rs
@@ -1972,7 +1972,6 @@ impl Config {
Some(Strategy::Winch) => {
let mut unsupported = WasmFeatures::GC
| WasmFeatures::FUNCTION_REFERENCES
| WasmFeatures::THREADS
| WasmFeatures::RELAXED_SIMD
| WasmFeatures::TAIL_CALL
| WasmFeatures::GC_TYPES;
@@ -1985,6 +1984,7 @@ impl Config {
// winch on aarch64 but this helps gate most spec tests
// by default which otherwise currently cause panics.
unsupported |= WasmFeatures::REFERENCE_TYPES;
unsupported |= WasmFeatures::THREADS;
}

// Winch doesn't support other non-x64 architectures at this
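For context, a minimal usage sketch (not taken from this PR) of the configuration this gating change is meant to permit on x86_64; it assumes wasmtime's public Config, Strategy, and Engine APIs plus the anyhow crate:

    use wasmtime::{Config, Engine, Strategy};

    fn main() -> anyhow::Result<()> {
        let mut config = Config::new();
        // Select the Winch baseline compiler.
        config.strategy(Strategy::Winch);
        // With THREADS removed from Winch's globally-unsupported feature set,
        // enabling the threads proposal on x86_64 should pass feature validation.
        config.wasm_threads(true);
        let _engine = Engine::new(&config)?;
        Ok(())
    }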
30 changes: 30 additions & 0 deletions tests/disas/winch/x64/atomic/load/i32_atomic_load.wat
@@ -0,0 +1,30 @@
;;! target = "x86_64"
;;! test = "winch"

(module
(import "env" "memory" (memory 1 1 shared))
(func (param $foo i32) (result i32)
(i32.atomic.load
(local.get $foo))))
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x20, %r11
;; cmpq %rsp, %r11
;; ja 0x47
;; 1c: movq %rdi, %r14
;; subq $0x20, %rsp
;; movq %rdi, 0x18(%rsp)
;; movq %rsi, 0x10(%rsp)
;; movl %edx, 0xc(%rsp)
;; movl 0xc(%rsp), %eax
;; movq 0x58(%r14), %r11
;; movq (%r11), %rcx
;; addq %rax, %rcx
;; movl (%rcx), %eax
;; addq $0x20, %rsp
;; popq %rbp
;; retq
;; 47: ud2
30 changes: 30 additions & 0 deletions tests/disas/winch/x64/atomic/load/i32_atomic_load16_u.wat
@@ -0,0 +1,30 @@
;;! target = "x86_64"
;;! test = "winch"

(module
(import "env" "memory" (memory 1 1 shared))
(func (param $foo i32) (result i32)
(i32.atomic.load16_u
(local.get $foo))))
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x20, %r11
;; cmpq %rsp, %r11
;; ja 0x49
;; 1c: movq %rdi, %r14
;; subq $0x20, %rsp
;; movq %rdi, 0x18(%rsp)
;; movq %rsi, 0x10(%rsp)
;; movl %edx, 0xc(%rsp)
;; movl 0xc(%rsp), %eax
;; movq 0x58(%r14), %r11
;; movq (%r11), %rcx
;; addq %rax, %rcx
;; movzwq (%rcx), %rax
;; addq $0x20, %rsp
;; popq %rbp
;; retq
;; 49: ud2
28 changes: 28 additions & 0 deletions tests/disas/winch/x64/atomic/load/i32_atomic_load8_u.wat
@@ -0,0 +1,28 @@
;;! target = "x86_64"
;;! test = "winch"

(module
(memory (data "\00\00\00\00\00\00\f4\7f"))

(func (result i32)
(i32.atomic.load8_u (i32.const 0))))
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x42
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; movzbq (%rcx), %rax
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 42: ud2
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/atomic/load/i64_atomic_load.wat
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"

(module
(memory (data "\00\00\00\00\00\00\f4\7f"))

(func (result i64)
(i64.atomic.load
(i32.const 0))))
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x41
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; movq (%rcx), %rax
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 41: ud2
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/atomic/load/i64_atomic_load16_u.wat
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"

(module
(memory (data "\00\00\00\00\00\00\f4\7f"))

(func (result i64)
(i64.atomic.load16_u
(i32.const 0))))
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x42
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; movzwq (%rcx), %rax
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 42: ud2
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/atomic/load/i64_atomic_load32_u.wat
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"

(module
(memory (data "\00\00\00\00\00\00\f4\7f"))

(func (result i64)
(i64.atomic.load32_u
(i32.const 0))))
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x40
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; movl (%rcx), %eax
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 40: ud2
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/atomic/load/i64_atomic_load8_u.wat
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"

(module
(memory (data "\00\00\00\00\00\00\f4\7f"))

(func (result i64)
(i64.atomic.load8_u
(i32.const 0))))
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x42
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; movzbq (%rcx), %rax
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 42: ud2
32 changes: 29 additions & 3 deletions winch/codegen/src/codegen/mod.rs
@@ -847,8 +847,29 @@ where
size: OperandSize,
sextend: Option<ExtendKind>,
) -> Result<()> {
let addr = self.emit_compute_heap_address(&arg, size)?;
if let Some(addr) = addr {
self.emit_wasm_load_full(arg, ty, size, sextend, false)
}

/// Emit a WebAssembly atomic load.
pub fn emit_wasm_load_atomic(
&mut self,
arg: &MemArg,
ty: WasmValType,
size: OperandSize,
sextend: Option<ExtendKind>,
) -> Result<()> {
self.emit_wasm_load_full(arg, ty, size, sextend, true)
}

pub fn emit_wasm_load_full(
&mut self,
arg: &MemArg,
ty: WasmValType,
size: OperandSize,
sextend: Option<ExtendKind>,
atomic: bool,
Member: Rather than a bool, which can be error-prone, could you define a small enum WasmLoadKind { Regular, Atomic } or similar and match on it in this function? (See the sketch after this file's diff.)

) -> Result<()> {
if let Some(addr) = self.emit_compute_heap_address(&arg, size)? {
let dst = match ty {
WasmValType::I32 | WasmValType::I64 => self.context.any_gpr(self.masm)?,
WasmValType::F32 | WasmValType::F64 => self.context.any_fpr(self.masm)?,
@@ -857,7 +878,12 @@
};

let src = self.masm.address_at_reg(addr, 0)?;
self.masm.wasm_load(src, writable!(dst), size, sextend)?;
if atomic {
self.masm
.wasm_load_atomic(src, writable!(dst), size, sextend)?;
} else {
self.masm.wasm_load(src, writable!(dst), size, sextend)?;
}
self.context.stack.push(TypedReg::new(ty, dst).into());
self.context.free_reg(addr);
}
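A minimal, standalone sketch of the enum-over-bool pattern the reviewer suggests; LoadKind and the stub emitters are illustrative stand-ins, not Winch's actual types or methods:

    // Illustrative stand-in for the suggested WasmLoadKind { Regular, Atomic }.
    #[derive(Clone, Copy)]
    enum LoadKind {
        Regular,
        Atomic,
    }

    // Stubs standing in for MacroAssembler::wasm_load / wasm_load_atomic.
    fn wasm_load() { /* plain load lowering */ }
    fn wasm_load_atomic() { /* atomic load lowering */ }

    // A single entry point matches on the kind, so call sites read
    // `emit_wasm_load(LoadKind::Atomic)` rather than passing an opaque `true`.
    fn emit_wasm_load(kind: LoadKind) {
        match kind {
            LoadKind::Regular => wasm_load(),
            LoadKind::Atomic => wasm_load_atomic(),
        }
    }

    fn main() {
        emit_wasm_load(LoadKind::Regular);
        emit_wasm_load(LoadKind::Atomic);
    }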
10 changes: 10 additions & 0 deletions winch/codegen/src/isa/aarch64/masm.rs
@@ -870,6 +870,16 @@ impl Masm for MacroAssembler {
let _ = (context, kind);
Err(anyhow!(CodeGenError::unimplemented_masm_instruction()))
}

fn wasm_load_atomic(
&mut self,
_src: Self::Address,
_dst: WritableReg,
_size: OperandSize,
_sextend: Option<ExtendKind>,
) -> Result<()> {
todo!()
Member: Could you return a proper CodeGenError instead of panicking? I'm in the process of enabling spec tests for aarch64, and in general recoverable errors are preferred: they make it easier to report when a particular proposal is not fully supported in a given backend (like threads on aarch64). I believe we have CodeGenError::unimplemented_masm_instruction(). (A sketch follows this file's diff.)

}
}

impl MacroAssembler {
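A sketch of the change the reviewer asks for, mirroring the existing unimplemented stub earlier in this file's diff; it assumes anyhow!, CodeGenError, and the other names are already in scope in this module, as the surrounding code suggests:

    fn wasm_load_atomic(
        &mut self,
        _src: Self::Address,
        _dst: WritableReg,
        _size: OperandSize,
        _sextend: Option<ExtendKind>,
    ) -> Result<()> {
        // Return a recoverable "unimplemented" error instead of panicking, so
        // the threads proposal can simply be reported as unsupported on aarch64.
        Err(anyhow!(CodeGenError::unimplemented_masm_instruction()))
    }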
12 changes: 12 additions & 0 deletions winch/codegen/src/isa/x64/masm.rs
@@ -1211,6 +1211,18 @@ impl Masm for MacroAssembler {

Ok(())
}

fn wasm_load_atomic(
&mut self,
src: Self::Address,
dst: WritableReg,
size: OperandSize,
kind: Option<ExtendKind>,
) -> Result<()> {
// The guarantees of the x86-64 memory model ensure that `SeqCst`
// loads are equivalent to normal loads.
Member: Given this comment, and for the purposes of this pull request, I wonder whether it's necessary to introduce a new method in the MacroAssembler at all. If possible, I'd prefer to extend the current load definition to take a load_kind argument that categorizes whether the load is atomic, similar to Chris's comment in https://github.com/bytecodealliance/wasmtime/pull/9970/files#r1909577300; that would also simplify the indirection in the CodeGen module. We could simplify to:

  • extending emit_wasm_load to take in a load kind, and
  • performing the right dispatch directly from that method to the MacroAssembler, by passing the in-scope load kind.

(A sketch of this direction follows this file's diff.)

Contributor (Author): Sure, I'll do that.

self.wasm_load(src, dst, size, kind)
}
}

impl MacroAssembler {
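A rough sketch of the direction the reviewer suggests: fold the atomic case into the existing load method instead of adding a new one. LoadKind is the illustrative enum from the earlier comment and emit_plain_load is a hypothetical placeholder for the existing x64 load lowering; neither is the code that was ultimately merged:

    // Trait side (winch/codegen/src/masm.rs): one load entry point that also
    // carries the kind of load being requested.
    fn wasm_load(
        &mut self,
        src: Self::Address,
        dst: WritableReg,
        size: OperandSize,
        sextend: Option<ExtendKind>,
        kind: LoadKind,
    ) -> Result<()>;

    // x64 side: x86-64's memory model makes SeqCst loads equivalent to plain
    // loads, so both kinds can share one lowering.
    fn wasm_load(
        &mut self,
        src: Self::Address,
        dst: WritableReg,
        size: OperandSize,
        sextend: Option<ExtendKind>,
        _kind: LoadKind,
    ) -> Result<()> {
        self.emit_plain_load(src, dst, size, sextend) // hypothetical helper
    }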
8 changes: 8 additions & 0 deletions winch/codegen/src/masm.rs
@@ -1134,4 +1134,12 @@ pub(crate) trait MacroAssembler {
/// instruction (e.g. x64) so full access to `CodeGenContext` is provided.
fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind)
-> Result<()>;

fn wasm_load_atomic(
&mut self,
src: Self::Address,
dst: WritableReg,
size: OperandSize,
sextend: Option<ExtendKind>,
) -> Result<()>;
}
35 changes: 35 additions & 0 deletions winch/codegen/src/visitor.rs
@@ -253,6 +253,13 @@ macro_rules! def_unsupported {
(emit I64Sub128 $($rest:tt)*) => {};
(emit I64MulWideS $($rest:tt)*) => {};
(emit I64MulWideU $($rest:tt)*) => {};
(emit I32AtomicLoad8U $($rest:tt)*) => {};
(emit I32AtomicLoad16U $($rest:tt)*) => {};
(emit I32AtomicLoad $($rest:tt)*) => {};
(emit I64AtomicLoad8U $($rest:tt)*) => {};
(emit I64AtomicLoad16U $($rest:tt)*) => {};
(emit I64AtomicLoad32U $($rest:tt)*) => {};
(emit I64AtomicLoad $($rest:tt)*) => {};

(emit $unsupported:tt $($rest:tt)*) => {$($rest)*};
}
@@ -2139,6 +2146,34 @@ where
self.masm.mul_wide(&mut self.context, MulWideKind::Unsigned)
}

fn visit_i32_atomic_load8_u(&mut self, memarg: wasmparser::MemArg) -> Self::Output {
self.emit_wasm_load_atomic(&memarg, WasmValType::I32, OperandSize::S8, None)
}

fn visit_i32_atomic_load16_u(&mut self, memarg: wasmparser::MemArg) -> Self::Output {
self.emit_wasm_load_atomic(&memarg, WasmValType::I32, OperandSize::S16, None)
}

fn visit_i32_atomic_load(&mut self, memarg: wasmparser::MemArg) -> Self::Output {
self.emit_wasm_load_atomic(&memarg, WasmValType::I32, OperandSize::S32, None)
}

fn visit_i64_atomic_load8_u(&mut self, memarg: wasmparser::MemArg) -> Self::Output {
self.emit_wasm_load_atomic(&memarg, WasmValType::I64, OperandSize::S8, None)
}

fn visit_i64_atomic_load16_u(&mut self, memarg: wasmparser::MemArg) -> Self::Output {
self.emit_wasm_load_atomic(&memarg, WasmValType::I64, OperandSize::S16, None)
}

fn visit_i64_atomic_load32_u(&mut self, memarg: wasmparser::MemArg) -> Self::Output {
self.emit_wasm_load_atomic(&memarg, WasmValType::I64, OperandSize::S32, None)
}

fn visit_i64_atomic_load(&mut self, memarg: wasmparser::MemArg) -> Self::Output {
self.emit_wasm_load_atomic(&memarg, WasmValType::I64, OperandSize::S64, None)
}

wasmparser::for_each_visit_operator!(def_unsupported);
}
