Skip to content

Commit

Permalink
Winch: Add SIMD load and extend and load and splat instructions for x…
Browse files Browse the repository at this point in the history
…64 (#9950)

* Winch: Add SIMD load and extend and load and splat instructions

* Try removing simd_align from the unsupported for Winch list

* No more todos and rename loadkind variant

* Use AVX instructions for loads

* Adjust load with splat instructions to use AVX

* SIMD spec tests for Winch should fail on x64 MacOS

* Refine check for should-fail

* Adjust conditional check to avoid compile error
  • Loading branch information
jeffcharles authored Jan 12, 2025
1 parent a1511a0 commit dd18896
Show file tree
Hide file tree
Showing 18 changed files with 638 additions and 64 deletions.
11 changes: 10 additions & 1 deletion crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,6 @@ impl WastTest {
"misc_testsuite/simd/spillslot-size-fuzzbug.wast",
"misc_testsuite/simd/unaligned-load.wast",
"multi-memory/simd_memory-multi.wast",
"spec_testsuite/simd_align.wast",
"spec_testsuite/simd_bit_shift.wast",
"spec_testsuite/simd_bitwise.wast",
"spec_testsuite/simd_boolean.wast",
Expand Down Expand Up @@ -526,6 +525,16 @@ impl WastTest {
if unsupported.iter().any(|part| self.path.ends_with(part)) {
return true;
}

// SIMD on Winch requires AVX instructions, so on x86_64 hosts without both
// AVX and AVX2 the SIMD tests listed here are additionally reported (by
// returning `true`) alongside the statically unsupported ones above.
#[cfg(target_arch = "x86_64")]
if !(std::is_x86_feature_detected!("avx") && std::is_x86_feature_detected!("avx2")) {
    // Entries must carry the `.wast` extension used by the test scripts
    // (see the list above); a `.wasm` suffix never matches any test path,
    // which would silently turn this whole check into dead code.
    let unsupported = ["spec_testsuite/simd_align.wast"];

    if unsupported.iter().any(|part| self.path.ends_with(part)) {
        return true;
    }
}
}

for part in self.path.iter() {
Expand Down
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/load/v128_load16_splat_avx2.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx=true", "-Ccranelift-has-avx2=true" ]

(module
;; Linear memory initialized from an inline data segment; the function below
;; reads from its start (address 0).
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))

;; Exported as "v128.load16_splat": executes `v128.load16_splat` on the
;; constant address 0 and returns the resulting v128.
(func (export "v128.load16_splat") (result v128) (v128.load16_splat (i32.const 0)))
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x43
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; vpbroadcastw (%rcx), %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 43: ud2
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/load/v128_load16x4_s_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx=true" ]

(module
;; Linear memory initialized from an inline data segment; the function below
;; reads from its start (address 0).
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))

;; Exported as "v128.load16x4_s": executes `v128.load16x4_s` on the constant
;; address 0 and returns the resulting v128.
(func (export "v128.load16x4_s") (result v128) (v128.load16x4_s (i32.const 0)))
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x43
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; vpmovsxwd (%rcx), %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 43: ud2
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/load/v128_load16x4_u_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx=true" ]

(module
;; Linear memory initialized from an inline data segment; the function below
;; reads from its start (address 0).
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))

;; Exported as "v128.load16x4_u": executes `v128.load16x4_u` on the constant
;; address 0 and returns the resulting v128.
(func (export "v128.load16x4_u") (result v128) (v128.load16x4_u (i32.const 0)))
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x43
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; vpmovzxwd (%rcx), %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 43: ud2
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/load/v128_load32_splat_avx2.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx=true", "-Ccranelift-has-avx2=true" ]

(module
;; Linear memory initialized from an inline data segment; the function below
;; reads from its start (address 0).
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))

;; Exported as "v128.load32_splat": executes `v128.load32_splat` on the
;; constant address 0 and returns the resulting v128.
(func (export "v128.load32_splat") (result v128) (v128.load32_splat (i32.const 0)))
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x43
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; vpbroadcastd (%rcx), %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 43: ud2
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/load/v128_load32x2_s_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx=true" ]

(module
;; Linear memory initialized from an inline data segment; the function below
;; reads from its start (address 0).
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))

;; Exported as "v128.load32x2_s": executes `v128.load32x2_s` on the constant
;; address 0 and returns the resulting v128.
(func (export "v128.load32x2_s") (result v128) (v128.load32x2_s (i32.const 0)))
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x43
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; vpmovsxdq (%rcx), %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 43: ud2
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/load/v128_load32x2_u_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx=true" ]

(module
;; Linear memory initialized from an inline data segment; the function below
;; reads from its start (address 0).
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))

;; Exported as "v128.load32x2_u": executes `v128.load32x2_u` on the constant
;; address 0 and returns the resulting v128.
(func (export "v128.load32x2_u") (result v128) (v128.load32x2_u (i32.const 0)))
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x43
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; vpmovzxdq (%rcx), %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 43: ud2
30 changes: 30 additions & 0 deletions tests/disas/winch/x64/load/v128_load64_splat_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx=true" ]

(module
;; Linear memory initialized from an inline data segment; the function below
;; reads from its start (address 0).
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))

;; Exported as "v128.load64_splat": executes `v128.load64_splat` on the
;; constant address 0 and returns the resulting v128.
(func (export "v128.load64_splat") (result v128) (v128.load64_splat (i32.const 0)))
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x47
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; movsd (%rcx), %xmm0
;; vpshufd $0x44, %xmm0, %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 47: ud2
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/load/v128_load8_splat_avx2.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx=true", "-Ccranelift-has-avx2=true" ]

(module
;; Linear memory initialized from an inline data segment; the function below
;; reads from its start (address 0).
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))

;; Exported as "v128.load8_splat": executes `v128.load8_splat` on the constant
;; address 0 and returns the resulting v128.
(func (export "v128.load8_splat") (result v128) (v128.load8_splat (i32.const 0)))
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x43
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; vpbroadcastb (%rcx), %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 43: ud2
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/load/v128_load8x8_s_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx=true" ]

(module
;; Linear memory initialized from an inline data segment; the function below
;; reads from its start (address 0).
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))

;; Exported as "v128.load8x8_s": executes `v128.load8x8_s` on the constant
;; address 0 and returns the resulting v128.
(func (export "v128.load8x8_s") (result v128) (v128.load8x8_s (i32.const 0)))
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x43
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; vpmovsxbw (%rcx), %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 43: ud2
29 changes: 29 additions & 0 deletions tests/disas/winch/x64/load/v128_load8x8_u_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx=true" ]

(module
;; Linear memory initialized from an inline data segment; the function below
;; reads from its start (address 0).
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))

;; Exported as "v128.load8x8_u": executes `v128.load8x8_u` on the constant
;; address 0 and returns the resulting v128.
(func (export "v128.load8x8_u") (result v128) (v128.load8x8_u (i32.const 0)))
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x43
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movl $0, %eax
;; movq 0x60(%r14), %rcx
;; addq %rax, %rcx
;; vpmovzxbw (%rcx), %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 43: ud2
6 changes: 6 additions & 0 deletions winch/codegen/src/codegen/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ pub(crate) enum CodeGenError {
/// Unimplemented MacroAssembler instruction.
#[error("Unimplemented Masm instruction")]
UnimplementedMasmInstruction,
/// Unimplemented Wasm load kind.
#[error("Unimplemented Wasm load kind")]
UnimplementedWasmLoadKind,
/// Unimplemented due to requiring AVX.
#[error("Instruction not implemented for CPUs without AVX support")]
UnimplementedForNoAvx,
/// Unsupported eager initialization of tables.
#[error("Unsupported eager initialization of tables")]
UnsupportedTableEagerInit,
Expand Down
8 changes: 4 additions & 4 deletions winch/codegen/src/codegen/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use crate::{
codegen::BlockSig,
isa::reg::{writable, Reg},
masm::{
ExtendKind, IntCmpKind, MacroAssembler, MemOpKind, OperandSize, RegImm, SPOffset,
ShiftKind, TrapCode,
IntCmpKind, LoadKind, MacroAssembler, MemOpKind, OperandSize, RegImm, SPOffset, ShiftKind,
TrapCode,
},
stack::TypedReg,
};
Expand Down Expand Up @@ -846,7 +846,7 @@ where
arg: &MemArg,
ty: WasmValType,
size: OperandSize,
sextend: Option<ExtendKind>,
kind: LoadKind,
op_kind: MemOpKind,
) -> Result<()> {
if let Some(addr) = self.emit_compute_heap_address(&arg, size)? {
Expand All @@ -859,7 +859,7 @@ where

let src = self.masm.address_at_reg(addr, 0)?;
self.masm
.wasm_load(src, writable!(dst), size, sextend, op_kind)?;
.wasm_load(src, writable!(dst), size, kind, op_kind)?;
self.context.stack.push(TypedReg::new(ty, dst).into());
self.context.free_reg(addr);
}
Expand Down
Loading

0 comments on commit dd18896

Please sign in to comment.