Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cranelift/x64 backend: do not use one-way branches. #10086

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 29 additions & 20 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -578,17 +578,16 @@
;; Jump to a known target: jmp simm32.
(JmpKnown (dst MachLabel))

;; One-way conditional branch: jcond cond target.
;; Low-level one-way conditional branch: jcond cond target.
;;
;; This instruction is useful when we have conditional jumps depending on
;; more than two conditions, see for instance the lowering of Brif
;; with Fcmp inputs.
;;
;; A note of caution: in contexts where the branch target is another
;; block, this has to be the same successor as the one specified in the
;; terminator branch of the current block. Otherwise, this might confuse
;; register allocation by creating new invisible edges.
(JmpIf (cc CC)
;; This instruction is useful only for lower-level code
;; generators that use the Cranelift instruction backend as an
;; assembler library. The instruction is thus named after its
;; primary user, Winch. This instruction *should not* be used
;; by Cranelift proper and placed into VCode: it does not
;; adhere to the basic-block invariant, namely that branches
;; always end a block (with no fallthrough).
(WinchJmpIf (cc CC)
(taken MachLabel))

;; Two-way conditional branch: jcond cond target target.
Expand All @@ -599,6 +598,17 @@
(taken MachLabel)
(not_taken MachLabel))

;; Two-way conditional branch with a combination of conditions:
;;
;; j(cc1 or cc2) target1 target2
;;
;; Emitted as a compound sequence of three branches -- `jcc1
;; target1`, `jcc2 target1`, `jmp target2`.
(JmpCondOr (cc1 CC)
(cc2 CC)
(taken MachLabel)
(not_taken MachLabel))

;; Jump-table sequence, as one compound instruction (see note in lower.rs
;; for rationale).
;;
Expand Down Expand Up @@ -5030,15 +5040,16 @@
(rule (jmp_known target)
(SideEffectNoResult.Inst (MInst.JmpKnown target)))

(decl jmp_if (CC MachLabel) ConsumesFlags)
(rule (jmp_if cc taken)
(ConsumesFlags.ConsumesFlagsSideEffect (MInst.JmpIf cc taken)))

;; Conditional jump based on the condition code.
(decl jmp_cond (CC MachLabel MachLabel) ConsumesFlags)
(rule (jmp_cond cc taken not_taken)
(ConsumesFlags.ConsumesFlagsSideEffect (MInst.JmpCond cc taken not_taken)))

;; Conditional jump based on the OR of two condition codes.
(decl jmp_cond_or (CC CC MachLabel MachLabel) ConsumesFlags)
(rule (jmp_cond_or cc1 cc2 taken not_taken)
(ConsumesFlags.ConsumesFlagsSideEffect (MInst.JmpCondOr cc1 cc2 taken not_taken)))

;; Conditional jump based on the result of an icmp.
(decl jmp_cond_icmp (IcmpCondResult MachLabel MachLabel) SideEffectNoResult)
(rule (jmp_cond_icmp (IcmpCondResult.Condition producer cc) taken not_taken)
Expand All @@ -5050,14 +5061,12 @@
(with_flags_side_effect producer (jmp_cond cc taken not_taken)))
(rule (jmp_cond_fcmp (FcmpCondResult.AndCondition producer cc1 cc2) taken not_taken)
(with_flags_side_effect producer
(consumes_flags_concat
(jmp_if (cc_invert cc1) not_taken)
(jmp_cond (cc_invert cc2) not_taken taken))))
;; DeMorgan's rule: to get cc1 AND cc2, we do NOT (NOT cc1 OR NOT cc2).
;; The outer NOT comes from flipping `not_taken` and `taken`.
(jmp_cond_or (cc_invert cc1) (cc_invert cc2) not_taken taken)))
(rule (jmp_cond_fcmp (FcmpCondResult.OrCondition producer cc1 cc2) taken not_taken)
(with_flags_side_effect producer
(consumes_flags_concat
(jmp_if cc1 taken)
(jmp_cond cc2 taken not_taken))))
(jmp_cond_or cc1 cc2 taken not_taken)))

;; Emit the compound instruction that does:
;;
Expand Down
75 changes: 73 additions & 2 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1622,7 +1622,8 @@ pub(crate) fn emit(
inst.emit(sink, info, state);

// jne .loop_start
// TODO: Encoding the JmpIf as a short jump saves us 4 bytes here.
// TODO: Encoding the conditional jump as a short jump
// could save us us 4 bytes here.
one_way_jmp(sink, CC::NZ, loop_start);

// The regular prologue code is going to emit a `sub` after this, so we need to
Expand Down Expand Up @@ -1891,7 +1892,7 @@ pub(crate) fn emit(
sink.put4(0x0);
}

Inst::JmpIf { cc, taken } => {
Inst::WinchJmpIf { cc, taken } => {
let cond_start = sink.cur_offset();
let cond_disp_off = cond_start + 2;

Expand Down Expand Up @@ -1936,6 +1937,76 @@ pub(crate) fn emit(
sink.put4(0x0);
}

Inst::JmpCondOr {
cc1,
cc2,
taken,
not_taken,
} => {
// Emit:
// jcc1 taken
// jcc2 taken
// jmp not_taken
//
// Note that we enroll both conditionals in the
// branch-chomping mechanism because MachBuffer
// simplification can continue upward as long as it keeps
// chomping branches. In the best case, if taken ==
// not_taken and that one block is the fallthrough block,
// all three branches can disappear.

// jcc1 taken
let cond_1_start = sink.cur_offset();
let cond_1_disp_off = cond_1_start + 2;
let cond_1_end = cond_1_start + 6;

sink.use_label_at_offset(cond_1_disp_off, *taken, LabelUse::JmpRel32);
let inverted: [u8; 6] = [
0x0F,
0x80 + (cc1.invert().get_enc()),
0x00,
0x00,
0x00,
0x00,
];
sink.add_cond_branch(cond_1_start, cond_1_end, *taken, &inverted[..]);

sink.put1(0x0F);
sink.put1(0x80 + cc1.get_enc());
sink.put4(0x0);

// jcc2 taken
let cond_2_start = sink.cur_offset();
let cond_2_disp_off = cond_2_start + 2;
let cond_2_end = cond_2_start + 6;

sink.use_label_at_offset(cond_2_disp_off, *taken, LabelUse::JmpRel32);
let inverted: [u8; 6] = [
0x0F,
0x80 + (cc2.invert().get_enc()),
0x00,
0x00,
0x00,
0x00,
];
sink.add_cond_branch(cond_2_start, cond_2_end, *taken, &inverted[..]);

sink.put1(0x0F);
sink.put1(0x80 + cc2.get_enc());
sink.put4(0x0);

// jmp not_taken
let uncond_start = sink.cur_offset();
let uncond_disp_off = uncond_start + 1;
let uncond_end = uncond_start + 5;

sink.use_label_at_offset(uncond_disp_off, *not_taken, LabelUse::JmpRel32);
sink.add_uncond_branch(uncond_start, uncond_end, *not_taken);

sink.put1(0xE9);
sink.put4(0x0);
}

Inst::JmpUnknown { target } => {
let target = target.clone();

Expand Down
28 changes: 25 additions & 3 deletions cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ impl Inst {
| Inst::Hlt
| Inst::Imm { .. }
| Inst::JmpCond { .. }
| Inst::JmpIf { .. }
| Inst::JmpCondOr { .. }
| Inst::WinchJmpIf { .. }
| Inst::JmpKnown { .. }
| Inst::JmpTableSeq { .. }
| Inst::JmpUnknown { .. }
Expand Down Expand Up @@ -1738,12 +1739,24 @@ impl PrettyPrint for Inst {
format!("{op} {dst}")
}

Inst::JmpIf { cc, taken } => {
Inst::WinchJmpIf { cc, taken } => {
let taken = taken.to_string();
let op = ljustify2("j".to_string(), cc.to_string());
format!("{op} {taken}")
}

Inst::JmpCondOr {
cc1,
cc2,
taken,
not_taken,
} => {
let taken = taken.to_string();
let not_taken = not_taken.to_string();
let op = ljustify(format!("j{cc1},{cc2}"));
format!("{op} {taken}; j {not_taken}")
}

Inst::JmpCond {
cc,
taken,
Expand Down Expand Up @@ -2657,8 +2670,9 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
}

Inst::JmpKnown { .. }
| Inst::JmpIf { .. }
| Inst::WinchJmpIf { .. }
| Inst::JmpCond { .. }
| Inst::JmpCondOr { .. }
| Inst::Ret { .. }
| Inst::Nop { .. }
| Inst::TrapIf { .. }
Expand Down Expand Up @@ -2775,12 +2789,20 @@ impl MachInst for Inst {
}
&Self::JmpKnown { .. } => MachTerminator::Uncond,
&Self::JmpCond { .. } => MachTerminator::Cond,
&Self::JmpCondOr { .. } => MachTerminator::Cond,
&Self::JmpTableSeq { .. } => MachTerminator::Indirect,
// All other cases are boring.
_ => MachTerminator::None,
}
}

fn is_low_level_branch(&self) -> bool {
match self {
&Self::WinchJmpIf { .. } => true,
_ => false,
}
}

fn is_mem_access(&self) -> bool {
panic!("TODO FILL ME OUT")
}
Expand Down
3 changes: 2 additions & 1 deletion cranelift/codegen/src/isa/x64/pcc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -840,8 +840,9 @@ pub(crate) fn check(
| Inst::ReturnCallKnown { .. }
| Inst::JmpKnown { .. }
| Inst::Ret { .. }
| Inst::JmpIf { .. }
| Inst::WinchJmpIf { .. }
| Inst::JmpCond { .. }
| Inst::JmpCondOr { .. }
| Inst::TrapIf { .. }
| Inst::TrapIfAnd { .. }
| Inst::TrapIfOr { .. }
Expand Down
8 changes: 8 additions & 0 deletions cranelift/codegen/src/machinst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,14 @@ pub trait MachInst: Clone + Debug {
/// architecture.
fn function_alignment() -> FunctionAlignment;

/// Is this a low-level, one-way branch, not meant for use in a
/// VCode body? These instructions are meant to be used only when
/// directly emitted, i.e. when `MachInst` is used as an assembler
/// library.
fn is_low_level_branch(&self) -> bool {
false
}

/// A label-use kind: a type that describes the types of label references that
/// can occur in an instruction.
type LabelUse: MachInstLabelUse;
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/machinst/vcode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ impl<I: VCodeInst> VCodeBuilder<I> {
/// Push an instruction for the current BB and current IR inst
/// within the BB.
pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode.
self.vcode.insts.push(insn);
self.vcode.srclocs.push(loc);
}
Expand Down
64 changes: 52 additions & 12 deletions cranelift/filetests/filetests/isa/x64/branches.clif
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,7 @@ block2:
; movq %rsp, %rbp
; block0:
; ucomiss %xmm1, %xmm0
; jp label1
; jnz label1; j label2
; jp,nz label1; j label2
; block1:
; movl $2, %eax
; movq %rbp, %rsp
Expand Down Expand Up @@ -216,8 +215,7 @@ block2:
; movq %rsp, %rbp
; block0:
; ucomiss %xmm1, %xmm0
; jp label1
; jnz label1; j label2
; jp,nz label1; j label2
; block1:
; movl $1, %eax
; movq %rbp, %rsp
Expand Down Expand Up @@ -265,8 +263,7 @@ block2:
; movq %rsp, %rbp
; block0:
; ucomiss %xmm1, %xmm0
; jp label2
; jnz label2; j label1
; jp,nz label2; j label1
; block1:
; movl $1, %eax
; movq %rbp, %rsp
Expand Down Expand Up @@ -631,8 +628,7 @@ block202:
; movq %rsp, %rbp
; block0:
; ucomiss const(1), %xmm0
; jp label2
; jnz label2; j label1
; jp,nz label2; j label1
; block1:
; jmp label5
; block2:
Expand Down Expand Up @@ -753,8 +749,7 @@ block2:
; movq %rsp, %rbp
; block0:
; ucomiss %xmm1, %xmm0
; jp label1
; jnz label1; j label2
; jp,nz label1; j label2
; block1:
; movl $2, %eax
; movq %rbp, %rsp
Expand Down Expand Up @@ -855,8 +850,7 @@ block2:
; movq %rsp, %rbp
; block0:
; ucomiss %xmm1, %xmm0
; jp label1
; jnz label1; j label2
; jp,nz label1; j label2
; block1:
; movl $2, %eax
; movq %rbp, %rsp
Expand Down Expand Up @@ -887,6 +881,52 @@ block2:
; popq %rbp
; retq

function %brif_i8_fcmp_same_target(f32, f32) -> i32 {
block0(v0: f32, v1: f32):
v2 = fcmp eq v0, v1
v3 = uextend.i32 v2
;; This test should demonstrate branch-chomping work on the combo
;; two-condition branch lowered from `fcmp`; in fact this case is
;; even more interesting because critical-edge splitting will create
;; edge blocks (block1 and block2 in lowered VCode below), since
;; otherwise we have multiple outs from first block and multiple ins
;; to second block; and then branch-chomping elides five (!)
;; cascading branches in a row.
brif v3, block1, block1

block1:
v4 = iconst.i32 1
return v4
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; ucomiss %xmm1, %xmm0
; jp,nz label2; j label1
; block1:
; jmp label3
; block2:
; jmp label3
; block3:
; movl $1, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; ucomiss %xmm1, %xmm0
; block2: ; offset 0x7
; movl $1, %eax
; movq %rbp, %rsp
; popq %rbp
; retq

function %br_table_i32(i32) -> i32 {
block0(v0: i32):
br_table v0, block4, [block1, block2, block2, block3]
Expand Down
Loading
Loading