Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,7 @@ dummy_func(
assert(next_instr->op.code == STORE_FAST);
next_oparg = next_instr->op.arg;
#else
next_oparg = (int)CURRENT_OPERAND0();
next_oparg = (int)CURRENT_OPERAND0_16();
#endif
_PyStackRef *target_local = &GETLOCAL(next_oparg);
assert(PyUnicode_CheckExact(left_o));
Expand Down
8 changes: 6 additions & 2 deletions Python/ceval_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -450,8 +450,12 @@ do { \
} while (0)

#define CURRENT_OPARG() (next_uop[-1].oparg)
#define CURRENT_OPERAND0() (next_uop[-1].operand0)
#define CURRENT_OPERAND1() (next_uop[-1].operand1)
#define CURRENT_OPERAND0_64() (next_uop[-1].operand0)
#define CURRENT_OPERAND1_64() (next_uop[-1].operand1)
#define CURRENT_OPERAND0_32() (next_uop[-1].operand0)
#define CURRENT_OPERAND1_32() (next_uop[-1].operand1)
#define CURRENT_OPERAND0_16() (next_uop[-1].operand0)
#define CURRENT_OPERAND1_16() (next_uop[-1].operand1)
#define CURRENT_TARGET() (next_uop[-1].target)

#define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target
Expand Down
234 changes: 117 additions & 117 deletions Python/executor_cases.c.h

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions Tools/cases_generator/tier2_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,12 +222,13 @@ def write_uop(uop: Uop, emitter: Emitter, stack: Stack, offset_strs: dict[str, t
idx = 0
for cache in uop.caches:
if cache.name != "unused":
bits = cache.size*16
if cache.size == 4:
type = cast = "PyObject *"
else:
type = f"uint{cache.size*16}_t "
cast = f"uint{cache.size*16}_t"
emitter.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND{idx}();\n")
type = f"uint{bits}_t "
cast = f"uint{bits}_t"
emitter.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND{idx}_{bits}();\n")
idx += 1
reachable, storage = emitter.emit_tokens(uop, storage, None, False)
if reachable:
Expand Down
129 changes: 128 additions & 1 deletion Tools/jit/_optimizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ class InstructionKind(enum.Enum):
LONG_BRANCH = enum.auto()
SHORT_BRANCH = enum.auto()
RETURN = enum.auto()
SMALL_CONST_1 = enum.auto()
SMALL_CONST_2 = enum.auto()
OTHER = enum.auto()


Expand Down Expand Up @@ -172,6 +174,7 @@ class Optimizer:
)
# Override everything that follows in subclasses:
_supports_external_relocations = True
supports_small_constants = False
_branches: typing.ClassVar[dict[str, tuple[str | None, str | None]]] = {}
# Short branches are instructions that can branch within a micro-op,
# but might not have the reach to branch anywhere within a trace.
Expand All @@ -184,6 +187,9 @@ class Optimizer:
_re_return: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH
text: str = ""
globals: set[str] = dataclasses.field(default_factory=set)
_re_small_const_1 = _RE_NEVER_MATCH
_re_small_const_2 = _RE_NEVER_MATCH
const_reloc = "<Not supported>"

def __post_init__(self) -> None:
# Split the code into a linked list of basic blocks. A basic block is an
Expand Down Expand Up @@ -253,6 +259,14 @@ def _parse_instruction(self, line: str) -> Instruction:
elif match := self._re_return.match(line):
name = line
kind = InstructionKind.RETURN
elif match := self._re_small_const_1.match(line):
target = match["value"]
name = match["instruction"]
kind = InstructionKind.SMALL_CONST_1
elif match := self._re_small_const_2.match(line):
target = match["value"]
name = match["instruction"]
kind = InstructionKind.SMALL_CONST_2
else:
name, *_ = line.split(" ")
kind = InstructionKind.OTHER
Expand Down Expand Up @@ -385,7 +399,7 @@ def _remove_redundant_jumps(self) -> None:
block.fallthrough = True
block.instructions.pop()
# Before:
# br ? FOO:
# branch FOO:
# ...
# FOO:
# jump BAR
Expand Down Expand Up @@ -461,6 +475,70 @@ def _fixup_external_labels(self) -> None:
)
block.instructions.append(branch.update_target("0"))

def _make_temp_label(self, index: int) -> Instruction:
    """Return a pseudo-instruction whose text is the label ``jit_temp_<index>:``.

    The instruction has kind ``OTHER``, an empty name and no target.
    """
    return Instruction(InstructionKind.OTHER, "", f"jit_temp_{index}:", None)

def _fixup_constants(self) -> None:
    """Rewrite small-constant instruction pairs into patchable replacements.

    Does nothing unless ``supports_small_constants`` is true.

    Within each block, every SMALL_CONST_1 instruction is preceded by a
    ``jit_temp_<n>:`` label and remembered.  When a later SMALL_CONST_2
    instruction in the same block matches it (``_small_consts_match``), the
    pair is rewritten:

    * first target ending in "16": the first instruction is replaced by
      another temporary label;
    * first target ending in "32": the first instruction and the label in
      front of it are replaced by the instruction offered by
      ``_small_const_1`` and a relocation label (only when a replacement
      is offered);
    * in both cases the second instruction is replaced by a relocation
      label followed by the instruction offered by ``_small_const_2``.

    Unpaired or non-matching instructions are kept unchanged.  If
    ``_small_const_2`` offers no replacement, the whole pair is left
    untouched instead of dropping the second instruction.
    """
    if not self.supports_small_constants:
        return

    def reloc_label(patch_kind: str, target: str, number: int) -> Instruction:
        # target[:-3] removes the last three characters of the target; the
        # callers below only reach this with targets ending in "16" or "32".
        text = (
            f"{self.const_reloc}{patch_kind}_JIT_RELOCATION_CONST"
            f"{target[:-3]}_JIT_RELOCATION_{number}:"
        )
        return Instruction(InstructionKind.OTHER, "", text, None)

    index = 0  # Running counter that keeps every emitted label unique.
    for block in self._blocks():
        fixed: list[Instruction] = []
        # Position in `fixed` of the last unpaired SMALL_CONST_1, or -1.
        first_pos = -1
        for inst in block.instructions:
            if inst.kind == InstructionKind.SMALL_CONST_1:
                fixed.append(self._make_temp_label(index))
                index += 1
                first_pos = len(fixed)
                fixed.append(inst)
                continue
            if inst.kind != InstructionKind.SMALL_CONST_2 or first_pos < 0:
                fixed.append(inst)
                continue
            # From here on the remembered first instruction is consumed,
            # whether or not the pair ends up being rewritten.
            pending, first_pos = first_pos, -1
            first = fixed[pending]
            if not self._small_consts_match(first, inst):
                fixed.append(inst)
                continue
            # Ask for the second replacement before touching the first
            # instruction, so that a missing replacement leaves the
            # original pair intact.
            second_kind, second = self._small_const_2(inst)
            if second is None:
                fixed.append(inst)
                continue
            assert first.target is not None
            assert inst.target is not None
            if first.target.endswith("16"):
                fixed[pending] = self._make_temp_label(index)
                index += 1
            else:
                assert first.target.endswith("32")
                first_kind, replacement = self._small_const_1(first)
                if replacement is not None:
                    # fixed[pending - 1] is the temporary label that was
                    # inserted in front of the first instruction.
                    fixed[pending - 1] = reloc_label(
                        first_kind, first.target, index
                    )
                    index += 1
                    fixed[pending] = replacement
            fixed.append(reloc_label(second_kind, inst.target, index))
            index += 1
            fixed.append(second)
        block.instructions = fixed

def _small_const_1(self, inst: Instruction) -> tuple[str, Instruction | None]:
    """Subclass hook: return ``(patch_kind, replacement)`` for a SMALL_CONST_1.

    ``_fixup_constants`` leaves the instruction alone when the replacement
    is ``None``.  The base class provides no implementation.
    """
    raise NotImplementedError()

def _small_const_2(self, inst: Instruction) -> tuple[str, Instruction | None]:
    """Subclass hook: return ``(patch_kind, replacement)`` for a SMALL_CONST_2.

    The base class provides no implementation.
    """
    raise NotImplementedError()

def _small_consts_match(self, inst1: Instruction, inst2: Instruction) -> bool:
    """Subclass hook: report whether *inst1* and *inst2* form a rewritable pair.

    The base class provides no implementation.
    """
    raise NotImplementedError()

def run(self) -> None:
"""Run this optimizer."""
self._insert_continue_label()
Expand All @@ -472,6 +550,7 @@ def run(self) -> None:
self._remove_redundant_jumps()
self._remove_unreachable()
self._fixup_external_labels()
self._fixup_constants()
self.path.write_text(self._body())


Expand All @@ -492,6 +571,54 @@ class OptimizerAArch64(Optimizer): # pylint: disable = too-few-public-methods
# https://developer.arm.com/documentation/ddi0602/2025-09/Base-Instructions/RET--Return-from-subroutine-
_re_return = re.compile(r"\s*ret\b")

supports_small_constants = True
_re_small_const_1 = re.compile(
r"\s*(?P<instruction>adrp)\s+.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*"
)
_re_small_const_2 = re.compile(
r"\s*(?P<instruction>ldr)\s+.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*"
)
const_reloc = "CUSTOM_AARCH64_CONST"

def _get_reg(self, inst: Instruction) -> str:
    """Return the first operand of *inst*, stripped of whitespace.

    The operand is the text between the first occurrence of the
    instruction's name and the first following comma.

    Raises ValueError if ``inst.name`` does not occur in ``inst.text``.
    """
    # Split only at the first occurrence of the mnemonic: the same
    # characters may show up again later in the operands (for example
    # inside a symbol name), which would otherwise break the unpacking.
    _, rest = inst.text.split(inst.name, 1)
    return rest.split(",", 1)[0].strip()

def _small_const_1(self, inst: Instruction) -> tuple[str, Instruction | None]:
    """Return the patch kind and replacement for the first half of a pair.

    For a target ending in "16" no replacement is offered (``("", None)``).
    Otherwise the instruction is replaced by ``movz <reg>, 0`` with patch
    kind "16a", keeping whatever text preceded the original mnemonic.
    """
    assert inst.kind is InstructionKind.SMALL_CONST_1
    assert inst.target is not None
    # Suffix test, consistent with the endswith() checks in _fixup_constants.
    if inst.target.endswith("16"):
        return "", None
    # Split at the first occurrence only, so the mnemonic text reappearing
    # in the operands cannot break the unpacking.
    pre, _ = inst.text.split(inst.name, 1)
    reg = self._get_reg(inst)
    return "16a", Instruction(
        InstructionKind.OTHER, "movz", f"{pre}movz {reg}, 0", None
    )

def _small_const_2(self, inst: Instruction) -> tuple[str, Instruction | None]:
    """Return the patch kind and replacement for the second half of a pair.

    For a target ending in "16" the replacement is ``movz <reg>, 0`` with
    patch kind "16a"; otherwise it is ``movk <reg>, 0, lsl #16`` with patch
    kind "16b".  Text preceding the original mnemonic is kept.
    """
    assert inst.kind is InstructionKind.SMALL_CONST_2
    assert inst.target is not None
    # Split at the first occurrence only, so the mnemonic text reappearing
    # in the operands cannot break the unpacking.
    pre, _ = inst.text.split(inst.name, 1)
    reg = self._get_reg(inst)
    # Suffix test, consistent with the endswith() checks in _fixup_constants.
    if inst.target.endswith("16"):
        return "16a", Instruction(
            InstructionKind.OTHER, "movz", f"{pre}movz {reg}, 0", None
        )
    return "16b", Instruction(
        InstructionKind.OTHER, "movk", f"{pre}movk {reg}, 0, lsl #16", None
    )

def _small_consts_match(self, inst1: Instruction, inst2: Instruction) -> bool:
    """Return True when both instructions have the same first operand.

    The operand is whatever ``_get_reg`` extracts from each instruction.
    """
    return self._get_reg(inst1) == self._get_reg(inst2)


class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods
"""i686-pc-windows-msvc/x86_64-apple-darwin/x86_64-unknown-linux-gnu"""
Expand Down
3 changes: 3 additions & 0 deletions Tools/jit/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
"ARM64_RELOC_PAGE21",
"ARM64_RELOC_PAGEOFF12",
"ARM64_RELOC_UNSIGNED",
"CUSTOM_AARCH64_BRANCH19",
"CUSTOM_AARCH64_CONST_16",
"CUSTOM_AARCH64_CONST_32",
"IMAGE_REL_AMD64_REL32",
"IMAGE_REL_ARM64_BRANCH19",
"IMAGE_REL_ARM64_BRANCH26",
Expand Down
19 changes: 18 additions & 1 deletion Tools/jit/_stencils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ class HoleValue(enum.Enum):
# The current uop's operand0 on 32-bit platforms (exposed as _JIT_OPERAND0_HI/LO):
OPERAND0_HI = enum.auto()
OPERAND0_LO = enum.auto()
# 16-bit and 32-bit variants of OPARG, OPERAND0 and OPERAND1 (OPARG has a 16-bit variant only):
OPARG_16 = enum.auto()
OPERAND0_16 = enum.auto()
OPERAND1_16 = enum.auto()
OPERAND0_32 = enum.auto()
OPERAND1_32 = enum.auto()
# The current uop's operand1 on 64-bit platforms (exposed as _JIT_OPERAND1):
OPERAND1 = enum.auto()
# The current uop's operand1 on 32-bit platforms (exposed as _JIT_OPERAND1_HI/LO):
Expand Down Expand Up @@ -59,6 +65,8 @@ class HoleValue(enum.Enum):
"ARM64_RELOC_PAGEOFF12": "patch_aarch64_12",
"ARM64_RELOC_UNSIGNED": "patch_64",
"CUSTOM_AARCH64_BRANCH19": "patch_aarch64_19r",
"CUSTOM_AARCH64_CONST16a": "patch_aarch64_16a",
"CUSTOM_AARCH64_CONST16b": "patch_aarch64_16b",
# x86_64-pc-windows-msvc:
"IMAGE_REL_AMD64_REL32": "patch_x86_64_32rx",
# aarch64-pc-windows-msvc:
Expand Down Expand Up @@ -95,6 +103,7 @@ class HoleValue(enum.Enum):
"X86_64_RELOC_SIGNED": "patch_32r",
"X86_64_RELOC_UNSIGNED": "patch_64",
}

# Translate HoleValues to C expressions:
_HOLE_EXPRS = {
HoleValue.CODE: "(uintptr_t)code",
Expand All @@ -103,10 +112,15 @@ class HoleValue(enum.Enum):
HoleValue.GOT: "",
# These should all have been turned into DATA values by process_relocations:
HoleValue.OPARG: "instruction->oparg",
HoleValue.OPARG_16: "instruction->oparg",
HoleValue.OPERAND0: "instruction->operand0",
HoleValue.OPERAND0_16: "instruction->operand0",
HoleValue.OPERAND0_32: "instruction->operand0",
HoleValue.OPERAND0_HI: "(instruction->operand0 >> 32)",
HoleValue.OPERAND0_LO: "(instruction->operand0 & UINT32_MAX)",
HoleValue.OPERAND1: "instruction->operand1",
HoleValue.OPERAND1_16: "instruction->operand1",
HoleValue.OPERAND1_32: "instruction->operand1",
HoleValue.OPERAND1_HI: "(instruction->operand1 >> 32)",
HoleValue.OPERAND1_LO: "(instruction->operand1 & UINT32_MAX)",
HoleValue.TARGET: "instruction->target",
Expand Down Expand Up @@ -201,7 +215,10 @@ def as_c(self, where: str) -> str:
if self.symbol:
if value:
value += " + "
value += f"(uintptr_t)&{self.symbol}"
if self.symbol.startswith("CONST"):
value += f"instruction->{self.symbol[10:].lower()}"
else:
value += f"(uintptr_t)&{self.symbol}"
if _signed(self.addend) or not value:
if value:
value += " + "
Expand Down
1 change: 1 addition & 0 deletions Tools/jit/_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ async def _compile(
f"--target={self.triple}",
"-DPy_BUILD_CORE_MODULE",
"-D_DEBUG" if self.debug else "-DNDEBUG",
f"-DSUPPORTS_SMALL_CONSTS={1 if self.optimizer.supports_small_constants else 0}",
f"-D_JIT_OPCODE={opname}",
"-D_PyJIT_ACTIVE",
"-D_Py_JIT",
Expand Down
50 changes: 41 additions & 9 deletions Tools/jit/template.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,38 @@

#include "jit.h"


#undef CURRENT_OPERAND0_64
#define CURRENT_OPERAND0_64() (_operand0_64)

#undef CURRENT_OPERAND1_64
#define CURRENT_OPERAND1_64() (_operand1_64)


#undef CURRENT_OPARG
#undef CURRENT_OPERAND0_16
#undef CURRENT_OPERAND0_32
#undef CURRENT_OPERAND1_16
#undef CURRENT_OPERAND1_32

#if SUPPORTS_SMALL_CONSTS

#define CURRENT_OPARG() (_oparg_16)
#define CURRENT_OPERAND0_32() (_operand0_32)
#define CURRENT_OPERAND0_16() (_operand0_16)
#define CURRENT_OPERAND1_32() (_operand1_32)
#define CURRENT_OPERAND1_16() (_operand1_16)

#else

#define CURRENT_OPARG() (_oparg)
#define CURRENT_OPERAND0_32() (_operand0_64)
#define CURRENT_OPERAND0_16() (_operand0_64)
#define CURRENT_OPERAND1_32() (_operand1_64)
#define CURRENT_OPERAND1_16() (_operand1_64)

#undef CURRENT_OPERAND0
#define CURRENT_OPERAND0() (_operand0)
#endif

#undef CURRENT_OPERAND1
#define CURRENT_OPERAND1() (_operand1)

#undef CURRENT_TARGET
#define CURRENT_TARGET() (_target)
Expand Down Expand Up @@ -105,18 +129,26 @@ _JIT_ENTRY(
int uopcode = _JIT_OPCODE;
_Py_CODEUNIT *next_instr;
// Other stuff we need handy:
PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG)
#if SIZEOF_VOID_P == 8
PATCH_VALUE(uint64_t, _operand0, _JIT_OPERAND0)
PATCH_VALUE(uint64_t, _operand1, _JIT_OPERAND1)
PATCH_VALUE(uint64_t, _operand0_64, _JIT_OPERAND0)
PATCH_VALUE(uint64_t, _operand1_64, _JIT_OPERAND1)
#else
assert(SIZEOF_VOID_P == 4);
PATCH_VALUE(uint32_t, _operand0_hi, _JIT_OPERAND0_HI)
PATCH_VALUE(uint32_t, _operand0_lo, _JIT_OPERAND0_LO)
uint64_t _operand0 = ((uint64_t)_operand0_hi << 32) | _operand0_lo;
uint64_t _operand0_64 = ((uint64_t)_operand0_hi << 32) | _operand0_lo;
PATCH_VALUE(uint32_t, _operand1_hi, _JIT_OPERAND1_HI)
PATCH_VALUE(uint32_t, _operand1_lo, _JIT_OPERAND1_LO)
uint64_t _operand1 = ((uint64_t)_operand1_hi << 32) | _operand1_lo;
uint64_t _operand1_64 = ((uint64_t)_operand1_hi << 32) | _operand1_lo;
#endif
#if SUPPORTS_SMALL_CONSTS
PATCH_VALUE(uint32_t, _operand0_32, _JIT_OPERAND0_32)
PATCH_VALUE(uint32_t, _operand1_32, _JIT_OPERAND1_32)
PATCH_VALUE(uint16_t, _operand0_16, _JIT_OPERAND0_16)
PATCH_VALUE(uint16_t, _operand1_16, _JIT_OPERAND1_16)
PATCH_VALUE(uint16_t, _oparg_16, _JIT_OPARG_16)
#else
PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG)
#endif
PATCH_VALUE(uint32_t, _target, _JIT_TARGET)
OPT_STAT_INC(uops_executed);
Expand Down
Loading