Skip to content

Commit 469f191

Browse files
authored
GH-135379: Top of stack caching for the JIT. (GH-135465)
Uses three registers to cache values at the top of the evaluation stack. This significantly reduces memory traffic for smaller, more common uops.
1 parent 80c9756 commit 469f191

30 files changed

+17395
-1887
lines changed

Include/cpython/pystats.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
# error "this header file must not be included directly"
3030
#endif
3131

32-
#define PYSTATS_MAX_UOP_ID 1024
32+
#define PYSTATS_MAX_UOP_ID 2000
3333

3434
#define SPECIALIZATION_FAILURE_KINDS 60
3535

Include/internal/pycore_jit.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@ extern "C" {
1818

1919
#ifdef _Py_JIT
2020

21-
typedef _Py_CODEUNIT *(*jit_func)(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate);
21+
typedef _Py_CODEUNIT *(*jit_func)(
22+
_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate,
23+
_PyStackRef _tos_cache0, _PyStackRef _tos_cache1, _PyStackRef _tos_cache2
24+
);
2225

2326
int _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size_t length);
2427
void _PyJIT_Free(_PyExecutorObject *executor);

Include/internal/pycore_opcode_metadata.h

Lines changed: 27 additions & 25 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_optimizer.h

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,20 @@ typedef struct _PyExecutorLinkListNode {
2424
typedef struct {
2525
uint8_t opcode;
2626
uint8_t oparg;
27-
uint8_t valid:1;
28-
uint8_t linked:1;
29-
uint8_t chain_depth:6; // Must be big enough for MAX_CHAIN_DEPTH - 1.
27+
uint8_t valid;
28+
uint8_t linked;
29+
uint8_t chain_depth; // Must be big enough for MAX_CHAIN_DEPTH - 1.
3030
bool warm;
31-
int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below).
31+
int32_t index; // Index of ENTER_EXECUTOR (if code isn't NULL, below).
3232
_PyBloomFilter bloom;
3333
_PyExecutorLinkListNode links;
3434
PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR).
3535
} _PyVMData;
3636

3737
typedef struct _PyExitData {
3838
uint32_t target;
39-
uint16_t index:14;
39+
uint16_t index:12;
40+
uint16_t stack_cache:2;
4041
uint16_t is_dynamic:1;
4142
uint16_t is_control_flow:1;
4243
_Py_BackoffCounter temperature;
@@ -344,17 +345,6 @@ extern _PyExecutorObject *_PyExecutor_GetColdDynamicExecutor(void);
344345

345346
PyAPI_FUNC(void) _PyExecutor_ClearExit(_PyExitData *exit);
346347

347-
static inline int is_terminator(const _PyUOpInstruction *uop)
348-
{
349-
int opcode = uop->opcode;
350-
return (
351-
opcode == _EXIT_TRACE ||
352-
opcode == _DEOPT ||
353-
opcode == _JUMP_TO_TOP ||
354-
opcode == _DYNAMIC_EXIT
355-
);
356-
}
357-
358348
extern void _PyExecutor_Free(_PyExecutorObject *self);
359349

360350
PyAPI_FUNC(int) _PyDumpExecutors(FILE *out);

Include/internal/pycore_stackref.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ static const _PyStackRef PyStackRef_ERROR = { .index = (1 << Py_TAGGED_SHIFT) };
7676

7777
#define INITIAL_STACKREF_INDEX (5 << Py_TAGGED_SHIFT)
7878

79+
#define PyStackRef_ZERO_BITS PyStackRef_NULL
80+
7981
static inline _PyStackRef
8082
PyStackRef_Wrap(void *ptr)
8183
{
@@ -369,6 +371,10 @@ PyStackRef_IsNullOrInt(_PyStackRef ref);
369371

370372
static const _PyStackRef PyStackRef_ERROR = { .bits = Py_TAG_INVALID };
371373

374+
/* For use in the JIT to clear an unused value.
375+
* PyStackRef_ZERO_BITS has no meaning and should not be used other than by the JIT. */
376+
static const _PyStackRef PyStackRef_ZERO_BITS = { .bits = 0 };
377+
372378
/* Wrap a pointer in a stack ref.
373379
* The resulting stack reference is not safe and should only be used
374380
* in the interpreter to pass values from one uop to another.
@@ -922,6 +928,17 @@ _PyThreadState_PopCStackRef(PyThreadState *tstate, _PyCStackRef *ref)
922928
PyStackRef_XCLOSE(ref->ref);
923929
}
924930

931+
static inline _PyStackRef
932+
_PyThreadState_PopCStackRefSteal(PyThreadState *tstate, _PyCStackRef *ref)
933+
{
934+
#ifdef Py_GIL_DISABLED
935+
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
936+
assert(tstate_impl->c_stack_refs == ref);
937+
tstate_impl->c_stack_refs = ref->next;
938+
#endif
939+
return ref->ref;
940+
}
941+
925942
#ifdef Py_GIL_DISABLED
926943

927944
static inline int

Include/internal/pycore_uop.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ typedef struct _PyUOpInstruction{
3838
// This is the length of the trace we translate initially.
3939
#ifdef Py_DEBUG
4040
// With asserts, the stencils are a lot larger
41-
#define UOP_MAX_TRACE_LENGTH 1000
41+
#define UOP_MAX_TRACE_LENGTH 2000
4242
#else
43-
#define UOP_MAX_TRACE_LENGTH 3000
43+
#define UOP_MAX_TRACE_LENGTH 5000
4444
#endif
4545
#define UOP_BUFFER_SIZE (UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction))
4646

0 commit comments

Comments
 (0)