From 86a606a29ec760518103fad779a7a039070ab884 Mon Sep 17 00:00:00 2001 From: Tom Bannink Date: Mon, 29 Jun 2026 17:39:21 +0200 Subject: [PATCH 1/2] [mypyc] Fix reference leak when setting unboxed refcounted attrs via descriptor The generated getset descriptor setter for a native attribute over-increfed refcounted unboxed values. For such a type emit_unbox already produces a new (owned) reference (e.g. CPyTagged_FromObject increfs the heap-boxed int case), and generate_setter then applied an additional emit_inc_ref, taking two references while the deallocator releases only one. The result was a leaked reference on every set through the setter. This is hit whenever the attribute is set from interpreted code, most notably the __init__ that the dataclasses module synthesizes for a mypyc-compiled @dataclass: `self.v = v` goes through the descriptor, so every constructed instance with a heap-boxed int (>= 2**62) field leaked one PyLong. Tuple attributes with refcounted items were affected the same way. Fix: unbox with borrow=True so all three branches of generate_setter produce a borrowed value and the single emit_inc_ref takes exactly one owned reference. borrow=True is a no-op for non-refcounted unboxed types and is propagated correctly through RTuple unboxing. Co-Authored-By: Claude Opus 4.8 (1M context) --- mypyc/codegen/emitclass.py | 10 +++++- mypyc/test-data/run-classes.test | 55 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index db94f1de9406e..6054a934b25e1 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -1211,7 +1211,15 @@ def generate_setter(cl: ClassIR, attr: str, rtype: RType, emitter: Emitter) -> N emitter.emit_line("if (value != NULL) {") if rtype.is_unboxed: - emitter.emit_unbox("value", "tmp", rtype, error=ReturnHandler("-1"), declare_dest=True) + # Borrow the unboxed value: emit_inc_ref below takes the single owned + # reference, matching the borrowed-then-incref pattern of the other two + # branches. Without borrow=True, emit_unbox already creates a new + # reference for refcounted unboxed types (e.g. CPyTagged boxed ints, + # tuples with refcounted fields), so the emit_inc_ref would double the + # reference and leak the stored value on every set via this setter. + emitter.emit_unbox( + "value", "tmp", rtype, error=ReturnHandler("-1"), declare_dest=True, borrow=True + ) elif is_same_type(rtype, object_rprimitive): emitter.emit_line("PyObject *tmp = value;") else: diff --git a/mypyc/test-data/run-classes.test b/mypyc/test-data/run-classes.test index 7722cf26ca910..084cb4ec6e67c 100644 --- a/mypyc/test-data/run-classes.test +++ b/mypyc/test-data/run-classes.test @@ -6006,3 +6006,58 @@ for _ in range(100): check(foo) after = sys.getrefcount(foo.obj) assert after - init == 0, f"Leaked {after - init} refs" + +[case testNativeAttrSetterRefcountLeak] +# Setting a native attribute from interpreted code goes through the generated +# getset descriptor setter. For refcounted unboxed types (heap-boxed ints, +# tuples with refcounted items) the setter must take exactly one reference to +# the stored value, not two. +from dataclasses import dataclass +from typing import Tuple + +@dataclass +class IntField: + v: int + +@dataclass +class TupleField: + v: Tuple[int, int] + +[file driver.py] +import sys +from native import IntField, TupleField + +# A heap-boxed int (>= 2**62) is stored as a refcounted PyObject*, unlike small +# inline-tagged ints, so an over-incref strands a real reference. +BIG = 1 << 70 + +def check_no_leak(make, value) -> None: + base = sys.getrefcount(value) + objs = [make(value) for _ in range(100)] + alive = sys.getrefcount(value) + # Each live instance must hold exactly one reference to the field value. + assert alive - base == 100, f"expected 100 live refs, got {alive - base}" + del objs + after = sys.getrefcount(value) + assert after == base, f"leaked {after - base} refs" + +# The dataclass-generated __init__ stores self.v = v via the descriptor setter. +check_no_leak(IntField, BIG) + +# Tuple[int, int] is stored as an unboxed RTuple; its boxed-int elements are +# refcounted, so the setter must not over-incref them either. Use the same int +# in both slots so each instance holds exactly two references to it. +ELEM = 1 << 71 +base = sys.getrefcount(ELEM) +objs = [TupleField((ELEM, ELEM)) for _ in range(100)] +alive = sys.getrefcount(ELEM) +assert alive - base == 200, f"expected 200 live refs, got {alive - base}" +del objs +assert sys.getrefcount(ELEM) == base, f"tuple field leaked {sys.getrefcount(ELEM) - base} refs" + +# Re-assigning through the setter must release the previous value too. +o = IntField(BIG) +base = sys.getrefcount(BIG) +o.v = BIG +o.v = BIG +assert sys.getrefcount(BIG) == base, "reassignment leaked refs" From 76ce52354fd4553de6ecf517d444f964468a2891 Mon Sep 17 00:00:00 2001 From: Tom Bannink Date: Tue, 30 Jun 2026 11:54:00 +0200 Subject: [PATCH 2/2] [mypyc] Make setter refcount-leak test work on free-threaded builds On free-threaded CPython, all code constants are immortal, so the folded literals `1 << 70` / `1 << 71` in the test driver had an unchanging refcount and sys.getrefcount could not observe the leak (the test saw delta 0 instead of 100 on py314t). Compute the values at runtime via a variable shift so they stay mortal and the leak is detectable on both GIL and free-threaded builds. Co-Authored-By: Claude Opus 4.8 (1M context) --- mypyc/test-data/run-classes.test | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mypyc/test-data/run-classes.test b/mypyc/test-data/run-classes.test index 084cb4ec6e67c..d830b0c04fa6f 100644 --- a/mypyc/test-data/run-classes.test +++ b/mypyc/test-data/run-classes.test @@ -6028,8 +6028,11 @@ import sys from native import IntField, TupleField # A heap-boxed int (>= 2**62) is stored as a refcounted PyObject*, unlike small -# inline-tagged ints, so an over-incref strands a real reference. -BIG = 1 << 70 +# inline-tagged ints, so an over-incref strands a real reference. Compute the +# values at runtime (not as folded literals): on free-threaded builds code +# constants are immortal, and getrefcount could not observe a leak on them. +shift = 70 +BIG = 1 << shift def check_no_leak(make, value) -> None: base = sys.getrefcount(value) @@ -6047,7 +6050,7 @@ check_no_leak(IntField, BIG) # Tuple[int, int] is stored as an unboxed RTuple; its boxed-int elements are # refcounted, so the setter must not over-incref them either. Use the same int # in both slots so each instance holds exactly two references to it. -ELEM = 1 << 71 +ELEM = 1 << (shift + 1) base = sys.getrefcount(ELEM) objs = [TupleField((ELEM, ELEM)) for _ in range(100)] alive = sys.getrefcount(ELEM)