diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index db94f1de9406..6054a934b25e 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -1211,7 +1211,15 @@ def generate_setter(cl: ClassIR, attr: str, rtype: RType, emitter: Emitter) -> N emitter.emit_line("if (value != NULL) {") if rtype.is_unboxed: - emitter.emit_unbox("value", "tmp", rtype, error=ReturnHandler("-1"), declare_dest=True) + # Borrow the unboxed value: emit_inc_ref below takes the single owned + # reference, matching the borrowed-then-incref pattern of the other two + # branches. Without borrow=True, emit_unbox already creates a new + # reference for refcounted unboxed types (e.g. CPyTagged boxed ints, + # tuples with refcounted fields), so the emit_inc_ref would double the + # reference and leak the stored value on every set via this setter. + emitter.emit_unbox( + "value", "tmp", rtype, error=ReturnHandler("-1"), declare_dest=True, borrow=True + ) elif is_same_type(rtype, object_rprimitive): emitter.emit_line("PyObject *tmp = value;") else: diff --git a/mypyc/test-data/run-classes.test b/mypyc/test-data/run-classes.test index 7722cf26ca91..d830b0c04fa6 100644 --- a/mypyc/test-data/run-classes.test +++ b/mypyc/test-data/run-classes.test @@ -6006,3 +6006,61 @@ for _ in range(100): check(foo) after = sys.getrefcount(foo.obj) assert after - init == 0, f"Leaked {after - init} refs" + +[case testNativeAttrSetterRefcountLeak] +# Setting a native attribute from interpreted code goes through the generated +# getset descriptor setter. For refcounted unboxed types (heap-boxed ints, +# tuples with refcounted items) the setter must take exactly one reference to +# the stored value, not two. +from dataclasses import dataclass +from typing import Tuple + +@dataclass +class IntField: + v: int + +@dataclass +class TupleField: + v: Tuple[int, int] + +[file driver.py] +import sys +from native import IntField, TupleField + +# A heap-boxed int (>= 2**62) is stored as a refcounted PyObject*, unlike small +# inline-tagged ints, so an over-incref strands a real reference. Compute the +# values at runtime (not as folded literals): on free-threaded builds code +# constants are immortal, and getrefcount could not observe a leak on them. +shift = 70 +BIG = 1 << shift + +def check_no_leak(make, value) -> None: + base = sys.getrefcount(value) + objs = [make(value) for _ in range(100)] + alive = sys.getrefcount(value) + # Each live instance must hold exactly one reference to the field value. + assert alive - base == 100, f"expected 100 live refs, got {alive - base}" + del objs + after = sys.getrefcount(value) + assert after == base, f"leaked {after - base} refs" + +# The dataclass-generated __init__ stores self.v = v via the descriptor setter. +check_no_leak(IntField, BIG) + +# Tuple[int, int] is stored as an unboxed RTuple; its boxed-int elements are +# refcounted, so the setter must not over-incref them either. Use the same int +# in both slots so each instance holds exactly two references to it. +ELEM = 1 << (shift + 1) +base = sys.getrefcount(ELEM) +objs = [TupleField((ELEM, ELEM)) for _ in range(100)] +alive = sys.getrefcount(ELEM) +assert alive - base == 200, f"expected 200 live refs, got {alive - base}" +del objs +assert sys.getrefcount(ELEM) == base, f"tuple field leaked {sys.getrefcount(ELEM) - base} refs" + +# Re-assigning through the setter must release the previous value too. +o = IntField(BIG) +base = sys.getrefcount(BIG) +o.v = BIG +o.v = BIG +assert sys.getrefcount(BIG) == base, "reassignment leaked refs"