diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py
index 1d30227fed..b8b71d3205 100644
--- a/pymc/distributions/continuous.py
+++ b/pymc/distributions/continuous.py
@@ -512,6 +512,13 @@ def logcdf(value, mu, sigma):
             msg="sigma > 0",
         )
 
+    def logccdf(value, mu, sigma):
+        return check_parameters(
+            normal_lccdf(mu, sigma, value),
+            sigma > 0,
+            msg="sigma > 0",
+        )
+
     def icdf(value, mu, sigma):
         res = mu + sigma * -np.sqrt(2.0) * pt.erfcinv(2 * value)
         res = check_icdf_value(res, value)
diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py
index 27d53c8687..d9e54c648e 100644
--- a/pymc/distributions/distribution.py
+++ b/pymc/distributions/distribution.py
@@ -50,7 +50,7 @@
     rv_size_is_none,
     shape_from_dims,
 )
-from pymc.logprob.abstract import MeasurableOp, _icdf, _logcdf, _logprob
+from pymc.logprob.abstract import MeasurableOp, _icdf, _logccdf, _logcdf, _logprob
 from pymc.logprob.basic import logp
 from pymc.logprob.rewriting import logprob_rewrites_db
 from pymc.printing import str_for_dist
@@ -150,6 +150,17 @@ def logcdf(op, value, *dist_params, **kwargs):
             dist_params = [dist_params[i] for i in params_idxs]
             return class_logcdf(value, *dist_params)
 
+        class_logccdf = clsdict.get("logccdf")
+        if class_logccdf:
+
+            @_logccdf.register(rv_type)
+            def logccdf(op, value, *dist_params, **kwargs):
+                if isinstance(op, RandomVariable):
+                    rng, size, *dist_params = dist_params
+                elif params_idxs:
+                    dist_params = [dist_params[i] for i in params_idxs]
+                return class_logccdf(value, *dist_params)
+
         class_icdf = clsdict.get("icdf")
         if class_icdf:
diff --git a/pymc/distributions/truncated.py b/pymc/distributions/truncated.py
index 4b984e4c41..d8a0aa1462 100644
--- a/pymc/distributions/truncated.py
+++ b/pymc/distributions/truncated.py
@@ -44,7 +44,7 @@
 from pymc.distributions.transforms import _default_transform
 from pymc.exceptions import TruncationError
 from pymc.logprob.abstract import _logcdf, _logprob
-from pymc.logprob.basic import icdf, logcdf, logp
+from pymc.logprob.basic import icdf, logccdf, logcdf, logp
 from pymc.math import logdiffexp
 from pymc.pytensorf import collect_default_updates
 from pymc.util import check_dist_not_registered
@@ -211,6 +211,23 @@ def _create_logcdf_exprs(
         upper_logcdf = graph_replace(lower_logcdf, {lower_value: upper_value})
         return lower_logcdf, upper_logcdf
 
+    @staticmethod
+    def _create_lower_logccdf_expr(
+        base_rv: TensorVariable,
+        value: TensorVariable,
+        lower: TensorVariable,
+    ) -> TensorVariable:
+        """Create logccdf expression at lower bound for base_rv.
+
+        Uses `value` as a template for broadcasting. This is numerically more
+        stable than computing log(1 - exp(logcdf)) for distributions that have
+        a registered logccdf method.
+        """
+        # For left truncated discrete RVs, we need to include the whole lower bound.
+        lower_value = lower - 1 if base_rv.type.dtype.startswith("int") else lower
+        lower_value = pt.full_like(value, lower_value, dtype=config.floatX)
+        return logccdf(base_rv, lower_value, warn_rvs=False)
+
     def update(self, node: Apply):
         """Return the update mapping for the internal RNGs.
@@ -401,7 +418,8 @@ def truncated_logprob(op, values, *inputs, **kwargs):
     if is_lower_bounded and is_upper_bounded:
         lognorm = logdiffexp(upper_logcdf, lower_logcdf)
     elif is_lower_bounded:
-        lognorm = pt.log1mexp(lower_logcdf)
+        # Use numerically stable logccdf instead of log(1 - exp(logcdf))
+        lognorm = TruncatedRV._create_lower_logccdf_expr(base_rv, value, lower)
     elif is_upper_bounded:
         lognorm = upper_logcdf
 
@@ -438,7 +456,8 @@ def truncated_logcdf(op: TruncatedRV, value, *inputs, **kwargs):
     if is_lower_bounded and is_upper_bounded:
         lognorm = logdiffexp(upper_logcdf, lower_logcdf)
    elif is_lower_bounded:
-        lognorm = pt.log1mexp(lower_logcdf)
+        # Use numerically stable logccdf instead of log(1 - exp(logcdf))
+        lognorm = TruncatedRV._create_lower_logccdf_expr(base_rv, value, lower)
     elif is_upper_bounded:
         lognorm = upper_logcdf
 
diff --git a/pymc/logprob/__init__.py b/pymc/logprob/__init__.py
index 2e67a6c55b..5f0dc65c6b 100644
--- a/pymc/logprob/__init__.py
+++ b/pymc/logprob/__init__.py
@@ -39,6 +39,7 @@
 from pymc.logprob.basic import (
     conditional_logp,
     icdf,
+    logccdf,
     logcdf,
     logp,
     transformed_conditional_logp,
@@ -59,6 +60,7 @@
 __all__ = (
     "icdf",
+    "logccdf",
     "logcdf",
     "logp",
 )
diff --git a/pymc/logprob/abstract.py b/pymc/logprob/abstract.py
index 4b8808a3bd..0e9867e0b3 100644
--- a/pymc/logprob/abstract.py
+++ b/pymc/logprob/abstract.py
@@ -40,6 +40,8 @@
 from collections.abc import Sequence
 from functools import singledispatch
 
+import pytensor.tensor as pt
+
 from pytensor.graph import Apply, Op, Variable
 from pytensor.graph.utils import MetaType
 from pytensor.tensor import TensorVariable
@@ -108,6 +110,45 @@ def _logcdf_helper(rv, value, **kwargs):
     return logcdf
 
 
+@singledispatch
+def _logccdf(
+    op: Op,
+    value: TensorVariable,
+    *inputs: TensorVariable,
+    **kwargs,
+):
+    """Create a graph for the log complementary CDF (log survival function) of a ``RandomVariable``.
+
+    This function dispatches on the type of ``op``, which should be a subclass
+    of ``RandomVariable``. If you want to implement new logccdf graphs
+    for a ``RandomVariable``, register a new function on this dispatcher.
+
+    The log complementary CDF is defined as log(1 - CDF(x)), also known as the
+    log survival function. For distributions with a numerically stable implementation,
+    this should be used instead of computing log(1 - exp(logcdf)).
+    """
+    raise NotImplementedError(f"LogCCDF method not implemented for {op}")
+
+
+def _logccdf_helper(rv, value, **kwargs):
+    """Helper that calls `_logccdf` dispatcher with fallback to log1mexp(logcdf).
+
+    If a numerically stable `_logccdf` implementation is registered for the
+    distribution, it will be used. Otherwise, falls back to computing
+    `log(1 - exp(logcdf))` which may be numerically unstable in the tails.
+ """ + try: + logccdf = _logccdf(rv.owner.op, value, *rv.owner.inputs, name=rv.name, **kwargs) + except NotImplementedError: + logcdf = _logcdf_helper(rv, value, **kwargs) + logccdf = pt.log1mexp(logcdf) + + if rv.name: + logccdf.name = f"{rv.name}_logccdf" + + return logccdf + + @singledispatch def _icdf( op: Op, diff --git a/pymc/logprob/basic.py b/pymc/logprob/basic.py index e45e14a723..348e513a77 100644 --- a/pymc/logprob/basic.py +++ b/pymc/logprob/basic.py @@ -53,6 +53,7 @@ from pymc.logprob.abstract import ( MeasurableOp, _icdf_helper, + _logccdf_helper, _logcdf_helper, _logprob, _logprob_helper, @@ -302,6 +303,70 @@ def normal_logcdf(value, mu, sigma): return expr +def logccdf(rv: TensorVariable, value: TensorLike, warn_rvs=True, **kwargs) -> TensorVariable: + """Create a graph for the log complementary CDF (log survival function) of a random variable. + + The log complementary CDF is defined as log(1 - CDF(x)), also known as the + log survival function. For distributions with a numerically stable implementation, + this is more accurate than computing log(1 - exp(logcdf)). + + Parameters + ---------- + rv : TensorVariable + value : tensor_like + Should be the same type (shape and dtype) as the rv. + warn_rvs : bool, default True + Warn if RVs were found in the logccdf graph. + This can happen when a variable has other random variables as inputs. + In that case, those random variables should be replaced by their respective values. + + Returns + ------- + logccdf : TensorVariable + + Raises + ------ + RuntimeError + If the logccdf cannot be derived. + + Examples + -------- + Create a compiled function that evaluates the logccdf of a variable + + .. code-block:: python + + import pymc as pm + import pytensor.tensor as pt + + mu = pt.scalar("mu") + rv = pm.Normal.dist(mu, 1.0) + + value = pt.scalar("value") + rv_logccdf = pm.logccdf(rv, value) + + # Use .eval() for debugging + print(rv_logccdf.eval({value: 0.9, mu: 0.0})) # -1.5272506 + + # Compile a function for repeated evaluations + rv_logccdf_fn = pm.compile_pymc([value, mu], rv_logccdf) + print(rv_logccdf_fn(value=0.9, mu=0.0)) # -1.5272506 + + """ + value = pt.as_tensor_variable(value, dtype=rv.dtype) + try: + return _logccdf_helper(rv, value, **kwargs) + except NotImplementedError: + # Try to rewrite rv + fgraph = construct_ir_fgraph({rv: value}) + [ir_valued_rv] = fgraph.outputs + [ir_rv, ir_value] = ir_valued_rv.owner.inputs + expr = _logccdf_helper(ir_rv, ir_value, **kwargs) + [expr] = cleanup_ir([expr]) + if warn_rvs: + _warn_rvs_in_inferred_graph([expr]) + return expr + + def icdf(rv: TensorVariable, value: TensorLike, warn_rvs=True, **kwargs) -> TensorVariable: """Create a graph for the inverse CDF of a random variable. 
diff --git a/pymc/logprob/binary.py b/pymc/logprob/binary.py
index 27449e2d2c..9dfd273365 100644
--- a/pymc/logprob/binary.py
+++ b/pymc/logprob/binary.py
@@ -25,6 +25,7 @@
 from pymc.logprob.abstract import (
     MeasurableElemwise,
+    _logccdf_helper,
     _logcdf_helper,
     _logprob,
     _logprob_helper,
@@ -95,7 +96,7 @@ def comparison_logprob(op, values, base_rv, operand, **kwargs):
     base_rv_op = base_rv.owner.op
 
     logcdf = _logcdf_helper(base_rv, operand, **kwargs)
-    logccdf = pt.log1mexp(logcdf)
+    logccdf = _logccdf_helper(base_rv, operand, **kwargs)
 
     condn_exp = pt.eq(value, np.array(True))
diff --git a/pymc/logprob/censoring.py b/pymc/logprob/censoring.py
index 411b8162a8..bb39a39b04 100644
--- a/pymc/logprob/censoring.py
+++ b/pymc/logprob/censoring.py
@@ -47,7 +47,7 @@
 from pytensor.tensor.math import ceil, clip, floor, round_half_to_even
 from pytensor.tensor.variable import TensorConstant
 
-from pymc.logprob.abstract import MeasurableElemwise, _logcdf, _logprob
+from pymc.logprob.abstract import MeasurableElemwise, _logccdf_helper, _logcdf, _logprob
 from pymc.logprob.rewriting import measurable_ir_rewrites_db
 from pymc.logprob.utils import CheckParameterValue, filter_measurable_variables
@@ -119,7 +119,9 @@ def clip_logprob(op, values, base_rv, lower_bound, upper_bound, **kwargs):
     if not (isinstance(upper_bound, TensorConstant) and np.all(np.isinf(upper_bound.value))):
         is_upper_bounded = True
 
-        logccdf = pt.log1mexp(logcdf)
+        # Use numerically stable logccdf (falls back to log1mexp if not available)
+        logccdf = _logccdf_helper(base_rv, value, **kwargs)
+
         # For right clipped discrete RVs, we need to add an extra term
         # corresponding to the pmf at the upper bound
         if base_rv.dtype.startswith("int"):
diff --git a/pymc/logprob/transforms.py b/pymc/logprob/transforms.py
index 8d2bbacd26..2242911c5f 100644
--- a/pymc/logprob/transforms.py
+++ b/pymc/logprob/transforms.py
@@ -111,6 +111,7 @@
     MeasurableOp,
     _icdf,
     _icdf_helper,
+    _logccdf_helper,
     _logcdf,
     _logcdf_helper,
     _logprob,
@@ -248,9 +249,11 @@ def measurable_transform_logcdf(op: MeasurableTransform, value, *inputs, **kwarg
     logcdf = _logcdf_helper(measurable_input, backward_value)
 
     if is_discrete:
+        # For discrete RVs, P(X >= x) = 1 - CDF(x - 1), so take the logcdf at the previous value
         logccdf = pt.log1mexp(_logcdf_helper(measurable_input, backward_value - 1))
     else:
-        logccdf = pt.log1mexp(logcdf)
+        # Use numerically stable logccdf (falls back to log1mexp if not available)
+        logccdf = _logccdf_helper(measurable_input, backward_value)
 
     if isinstance(op.scalar_op, MONOTONICALLY_INCREASING_OPS):
         pass
diff --git a/tests/distributions/test_distribution.py b/tests/distributions/test_distribution.py
index 48ad0579b6..a7d2e22c54 100644
--- a/tests/distributions/test_distribution.py
+++ b/tests/distributions/test_distribution.py
@@ -237,6 +237,73 @@ def rv_op(cls, size=None, rng=None):
         resized_rv = change_dist_size(rv, new_size=5, expand=True)
         assert resized_rv.type.shape == (5,)
 
+    def test_logccdf_with_extended_signature(self):
+        """Test logccdf registration for SymbolicRandomVariable with extended_signature.
+
+        What: Tests that a custom Distribution subclass using SymbolicRandomVariable
+        with an extended_signature can define a logccdf method that gets properly
+        registered and dispatched.
+
+        Why: The DistributionMeta metaclass has two code paths for registering
+        distribution methods like logp, logcdf, logccdf:
+        1. For standard RandomVariable ops: unpack (rng, size, *params)
+        2. For SymbolicRandomVariable with extended_signature: use params_idxs
+
+        This test specifically exercises path #2 (the params_idxs branch) to ensure
+        logccdf works for custom distributions that wrap other distributions with
+        additional graph structure.
+
+        How:
+        1. Creates a custom Distribution (TestDistWithLogccdf) that:
+           - Uses a SymbolicRandomVariable with extended_signature
+           - Wraps a Normal distribution internally
+           - Defines a logccdf method using normal_lccdf
+        2. Creates an instance with mu=0, sigma=1
+        3. Evaluates pm.logccdf at value=0.5
+        4. Compares against scipy.stats.norm.logsf reference
+
+        The extended_signature "[rng],[size],(),()->[rng],()" means:
+        - Inputs: rng, size, and two scalar params (mu, sigma)
+        - Outputs: next_rng and scalar draws
+        """
+        from pymc.distributions.dist_math import normal_lccdf
+        from pymc.distributions.distribution import Distribution
+
+        class TestDistWithLogccdf(Distribution):
+            # Create a SymbolicRandomVariable type with extended_signature
+            rv_type = type(
+                "TestRVWithLogccdf",
+                (SymbolicRandomVariable,),
+                {"extended_signature": "[rng],[size],(),()->[rng],()"},
+            )
+
+            @classmethod
+            def dist(cls, mu, sigma, **kwargs):
+                mu = pt.as_tensor(mu)
+                sigma = pt.as_tensor(sigma)
+                return super().dist([mu, sigma], **kwargs)
+
+            @classmethod
+            def rv_op(cls, mu, sigma, size=None, rng=None):
+                rng = normalize_rng_param(rng)
+                size = normalize_size_param(size)
+                # Internally uses Normal, but wrapped in SymbolicRandomVariable
+                next_rng, draws = Normal.dist(mu, sigma, size=size, rng=rng).owner.outputs
+                return cls.rv_type(
+                    inputs=[rng, size, mu, sigma],
+                    outputs=[next_rng, draws],
+                    ndim_supp=0,
+                )(rng, size, mu, sigma)
+
+            # This logccdf will be registered via the params_idxs path
+            def logccdf(value, mu, sigma):
+                return normal_lccdf(mu, sigma, value)
+
+        rv = TestDistWithLogccdf.dist(0, 1)
+        result = pm.logccdf(rv, 0.5).eval()
+        expected = st.norm(0, 1).logsf(0.5)  # ≈ -1.17591
+        npt.assert_allclose(result, expected)
+
 
 def test_distribution_op_registered():
     """Test that returned Ops are registered as virtual subclasses of the respective PyMC distributions."""
diff --git a/tests/logprob/test_abstract.py b/tests/logprob/test_abstract.py
index 5d8024cdca..792c808da7 100644
--- a/tests/logprob/test_abstract.py
+++ b/tests/logprob/test_abstract.py
@@ -45,8 +45,13 @@
 import pymc as pm
 
-from pymc.logprob.abstract import MeasurableElemwise, MeasurableOp, _logcdf_helper
-from pymc.logprob.basic import logcdf
+from pymc.logprob.abstract import (
+    MeasurableElemwise,
+    MeasurableOp,
+    _logccdf_helper,
+    _logcdf_helper,
+)
+from pymc.logprob.basic import logccdf, logcdf
 
 
 def assert_equal_hash(classA, classB):
@@ -80,6 +85,32 @@ def test_logcdf_helper():
     np.testing.assert_almost_equal(x_logcdf.eval(), sp.norm(0, 1).logcdf([0, 1]))
 
 
+def test_logccdf_helper():
+    """Test the internal _logccdf_helper function for basic correctness.
+
+    What: Tests that _logccdf_helper correctly computes log(1 - CDF(x)),
+    also known as the log survival function (logsf).
+
+    Why: The _logccdf_helper is the internal dispatcher that routes logccdf
+    computations to distribution-specific implementations. It needs to work
+    with both symbolic (TensorVariable) and concrete values.
+
+    How: Creates a Normal(0, 1) distribution and computes logccdf at values
+    [0, 1]. Compares against scipy's logsf, which is the reference implementation.
+    Tests both symbolic input (pt.vector) and concrete input ([0, 1]).
+ """ + value = pt.vector("value") + x = pm.Normal.dist(0, 1) + + # Test with symbolic value input + x_logccdf = _logccdf_helper(x, value) + np.testing.assert_almost_equal(x_logccdf.eval({value: [0, 1]}), sp.norm(0, 1).logsf([0, 1])) + + # Test with concrete value input + x_logccdf = _logccdf_helper(x, [0, 1]) + np.testing.assert_almost_equal(x_logccdf.eval(), sp.norm(0, 1).logsf([0, 1])) + + def test_logcdf_transformed_argument(): with pm.Model() as m: sigma = pm.HalfFlat("sigma") @@ -95,3 +126,146 @@ def test_logcdf_transformed_argument(): pm.TruncatedNormal.dist(0, sigma_value, lower=None, upper=1.0), x_value ).eval() assert np.isclose(observed, expected) + + +def test_logccdf(): + """Test the public pm.logccdf function for basic correctness. + + What: Tests that the public logccdf API correctly computes the log + complementary CDF (log survival function) for a Normal distribution. + + Why: pm.logccdf is the user-facing function that wraps _logccdf_helper + and handles IR graph rewriting when needed. It should produce correct + results for direct RandomVariable inputs. + + How: Creates Normal(0, 1), computes logccdf at [0, 1], and compares + against scipy.stats.norm.logsf reference values. + - logsf(0) = log(0.5) ≈ -0.693 (50% probability of exceeding 0) + - logsf(1) ≈ -1.84 (about 15.9% probability of exceeding 1) + """ + value = pt.vector("value") + x = pm.Normal.dist(0, 1) + + x_logccdf = logccdf(x, value) + np.testing.assert_almost_equal(x_logccdf.eval({value: [0, 1]}), sp.norm(0, 1).logsf([0, 1])) + + +def test_logccdf_numerical_stability(): + """Test numerical stability of pm.logccdf in the extreme right tail. + + What: Verifies the public logccdf function is numerically stable when + evaluating far in the distribution's tail. + + Why: This is the primary use case that motivated adding logccdf support. + In censored/truncated distributions, we need log(1 - CDF(bound)) at the + censoring/truncation point. When this point is far in the tail: + - Naive: log(1 - exp(logcdf)) = log(1 - 1) = log(0) = -inf + - Stable: Uses erfcx-based computation → correct finite value + + How: Evaluates logccdf at x=100 for Normal(0,1) and verifies: + 1. Result is finite (not -inf or nan) + 2. Result matches scipy.logsf within relative tolerance + + The expected value is approximately -5005.5, representing the log + probability of a standard normal exceeding 100 sigma. + Using 100 sigma future-proofs against any improvements in naive methods. + """ + x = pm.Normal.dist(0, 1) + + far_tail_value = 100.0 + + result = logccdf(x, far_tail_value).eval() + expected = sp.norm(0, 1).logsf(far_tail_value) # ≈ -5005.5 + + # Must be finite, not -inf (which naive computation would give) + assert np.isfinite(result) + # Use rtol for relative tolerance (float32 has ~7 significant digits) + np.testing.assert_allclose(result, expected, rtol=1e-6) + + +def test_logccdf_helper_fallback(): + """Test that _logccdf_helper falls back to log1mexp(logcdf) for distributions without logccdf. + + What: Verifies that the helper's NotImplementedError fallback branch is exercised + and produces the correct graph structure. + + Why: Distributions without a registered _logccdf method should still work via + the fallback computation log(1 - exp(logcdf)) = log1mexp(logcdf). + + How: Uses Uniform distribution (which has logcdf but no logccdf) and inspects + the resulting computation graph. For Uniform, the graph should contain log1mexp. + For Normal (which has logccdf), the graph should NOT contain log1mexp. 
+ """ + from pytensor.scalar.math import Log1mexp + from pytensor.tensor.elemwise import Elemwise + + def graph_contains_log1mexp(var, depth=0, visited=None): + """Recursively check if computation graph contains Log1mexp scalar op.""" + if visited is None: + visited = set() + if id(var) in visited or depth > 20: + return False + visited.add(id(var)) + if var.owner: + op = var.owner.op + if isinstance(op, Elemwise) and isinstance(op.scalar_op, Log1mexp): + return True + for inp in var.owner.inputs: + if graph_contains_log1mexp(inp, depth + 1, visited): + return True + return False + + # Uniform has logcdf but no logccdf - should use log1mexp fallback + uniform_rv = pm.Uniform.dist(0, 1) + uniform_logccdf = _logccdf_helper(uniform_rv, 0.5) + assert graph_contains_log1mexp(uniform_logccdf), "Uniform logccdf should use log1mexp fallback" + + # Normal has logccdf - should NOT use log1mexp fallback + normal_rv = pm.Normal.dist(0, 1) + normal_logccdf = _logccdf_helper(normal_rv, 0.5) + assert not graph_contains_log1mexp(normal_logccdf), ( + "Normal logccdf should use specialized implementation" + ) + + +def test_logccdf_transformed_argument(): + """Test logccdf with a transformed random variable requiring IR graph rewriting. + + What: Tests that pm.logccdf works when the random variable has been + transformed (e.g., sigma is log-transformed), which requires the + IR (intermediate representation) graph rewriting path. + + Why: When a random variable depends on transformed parameters, the + direct _logccdf_helper call fails because the RV isn't in the expected + form. The public logccdf function catches this and rewrites the graph + using construct_ir_fgraph to make it work. This test ensures that + fallback path is covered and correct. + + How: + 1. Creates a model where x ~ Normal(0, sigma) with sigma ~ HalfFlat + (HalfFlat gets log-transformed automatically) + 2. Adds a Potential using logccdf(x, 1.0) + 3. Compiles and evaluates the model's logp + 4. Verifies the result equals: + logp(Normal(0, sigma), x_value) + logsf(1.0; 0, sigma) + + The IR rewriting is triggered because x's distribution depends on + the transformed sigma parameter. 
+ """ + with pm.Model() as m: + sigma = pm.HalfFlat("sigma") + x = pm.Normal("x", 0, sigma) + pm.Potential("norm_term", logccdf(x, 1.0)) + + sigma_value_log = -1.0 + sigma_value = np.exp(sigma_value_log) # sigma ≈ 0.368 + x_value = 0.5 + + observed = m.compile_logp(jacobian=False)({"sigma_log__": sigma_value_log, "x": x_value}) + + # Expected = logp(x | sigma) + logccdf(Normal(0, sigma), 1.0) + expected_logp = pm.logp(pm.Normal.dist(0, sigma_value), x_value).eval() + expected_logsf = sp.norm(0, sigma_value).logsf(1.0) + expected = expected_logp + expected_logsf + + assert np.isclose(observed, expected) diff --git a/tests/logprob/test_censoring.py b/tests/logprob/test_censoring.py index c778f7a9b4..a32020f86b 100644 --- a/tests/logprob/test_censoring.py +++ b/tests/logprob/test_censoring.py @@ -41,6 +41,8 @@ import scipy as sp import scipy.stats as st +import pymc as pm + from pymc import logp from pymc.logprob import conditional_logp from pymc.logprob.transform_value import TransformValuesRewrite @@ -261,3 +263,71 @@ def test_rounding(rounding_op): logprob.eval({xr_vv: test_value}), expected_logp, ) + + +@pytest.mark.parametrize( + "censoring_side,bound_value", + [ + ("right", 100.0), # Far right tail: CDF ≈ 1, need stable log(1-CDF) + ("left", -100.0), # Far left tail: CDF ≈ 0, need stable log(CDF) + ], +) +def test_censored_logprob_numerical_stability(censoring_side, bound_value): + """Test numerical stability of pm.Censored at extreme tail values. + + What: Verifies that the log-probability of a censored Normal distribution + is computed correctly when the censoring bound is far in the tail + (100 standard deviations from the mean). + + Why: Censored distributions require computing: + - Right-censored at upper bound: log(P(X > upper)) = log(1 - CDF(upper)) = logccdf + - Left-censored at lower bound: log(P(X < lower)) = log(CDF(lower)) = logcdf + + At extreme tail values (100 sigma): + - CDF(100) is indistinguishable from 1.0 in float64 + - CDF(-100) is indistinguishable from 0.0 in float64 + + Naive computation would give: + - Right: log(1 - 1) = log(0) = -inf ✗ + - Left: log(0) = -inf ✗ + + With stable logccdf/logcdf: + - Right: ≈ -5005.5 ✓ + - Left: ≈ -5005.5 ✓ + + How: + 1. Creates pm.Censored with Normal(0, 1) base distribution + 2. Sets censoring bound at ±100 (100 standard deviations) + 3. Evaluates logp at the bound value + 4. Compares against scipy.stats.norm.logsf (right) or logcdf (left) + 5. Verifies result is finite and matches reference within tolerance + + Using 100 sigma future-proofs against any improvements in naive methods. + This is the primary integration test for the logccdf feature. 
+ """ + ref_scipy = st.norm(0, 1) + + with pm.Model() as model: + normal_dist = pm.Normal.dist(mu=0.0, sigma=1.0) + if censoring_side == "right": + # Right-censored: values > upper are censored to upper + # logp(y=upper) = log(P(X >= upper)) = logsf(upper) + pm.Censored("y", normal_dist, lower=None, upper=bound_value) + expected_logp = ref_scipy.logsf(bound_value) + else: + # Left-censored: values < lower are censored to lower + # logp(y=lower) = log(P(X <= lower)) = logcdf(lower) + pm.Censored("y", normal_dist, lower=bound_value, upper=None) + expected_logp = ref_scipy.logcdf(bound_value) + + logp_fn = model.compile_logp() + logp_at_bound = logp_fn({"y": bound_value}) + + # Must be finite (not -inf from naive computation) + assert np.isfinite(logp_at_bound), ( + f"logp at {censoring_side} bound should be finite, got {logp_at_bound}" + ) + # Must match scipy reference (≈ -5005.5 for ±100 sigma) + assert np.isclose(logp_at_bound, expected_logp, rtol=1e-6), ( + f"logp at {censoring_side} bound: got {logp_at_bound}, expected {expected_logp}" + )