diff --git a/doxygen/doxygen.cfg b/doxygen/doxygen.cfg index ab2846ee7bc..f6e2a29f0dc 100644 --- a/doxygen/doxygen.cfg +++ b/doxygen/doxygen.cfg @@ -2743,15 +2743,17 @@ MSCGEN_TOOL = MSCFILE_DIRS = ALIASES += laplace_options="\ -\param[in] theta_0 the initial guess for the Laplace approximation. \ -\param[in] tolerance controls the convergence criterion when finding the mode in the Laplace approximation. \ -\param[in] max_num_steps maximum number of steps before the Newton solver breaks and returns an error. \ -\param[in] hessian_block_size Block size of Hessian of log likelihood w.r.t latent Gaussian variable theta. \ -\param[in] solver Type of Newton solver. Each corresponds to a distinct choice of B matrix (i.e. application SWM formula): \ -1. computes square-root of negative Hessian. \ -2. computes square-root of covariance matrix. \ -3. computes no square-root and uses LU decomposition. \ -\param[in] max_steps_line_search Number of steps after which the algorithm gives up on doing a line search. If 0, no linesearch. \ +\param[in] ops Options for controlling the Laplace approximation. The following options are available: \ + - theta_0 the initial guess for the Laplace approximation. \ + - tolerance controls the convergence criterion when finding the mode in the Laplace approximation. \ + - max_num_steps maximum number of steps before the Newton solver breaks and returns an error. \ + - hessian_block_size Block size of Hessian of log likelihood w.r.t. the latent Gaussian variable theta. \ + - solver Type of Newton solver. Each corresponds to a distinct choice of B matrix (i.e. application of the SWM formula): \ + 1. computes square-root of negative Hessian. \ + 2. computes square-root of covariance matrix. \ + 3. computes no square-root and uses LU decomposition. \ + - max_steps_line_search Number of steps after which the algorithm gives up on doing a line search. If 0, no linesearch. 
\ + - allow_fallthrough If true, then when solver 1 fails solver 2 is tried, and if that also fails solver 3 is tried. \ " ALIASES += laplace_common_template_args="\ @@ -2761,6 +2763,7 @@ ALIASES += laplace_common_template_args="\ should be a type inheriting from `Eigen::EigenBase` with dynamic sized \ rows and columns. \ \tparam CovarArgs A tuple of types to passed as the first arguments of `CovarFun::operator()`\ + \tparam OpsTuple A tuple of laplace_options types \ " ALIASES += laplace_common_args="\ diff --git a/stan/math/mix/functor/barzilai_borwein_step_size.hpp b/stan/math/mix/functor/barzilai_borwein_step_size.hpp new file mode 100644 index 00000000000..875a84ab291 --- /dev/null +++ b/stan/math/mix/functor/barzilai_borwein_step_size.hpp @@ -0,0 +1,124 @@ +#ifndef STAN_MATH_MIX_FUNCTOR_BARZILAI_BORWEIN_STEP_SIZE_HPP +#define STAN_MATH_MIX_FUNCTOR_BARZILAI_BORWEIN_STEP_SIZE_HPP +#include +#include +#include +#include + +namespace stan::math::internal { +/** + * @brief Curvature-aware Barzilai–Borwein (BB) step length with robust + * safeguards. + * + * Given successive parameter displacements \f$s = x_k - x_{k-1}\f$ and + * gradients \f$g_k\f$, \f$g_{k-1}\f$, this routine forms + * \f$y = g_k - g_{k-1}\f$ and computes the two classical BB candidates + * + * \f{align*}{ + * \alpha_{\text{BB1}} &= \frac{\langle s,s\rangle}{\langle s,y\rangle},\\ + * \alpha_{\text{BB2}} &= \frac{\langle s,y\rangle}{\langle y,y\rangle}, + * \f} + * + * then chooses between them using the **spectral cosine** + * \f$r = \cos^2\!\angle(s,y) = \dfrac{\langle s,y\rangle^2} + * {\langle s,s\rangle\,\langle + * y,y\rangle}\in[0,1]\f$: + * + * - if \f$r > 0.9\f$ (well-aligned curvature) and the previous line search + * did **≤ 1** backtrack, prefer the “long” step \f$\alpha_{\text{BB1}}\f$; + * - if \f$0.1 \le r \le 0.9\f$, take the neutral geometric mean + * \f$\sqrt{\alpha_{\text{BB1}}\alpha_{\text{BB2}}}\f$; + * - otherwise default to the “short” step \f$\alpha_{\text{BB2}}\f$. 
+ * + * All candidates are clamped into \f$[\text{min\_alpha},\,\text{max\_alpha}]\f$ + * and must be finite and positive. + * If the curvature scalars are ill-posed (non-finite or too small), + * \f$\langle s,y\rangle \le \varepsilon\f$, or if `last_backtracks == 99` + * (explicitly disabling BB for this iteration), the function falls back to a + * **safe** step: + * use `prev_step` when finite and positive, otherwise \f$1.0\f$, then clamp to + * \f$[\text{min\_alpha},\,\text{max\_alpha}]\f$. + * + * @param s Displacement between consecutive iterates + * (\f$s = x_k - x_{k-1}\f$). + * @param g_curr Gradient at the current iterate \f$g_k\f$. + * @param g_prev Gradient at the previous iterate \f$g_{k-1}\f$. + * @param prev_step Previously accepted step length (used by the fallback). + * @param last_backtracks + * Number of backtracking contractions performed by the most + * recent line search; set to 99 to force the safe fallback. + * @param min_alpha Lower bound for the returned step length. + * @param max_alpha Upper bound for the returned step length. + * + * @return A finite, positive BB-style step length \f$\alpha \in + * [\text{min\_alpha},\,\text{max\_alpha}]\f$ suitable for seeding a + * line search or as a spectral preconditioner scale. + * + * @note Uses \f$\varepsilon=10^{-16}\f$ to guard against division by very + * small curvature terms, and applies `std::abs` to BB ratios to avoid + * negative steps; descent is enforced by the line search. + * @warning The vectors must have identical size. Non-finite inputs yield the + * safe fallback. + */ +inline double barzilai_borwein_step_size(const Eigen::VectorXd& s, + const Eigen::VectorXd& g_curr, + const Eigen::VectorXd& g_prev, + double prev_step, int last_backtracks, + double min_alpha, double max_alpha) { + // Fallbacks + auto safe_fallback = [&]() -> double { + double a = std::clamp( + prev_step > 0.0 && std::isfinite(prev_step) ? 
prev_step : 1.0, + min_alpha, max_alpha); + return a; + }; + + const Eigen::VectorXd y = g_curr - g_prev; + const double sty = s.dot(y); + const double sts = s.squaredNorm(); + const double yty = y.squaredNorm(); + + // Basic validity checks + constexpr double eps = 1e-16; + if (!(std::isfinite(sty) && std::isfinite(sts) && std::isfinite(yty)) + || sts <= eps || yty <= eps || sty <= eps || last_backtracks == 99) { + return safe_fallback(); + } + + // BB candidates + double alpha_bb1 = std::clamp(std::abs(sts / sty), min_alpha, max_alpha); + double alpha_bb2 = std::clamp(std::abs(sty / yty), min_alpha, max_alpha); + + // Safeguard candidates + if (!std::isfinite(alpha_bb1) || !std::isfinite(alpha_bb2) || alpha_bb1 <= 0.0 + || alpha_bb2 <= 0.0) { + return safe_fallback(); + } + + // Spectral cosine r = cos^2(angle(s, y)) in [0,1] + const double r = (sty * sty) / (sts * yty); + + // Heuristic thresholds (robust defaults) + constexpr double kLoose = 0.9; // "nice" curvature + constexpr double kTight = 0.1; // "dodgy" curvature + + double alpha0 = alpha_bb2; // default to short BB for robustness + if (r > kLoose && last_backtracks <= 1) { + // Spectrum looks friendly and line search was not harsh -> try long BB + alpha0 = alpha_bb1; + } else if (r >= kTight && r <= kLoose) { + // Neither clearly friendly nor clearly dodgy -> neutral middle + alpha0 = std::sqrt(alpha_bb1 * alpha_bb2); + } // else keep alpha_bb2 + + // Clip to user bounds + alpha0 = std::clamp(alpha0, min_alpha, max_alpha); + + if (!std::isfinite(alpha0) || alpha0 <= 0.0) { + return safe_fallback(); + } + return alpha0; +} + +} // namespace stan::math::internal +#endif diff --git a/stan/math/mix/functor/conditional_copy_and_promote.hpp b/stan/math/mix/functor/conditional_copy_and_promote.hpp new file mode 100644 index 00000000000..cc9410bfa02 --- /dev/null +++ b/stan/math/mix/functor/conditional_copy_and_promote.hpp @@ -0,0 +1,106 @@ +#ifndef STAN_MATH_MIX_FUNCTOR_CONDITIONAL_COPY_AND_PROMOTE_HPP +#define 
STAN_MATH_MIX_FUNCTOR_CONDITIONAL_COPY_AND_PROMOTE_HPP + +#include +#include +#include + +namespace stan::math::internal { + +/** + * Decide if an object should be deep or shallow copied when + * using @ref conditional_copy_and_promote . + */ +enum class COPY_TYPE : uint8_t { SHALLOW = 0, DEEP = 1 }; + +/** + * Conditionally copy and promote a type's scalar type to `PromotedType`. + * @tparam Filter type trait with a static constexpr bool member `value` + * that is true if the type should be promoted. Otherwise, the type is + * left unchanged. + * @tparam PromotedType type to promote the scalar to. + * @tparam CopyType type of copy to perform. + * @tparam Args variadic arguments. + * @param args variadic arguments to conditionally copy and promote. + * @return a tuple where each element is either a reference to the original + * argument or a promoted copy of the argument. + */ +template