diff --git a/src/instrumentation-client.ts b/src/instrumentation-client.ts index f9336e7a14d76..5154224013030 100644 --- a/src/instrumentation-client.ts +++ b/src/instrumentation-client.ts @@ -5,7 +5,7 @@ Sentry.init({ dsn: process.env.NEXT_PUBLIC_SENTRY_DSN, // Adjust this value in production, or use tracesSampler for greater control - tracesSampleRate: 1, + tracesSampleRate: 0.3, // Enable logs to be sent to Sentry enableLogs: true, diff --git a/src/instrumentation.ts b/src/instrumentation.ts index 074fa7bbd0a8b..dcd08ba8f6cc0 100644 --- a/src/instrumentation.ts +++ b/src/instrumentation.ts @@ -1,10 +1,12 @@ import * as Sentry from '@sentry/nextjs'; +import {tracesSampler} from './tracesSampler'; + export function register() { if (process.env.NEXT_RUNTIME === 'nodejs') { Sentry.init({ dsn: process.env.NEXT_PUBLIC_SENTRY_DSN, - tracesSampleRate: 1, + tracesSampler, enableLogs: true, debug: false, environment: process.env.NODE_ENV === 'development' ? 'development' : undefined, @@ -29,7 +31,7 @@ export function register() { if (process.env.NEXT_RUNTIME === 'edge') { Sentry.init({ dsn: process.env.NEXT_PUBLIC_SENTRY_DSN, - tracesSampleRate: 1, + tracesSampler, enableLogs: true, debug: false, environment: process.env.NODE_ENV === 'development' ? 'development' : undefined, diff --git a/src/tracesSampler.ts b/src/tracesSampler.ts new file mode 100644 index 0000000000000..4305e85073286 --- /dev/null +++ b/src/tracesSampler.ts @@ -0,0 +1,109 @@ +// Sampling context passed to tracesSampler +// Using inline type to avoid dependency on internal Sentry types +interface SamplingContext { + attributes?: Record; + name?: string; + normalizedRequest?: { + headers?: Record; + }; + parentSampled?: boolean; +} + +// AI agents we want to track for docs/markdown consumption visibility +// These fetch markdown content and we need performance data on serving to agentic tools +const AI_AGENT_PATTERN = new RegExp( + [ + 'claudebot', + 'claude-web', + 'anthropic', + 'gptbot', + 'chatgpt', + 'openai', + 'cursor', + 'codex', + 'copilot', + 'perplexity', + 'cohere', + 'gemini', + ].join('|'), + 'i' +); + +// Bots/crawlers to filter out (SEO crawlers, social media, testing tools, monitors) +// Uses specific bot names where possible, plus generic patterns for common crawler terms +const BOT_PATTERN = new RegExp( + [ + // Search engine crawlers + 'googlebot', + 'bingbot', + 'yandexbot', + 'baiduspider', + 'duckduckbot', + 'applebot', + // SEO tools + 'ahrefsbot', + 'semrushbot', + 'dotbot', + 'mj12bot', + // Social media + 'slackbot', + 'twitterbot', + 'linkedinbot', + 'telegrambot', + 'discordbot', + 'facebookexternalhit', + 'whatsapp', + // Generic patterns + 'crawler', + 'spider', + 'scraper', + 'headless', + // Testing/automation tools + 'phantomjs', + 'selenium', + 'puppeteer', + 'playwright', + // Performance/monitoring tools + 'lighthouse', + 'pagespeed', + 'gtmetrix', + 'pingdom', + 'uptimerobot', + ].join('|'), + 'i' +); + +// Default sample rate for real users +const DEFAULT_SAMPLE_RATE = 0.3; + +/** + * Determines trace sample rate based on user agent. + * - AI agents: 100% (we want full visibility into agentic docs consumption) + * - Bots/crawlers: 0% (filter out noise) + * - Real users: 30% + * + * AI agents are checked first, so if something matches both AI and bot patterns, we sample it. + */ +export function tracesSampler(samplingContext: SamplingContext): number { + // Try to get user agent from normalizedRequest headers (Sentry SDK provides this) + // Falls back to OTel semantic convention attributes if normalizedRequest not available + const userAgent = + samplingContext.normalizedRequest?.headers?.['user-agent'] ?? + (samplingContext.attributes?.['http.user_agent'] as string | undefined) ?? + (samplingContext.attributes?.['user_agent.original'] as string | undefined); + + if (!userAgent) { + return DEFAULT_SAMPLE_RATE; + } + + if (AI_AGENT_PATTERN.test(userAgent)) { + return 1; + } + + if (BOT_PATTERN.test(userAgent)) { + return 0; + } + + // Sample real users at default rate + return DEFAULT_SAMPLE_RATE; +}