getsentry · jaffrepaul · Jan 29, 2026 · Jan 29, 2026 · Jan 29, 2026 · Jan 29, 2026
diff --git a/src/instrumentation-client.ts b/src/instrumentation-client.ts
@@ -5,7 +5,7 @@ Sentry.init({
   dsn: process.env.NEXT_PUBLIC_SENTRY_DSN,
 
   // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 1,
+  tracesSampleRate: 0.3,
 
   // Enable logs to be sent to Sentry
   enableLogs: true,

diff --git a/src/instrumentation.ts b/src/instrumentation.ts
@@ -1,10 +1,12 @@
 import * as Sentry from '@sentry/nextjs';
 
+import {tracesSampler} from './tracesSampler';
+
 export function register() {
   if (process.env.NEXT_RUNTIME === 'nodejs') {
     Sentry.init({
       dsn: process.env.NEXT_PUBLIC_SENTRY_DSN,
-      tracesSampleRate: 1,
+      tracesSampler,
       enableLogs: true,
       debug: false,
       environment: process.env.NODE_ENV === 'development' ? 'development' : undefined,
@@ -29,7 +31,7 @@ export function register() {
   if (process.env.NEXT_RUNTIME === 'edge') {
     Sentry.init({
       dsn: process.env.NEXT_PUBLIC_SENTRY_DSN,
-      tracesSampleRate: 1,
+      tracesSampler,
       enableLogs: true,
       debug: false,
       environment: process.env.NODE_ENV === 'development' ? 'development' : undefined,

diff --git a/src/tracesSampler.ts b/src/tracesSampler.ts
@@ -0,0 +1,109 @@
+// Shape of the sampling context handed to tracesSampler.
+// Declared inline so this module does not depend on internal Sentry types.
+interface SamplingContext {
+  // Span attributes; the sampler looks for OTel user-agent keys in here
+  attributes?: Record<string, unknown>;
+  name?: string;
+  // Request headers as normalized by the Sentry SDK
+  normalizedRequest?: {headers?: Record<string, string>};
+  parentSampled?: boolean;
+}
+
+// User-agent fragments of the AI agents we track for docs/markdown consumption.
+// These fetch markdown content and we need performance data on serving to agentic tools.
+const AI_AGENT_NAMES = [
+  'claudebot',
+  'claude-web',
+  'anthropic',
+  'gptbot',
+  'chatgpt',
+  'openai',
+  'cursor',
+  'codex',
+  'copilot',
+  'perplexity',
+  'cohere',
+  'gemini',
+];
+
+// Case-insensitive match on any one of the fragments above
+const AI_AGENT_PATTERN = new RegExp(AI_AGENT_NAMES.join('|'), 'i');
+
+// User-agent fragments of traffic we drop: SEO crawlers, social media, testing tools, monitors.
+// Specific bot names are used where possible, plus generic terms common to crawlers.
+const BOT_NAMES = [
+  // Search engine crawlers
+  'googlebot',
+  'bingbot',
+  'yandexbot',
+  'baiduspider',
+  'duckduckbot',
+  'applebot',
+  // SEO tools
+  'ahrefsbot',
+  'semrushbot',
+  'dotbot',
+  'mj12bot',
+  // Social media
+  'slackbot',
+  'twitterbot',
+  'linkedinbot',
+  'telegrambot',
+  'discordbot',
+  'facebookexternalhit',
+  'whatsapp',
+  // Generic patterns
+  'crawler',
+  'spider',
+  'scraper',
+  'headless',
+  // Testing/automation tools
+  'phantomjs',
+  'selenium',
+  'puppeteer',
+  'playwright',
+  // Performance/monitoring tools
+  'lighthouse',
+  'pagespeed',
+  'gtmetrix',
+  'pingdom',
+  'uptimerobot',
+];
+
+// Case-insensitive match on any one of the fragments above
+const BOT_PATTERN = new RegExp(BOT_NAMES.join('|'), 'i');
+
+// Fallback sample rate: real users and requests with no user agent (see tracesSampler)
+const DEFAULT_SAMPLE_RATE = 0.3;
+
+/**
+ * Chooses the trace sample rate for a request by inspecting its user agent.
+ * NOTE(review): `parentSampled` is never consulted here — confirm that is intended.
+ *
+ * @param samplingContext - Sampling context supplied by the Sentry SDK
+ * @returns 1 for AI agents (full visibility into agentic docs consumption), 0 for
+ *   bots/crawlers (noise), and DEFAULT_SAMPLE_RATE (30%) for real users or when no
+ *   user agent is available. AI agents win when both patterns match.
+ */
+export function tracesSampler(samplingContext: SamplingContext): number {
+  const {attributes, normalizedRequest} = samplingContext;
+
+  // Sentry-normalized request header first, then OTel semantic-convention attributes
+  const headerValue = normalizedRequest?.headers?.['user-agent'];
+  const attributeValue =
+    attributes?.['http.user_agent'] ?? attributes?.['user_agent.original'];
+  const userAgent = headerValue ?? (attributeValue as string | undefined);
+
+  if (userAgent) {
+    // AI agents are tested first so that they win over the bot pattern
+    if (AI_AGENT_PATTERN.test(userAgent)) {
+      return 1;
+    }
+    if (BOT_PATTERN.test(userAgent)) {
+      return 0;
+    }
+  }
+
+  // Real users, and requests without a usable user agent
+  return DEFAULT_SAMPLE_RATE;
+}