NVIDIA · rapids-bot · Dec 12, 2025 · Nov 24, 2025 · Nov 24, 2025 · Nov 25, 2025
@@ -105,6 +105,8 @@ reviews:
           digit version (ex: `~=1.0`).
         - Not all packages contain Python code, if they do they should also contain their own set of tests, in a
           `tests/` directory at the same level as the `pyproject.toml` file.
+        - When adding a new package, that new package name (as defined in the `pyproject.toml` file) should
+          be added as a dependency to the nvidia-nat-all package in `packages/nvidia_nat_all/pyproject.toml`.
 
     - path: "tests/**/*.py"
       instructions: >-

@@ -97,6 +97,14 @@
         r"^docs/source/workflows/mcp/.*\.md$",
         r"^ghcr\.io/github/github-mcp-server",
     ),
+    (
+        r"^examples/finetuning/rl_with_openpipe_art/.*/configs/config.*\.yml$",
+        r"^examples/finetuning/rl_with_openpipe_art/.*/data/.*",
+    ),
+    (
+        r"^examples/finetuning/rl_with_openpipe_art/src/rl_with_openpipe_art/configs/config\.yml$",
+        r"^examples/finetuning/rl_with_openpipe_art/.*/data/.*",
+    ),
 }
 
 ALLOWLISTED_WORDS: set[str] = {
@@ -155,6 +163,7 @@
     "mistralai/[Mm]ixtral.*",
     "microsoft/[Pp]hi.*",
     "ssmits/[Qq]wen.*",
+    "Qwen/Qwen.*",
     "deepseek-ai/deepseek-.*",  #
     # MIME types
     "(application|text|image|video|audio|model|dataset|token|other)/.*",  #

@@ -6,6 +6,7 @@
 [Aa]gno
 AIQ
 API(s?)
+ART
 Arize
 arXiv
 [Aa]sync
@@ -61,6 +62,10 @@ etcd
 [Ee]xfiltration
 [Ee]xplainability
 Faiss
+[Ff]inetune(d?)
+[Ff]inetune(r|rs)
+[Ff]inetuning
+[Ff]inetunable
 Gantt
 [Gg]eneratable
 GitHub
@@ -86,6 +91,8 @@ LangSmith
 [Ll]aunchable(s?)
 # libcudf isn't styled in the way that cuDF is https://docs.rapids.ai/api/libcudf/stable/
 libcudf
+[Ll]earnable
+[Ll]ogprob(s?)
 LLM(s?)
 # https://github.com/logpai/loghub/
 Loghub
@@ -96,6 +103,7 @@ Milvus
 MLflow
 MLOps
 Morpheus
+Minimax
 [Mm]ultimodal
 [Nn]amespac(e|ed|es|ing)
 NeMo
@@ -104,12 +112,14 @@ NIC
 NIM(s?)
 npm
 NumPy
+NAT's
 NVIDIA
 Nemotron
 OAuth
 URIs
 OTel
 onboarding
+OpenPipe
 [Oo]verfitting
 pandas
 [Pp]arallelization
@@ -120,6 +130,7 @@ PCIe
 PDF(s?)
 [Pp]ostprocess
 [Pp]ostprocessing
+[Pp]luggable
 [Pp]reprocess
 [Pp]retrained
 [Pp]rofiler
@@ -133,13 +144,15 @@ pytest
 [Rr]epo
 [Rr]etarget(ed?)
 [Rr]eusability
+[Rr]ollout(s?)
 [Rr]untime(s?)
 [Ss]andboxing
 [Ss]erializable
 [Ss]treamable
 [Ss]ubclassing
 [Ss]ubcard(s?)
 [Ss]ubgraph(s?)
+[Ss]ubsampl(e|ing)
 [Ss]ubpackage(s?)
 [Ss]ubword(s?)
 [Ss]uperset(s?)

@@ -152,6 +152,7 @@ Middleware <./reference/middleware.md>
 Optimizer <./reference/optimizer.md>
 Test Time Compute <./reference/test-time-compute.md>
 Troubleshooting <./troubleshooting.md>
+Finetuning <./reference/finetuning/index.md>
 ```
 
 ```{toctree}

@@ -40,6 +40,7 @@ nat
 │       ├── remove
 │       └── update
 ├── eval
+├── finetune
 ├── info
 │   ├── channels
 │   └── components
@@ -547,6 +548,115 @@ Options:
   --help                      Show this message and exit.
 ```
 
+## Finetune
+
+:::{warning}
+**Experimental Feature**: The Finetuning Harness is experimental and may change in future releases. Future versions may introduce breaking changes without notice.
+:::
+
+The `nat finetune` command provides access to the finetuning harness for **in-situ reinforcement learning** of agentic LLM workflows. This enables iterative improvement of agents through experience, allowing models to learn from their interactions with environments, tools, and users.
+
+The finetuning process consists of the following steps:
+
+1. Loads the configuration with finetuning settings
+2. Initializes the finetuning runner
+3. Runs evaluation to collect trajectories
+4. Submits trajectories for training
+5. Monitors training progress
+
+For detailed information on finetuning concepts, configuration, and extending the harness, see the [Finetuning Harness](../reference/finetuning/index.md) documentation.
+
+The `nat finetune --help` utility provides a brief overview of the command and its available options:
+
+```console
+$ nat finetune --help
+Usage: nat finetune [OPTIONS]
+
+  Run finetuning on a workflow using collected trajectories.
+
+Options:
+  --config_file FILE              Path to the configuration file containing
+                                  finetuning settings  [required]
+  --dataset FILE                  A json file with questions and ground truth
+                                  answers. This will override the dataset path
+                                  in the config file.
+  --result_json_path TEXT         A JSON path to extract the result from the
+                                  workflow. Use this when the workflow returns
+                                  multiple objects or a dictionary. For
+                                  example, '$.output' will extract the 'output'
+                                  field from the result.  [default: $]
+  --endpoint TEXT                 Use endpoint for running the workflow.
+                                  Example: http://localhost:8000/generate
+  --endpoint_timeout INTEGER      HTTP response timeout in seconds. Only
+                                  relevant if endpoint is specified.
+                                  [default: 300]
+  -o, --override <TEXT TEXT>...   Override config values (e.g., -o
+                                  finetuning.num_epochs 5)
+  --validation_dataset FILE       Validation dataset file path for periodic
+                                  validation
+  --validation_interval INTEGER   Run validation every N epochs  [default: 5]
+  --validation_config_file FILE   Optional separate config file for validation
+                                  runs
+  --help                          Show this message and exit.
+```
+
+### Options Description
+
+- **`--config_file`**: The main configuration file containing both the workflow configuration and finetuning settings. The file must include a `finetuning` section that defines the training parameters, trajectory builder, trainer adapter, and reward function.
+
+- **`--dataset`**: Path to a JSON file containing the training dataset with questions and ground truth answers. If provided, this will override the dataset path specified in the configuration file.
+
+- **`--result_json_path`**: A JSON path expression to extract the relevant result from the workflow output. This is useful when your workflow returns complex objects or dictionaries. The default value `$` uses the entire output.
+
+- **`--endpoint`**: Instead of running the workflow locally, you can specify an HTTP endpoint where the workflow is deployed. This is useful for distributed training scenarios.
+
+- **`--endpoint_timeout`**: When using the `--endpoint` option, this sets the maximum time (in seconds) to wait for a response from the remote service.
+
+- **`-o, --override`**: Override configuration values using dot notation (for example, `-o finetuning.num_epochs 5`). Multiple overrides can be specified by repeating the option.
+
+- **`--validation_dataset`**: Path to a separate validation dataset for periodic evaluation during training. This helps monitor generalization and detect overfitting.
+
+- **`--validation_interval`**: How often (in epochs) to run validation. The default is every 5 epochs.
+
+- **`--validation_config_file`**: An optional separate configuration file for validation runs. If not specified, the main config file is used for both training and validation.
+
+### Examples
+
+Basic finetuning with a configuration file:
+
+<!-- path-check-skip-begin -->
+```bash
+nat finetune --config_file=configs/finetune.yml
+```
+<!-- path-check-skip-end -->
+
+Override the number of training epochs:
+
+<!-- path-check-skip-begin -->
+```bash
+nat finetune --config_file=configs/finetune.yml -o finetuning.num_epochs 20
+```
+<!-- path-check-skip-end -->
+
+Run finetuning with validation monitoring:
+
+<!-- path-check-skip-begin -->
+```bash
+nat finetune --config_file=configs/finetune.yml \
+    --validation_dataset=data/validation.json \
+    --validation_interval=3
+```
+<!-- path-check-skip-end -->
+
+Use a remote endpoint for workflow execution:
+
+<!-- path-check-skip-begin -->
+```bash
+nat finetune --config_file=configs/finetune.yml \
+    --endpoint=http://localhost:8000/generate \
+    --endpoint_timeout=600
+```
+<!-- path-check-skip-end -->
+
 ## Optimize
 
 The `nat optimize` command provides automated hyperparameter tuning and prompt engineering for NeMo Agent toolkit workflows. It intelligently searches for the best combination of parameters based on the evaluation metrics you specify. The optimizer uses [Optuna](https://optuna.org/) for numerical hyperparameter optimization and a genetic algorithm (GA) for prompt optimization. Refer to the [NeMo Agent toolkit Optimizer Guide](../reference/optimizer.md) for a comprehensive overview of the optimizer capabilities and configuration.