diff --git a/.github/workflows/dependabot-pr-cleaner.yaml b/.github/workflows/dependabot-pr-cleaner.yaml
new file mode 100644
index 000000000..08f5ef852
--- /dev/null
+++ b/.github/workflows/dependabot-pr-cleaner.yaml
@@ -0,0 +1,100 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Dependabot PR cleaner
+run-name: >-
+  Clean up description of PR
+  ${{inputs.pr-number || github.event.pull_request.number}} on
+  ${{github.ref_name}} by @${{github.actor}}
+
+# Dependabot's PR descriptions are written in HTML and contain repeated parts
+# that bloat git histories. This converts them to Markdown & cleans them up.
+
+on:
+  pull_request_target:
+    types:
+      - opened
+      - synchronize
+      - reopened
+
+  workflow_dispatch:
+    inputs:
+      pr-number:
+        description: 'The PR number of the PR to clean:'
+        required: true
+
+permissions: read-all
+
+jobs:
+  clean-pr-description:
+    # Run for PR events triggered by Dependabot and for manual dispatches
+    # (where the actor is a person, not the bot); never run in forks.
+    if: >-
+      ${{github.repository_owner == 'quantumlib' &&
+      (github.actor == 'dependabot[bot]' ||
+      github.event_name == 'workflow_dispatch')}}
+    name: Clean PR description
+    runs-on: ubuntu-slim
+    timeout-minutes: 5
+    permissions:
+      contents: read
+      pull-requests: write
+    env:
+      GH_REPO: ${{github.repository}}
+      PR_NUMBER: ${{inputs.pr-number || github.event.pull_request.number}}
+    steps:
+      - name: Set up Python
+        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v5
+        with:
+          # No pip cache: this job checks out no repository files, so there
+          # is no requirements/pyproject file to derive a cache key from.
+          python-version: '3.13'
+
+      - name: Install dependencies
+        run: pip install html2text==2025.4.15
+
+      - name: Get the PR description body
+        env:
+          GH_TOKEN: ${{secrets.GITHUB_TOKEN}}
+        # Read the PR number from the environment instead of expanding an
+        # expression into the script text, so the value is never parsed as code.
+        run: gh pr view "$PR_NUMBER" --json body --jq .body > body.txt
+
+      - name: Clean up the description and convert it to Markdown
+        shell: python
+        run: |
+          import re
+
+          import html2text
+
+          with open("body.txt", "r", encoding="utf-8") as f:
+              content = f.read()
+
+          # Delete everything starting with the commits list onward. The
+          # pattern targets the collapsible "Commits" section of the body.
+          commits_start_re = r"<details>\s*<summary>Commits"
+          content = re.split(
+              commits_start_re, content, maxsplit=1, flags=re.IGNORECASE
+          )[0]
+
+          converter = html2text.HTML2Text()
+          markdown_content = converter.handle(content.strip()).strip()
+
+          with open("new-body.txt", "w", encoding="utf-8") as f:
+              f.write(markdown_content)
+
+      - name: Write the description back to the PR
+        env:
+          GH_TOKEN: ${{secrets.GITHUB_TOKEN}}
+        run: gh pr edit "$PR_NUMBER" --body-file new-body.txt