diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2b6aec5a6..4203be293 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -169,4 +169,4 @@ jobs: - uses: jupyterlab/maintainer-tools/.github/actions/base-setup@v1 - uses: jupyterlab/maintainer-tools/.github/actions/check-links@v1 with: - ignore_links: "https://www.linkedin.com/.* https://fellowship.mlh.io/.* https://github.com/.*" + ignore_links: "https://www.linkedin.com/.* https://fellowship.mlh.io/.* https://github.com/.* https://amazon.com/.*" diff --git a/jupyterlab_git/__init__.py b/jupyterlab_git/__init__.py index 7e1762e25..78d7514c1 100644 --- a/jupyterlab_git/__init__.py +++ b/jupyterlab_git/__init__.py @@ -51,6 +51,19 @@ class JupyterLabGit(Configurable): config=True, ) + output_cleaning_command = Unicode( + "jupyter nbconvert", + help="Notebook cleaning command. Configurable by server admin.", + config=True, + ) + + # Extra options to pass to the cleaning tool + output_cleaning_options = Unicode( + "--ClearOutputPreprocessor.enabled=True --inplace", + help="Extra command-line options to pass to the cleaning tool.", + config=True, + ) + @default("credential_helper") def _credential_helper_default(self): return "cache --timeout=3600" diff --git a/jupyterlab_git/git.py b/jupyterlab_git/git.py index a4ae5e9e8..8c7d6c546 100644 --- a/jupyterlab_git/git.py +++ b/jupyterlab_git/git.py @@ -1182,6 +1182,52 @@ async def merge(self, branch: str, path: str) -> dict: return {"code": code, "command": " ".join(cmd), "message": error} return {"code": code, "message": output.strip()} + async def check_notebooks_with_outputs(self, path): + import nbformat, os + + code, stdout, _ = await self.__execute( + ["git", "diff", "--cached", "--name-only", "--diff-filter=ACM"], cwd=path + ) + staged_files = stdout.splitlines() + notebooks = [f for f in staged_files if f.endswith(".ipynb")] + + dirty_notebooks = [] + + for nb_path in notebooks: + full_nb_path = os.path.join(path, nb_path) + try: + nb = nbformat.read(full_nb_path, as_version=nbformat.NO_CONVERT) + for cell in nb.get("cells", []): + if cell.get("cell_type") == "code" and cell.get("outputs"): + dirty_notebooks.append(nb_path) + break + except Exception: + pass + + return { + "notebooks_with_outputs": dirty_notebooks, + "has_outputs": len(dirty_notebooks) > 0, + } + + async def strip_notebook_outputs(self, notebooks: list, repo_path: str): + for nb_path in notebooks: + full_path = os.path.join(repo_path, nb_path) + + try: + cmd = shlex.split(self._config.output_cleaning_command) + options = shlex.split(self._config.output_cleaning_options) + + full_cmd = cmd + options + [full_path] + + # Run the cleaning command + subprocess.run(full_cmd, check=True) + + # Re-stage the cleaned notebook + subprocess.run(["git", "-C", repo_path, "add", full_path], check=True) + + except Exception as e: + print(f"Failed: {nb_path}: {e}") + async def commit(self, commit_msg, amend, path, author=None): """ Execute git commit command & return the result. diff --git a/jupyterlab_git/handlers.py b/jupyterlab_git/handlers.py index c6b270ac1..d463bf97d 100644 --- a/jupyterlab_git/handlers.py +++ b/jupyterlab_git/handlers.py @@ -599,6 +599,44 @@ async def post(self, path: str = ""): self.finish(json.dumps(body)) +class GitStripNotebooksHandler(GitHandler): + """Handler to strip outputs from notebooks in a repository.""" + + @tornado.web.authenticated + async def post(self, path: str = ""): + """ + POST request handler to strip outputs from notebooks. + """ + data = self.get_json_body() + notebooks = data.get("notebooks", []) + + try: + await self.git.strip_notebook_outputs(notebooks, self.url2localpath(path)) + self.set_status(200) + self.finish(json.dumps({"code": 0, "message": "Notebooks stripped"})) + except Exception as e: + self.set_status(500) + self.finish( + json.dumps( + { + "code": 1, + "message": f"Failed to strip notebook outputs: {str(e)}", + } + ) + ) + + +class GitCheckNotebooksHandler(GitHandler): + """ + Handler to check staged notebooks for outputs. + """ + + @tornado.web.authenticated + async def get(self, path: str = ""): + body = await self.git.check_notebooks_with_outputs(self.url2localpath(path)) + self.finish(json.dumps(body)) + + class GitUpstreamHandler(GitHandler): @tornado.web.authenticated async def post(self, path: str = ""): @@ -1182,6 +1220,8 @@ def setup_handlers(web_app): ("/stash_pop", GitStashPopHandler), ("/stash_apply", GitStashApplyHandler), ("/submodules", GitSubmodulesHandler), + ("/check_notebooks", GitCheckNotebooksHandler), + ("/strip_notebooks", GitStripNotebooksHandler), ] handlers = [ diff --git a/schema/plugin.json b/schema/plugin.json index c0dba9365..77f28cf69 100644 --- a/schema/plugin.json +++ b/schema/plugin.json @@ -82,6 +82,13 @@ "title": "Hide hidden file warning", "description": "If true, the warning popup when opening the .gitignore file without hidden files will not be displayed.", "default": false + }, + "clearOutputsBeforeCommit": { + "type": "boolean", + "title": "Clear outputs before commit", + "description": "If true, notebook outputs will be cleared before committing. If false, outputs are kept. If null, ask each time.", + "default": null, + "nullable": true } }, "jupyter.lab.shortcuts": [ diff --git a/src/__tests__/test-components/GitPanel.spec.tsx b/src/__tests__/test-components/GitPanel.spec.tsx index defda4230..4c0f0b7e9 100644 --- a/src/__tests__/test-components/GitPanel.spec.tsx +++ b/src/__tests__/test-components/GitPanel.spec.tsx @@ -188,6 +188,8 @@ describe('GitPanel', () => { it('should commit when commit message is provided', async () => { configSpy.mockResolvedValue({ options: commitUser }); + props.model.checkNotebooksForOutputs = jest.fn().mockResolvedValue([]); + await userEvent.type(screen.getAllByRole('textbox')[0], commitSummary); await userEvent.type( screen.getAllByRole('textbox')[1], @@ -223,6 +225,7 @@ describe('GitPanel', () => { it('should prompt for user identity if explicitly configured', async () => { configSpy.mockResolvedValue({ options: commitUser }); + props.model.checkNotebooksForOutputs = jest.fn().mockResolvedValue([]); props.settings = MockSettings(false, true) as any; renderResult.rerender(); @@ -245,6 +248,7 @@ describe('GitPanel', () => { it('should prompt for user identity if user.name is not set', async () => { configSpy.mockImplementation(mockConfigImplementation('user.email')); mockUtils.showDialog.mockResolvedValue(dialogValue); + props.model.checkNotebooksForOutputs = jest.fn().mockResolvedValue([]); await userEvent.type(screen.getAllByRole('textbox')[0], commitSummary); await userEvent.click(screen.getByRole('button', { name: 'Commit' })); @@ -261,6 +265,7 @@ describe('GitPanel', () => { it('should prompt for user identity if user.email is not set', async () => { configSpy.mockImplementation(mockConfigImplementation('user.name')); mockUtils.showDialog.mockResolvedValue(dialogValue); + props.model.checkNotebooksForOutputs = jest.fn().mockResolvedValue([]); await userEvent.type(screen.getAllByRole('textbox')[0], commitSummary); await userEvent.click(screen.getByRole('button', { name: 'Commit' })); @@ -280,6 +285,7 @@ describe('GitPanel', () => { configSpy.mockImplementation(mockConfigImplementation('user.email')); mockUtils.showDialog.mockResolvedValue(dialogValue); + props.model.checkNotebooksForOutputs = jest.fn().mockResolvedValue([]); await userEvent.type(screen.getAllByRole('textbox')[0], commitSummary); await userEvent.click(screen.getByRole('button', { name: 'Commit' })); @@ -298,6 +304,7 @@ describe('GitPanel', () => { renderResult.rerender(); configSpy.mockImplementation(mockConfigImplementation('user.name')); mockUtils.showDialog.mockResolvedValue(dialogValue); + props.model.checkNotebooksForOutputs = jest.fn().mockResolvedValue([]); await userEvent.type(screen.getAllByRole('textbox')[0], commitSummary); await userEvent.click(screen.getByRole('button', { name: 'Commit' })); diff --git a/src/components/GitPanel.tsx b/src/components/GitPanel.tsx index 61cd1e8b7..9cc0bfe0e 100644 --- a/src/components/GitPanel.tsx +++ b/src/components/GitPanel.tsx @@ -803,9 +803,66 @@ export class GitPanel extends React.Component { ): Promise => { const errorMsg = this.props.trans.__('Failed to commit changes.'); let id: string | null = notificationId ?? null; + try { const author = await this._hasIdentity(this.props.model.pathRepository); + const notebooksWithOutputs = + await this.props.model.checkNotebooksForOutputs(); + + const clearSetting = + this.props.settings.composite['clearOutputsBeforeCommit']; + if ( + notebooksWithOutputs.length > 0 && + (clearSetting === null || clearSetting === undefined) + ) { + const dialog = new Dialog({ + title: this.props.trans.__('Notebook outputs detected'), + checkbox: { + label: this.props.trans.__('Clear all outputs before committing?'), + checked: false + }, + buttons: [ + Dialog.cancelButton({ + label: this.props.trans.__('Keep Outputs & Commit') + }), + Dialog.okButton({ label: this.props.trans.__('Clean & Commit') }) + ], + defaultButton: 0 + }); + + const result = await dialog.launch(); + dialog.dispose(); + + if (result.button.label === this.props.trans.__('Cancel')) { + return; + } + const accepted = + result.button.label === this.props.trans.__('Clean & Commit'); + + // Remember the user’s choice if checkbox is checked + if (result?.isChecked) { + this.props.settings.set('clearOutputsBeforeCommit', accepted); + } + if (accepted) { + id = Notification.emit( + this.props.trans.__('Cleaning notebook outputs…'), + 'in-progress', + { autoClose: false } + ); + + await this.props.model.stripNotebooksOutputs(notebooksWithOutputs); + } + } else if (clearSetting === true) { + // Always clean before commit + id = Notification.emit( + this.props.trans.__('Cleaning notebook outputs…'), + 'in-progress', + { autoClose: false } + ); + await this.props.model.stripNotebooksOutputs(notebooksWithOutputs); + } + const notificationMsg = this.props.trans.__('Committing changes...'); if (id !== null) { Notification.update({ diff --git a/src/model.ts b/src/model.ts index fc961e6f8..d4222704c 100644 --- a/src/model.ts +++ b/src/model.ts @@ -743,6 +743,47 @@ export class GitExtension implements IGitExtension { await this.refresh(); } + /** + * Check staged notebooks for outputs. + * + * @returns A promise resolving to an array of notebook paths that have outputs + * + * @throws {Git.NotInRepository} If the current path is not a Git repository + * @throws {Git.GitResponseError} If the server response is not ok + * @throws {ServerConnection.NetworkError} If the request cannot be made + */ + async checkNotebooksForOutputs(): Promise { + const path = await this._getPathRepository(); + + return this._taskHandler.execute('git:check-notebooks', async () => { + const result = await requestAPI<{ notebooks_with_outputs: string[] }>( + URLExt.join(path, 'check_notebooks') + ); + return result.notebooks_with_outputs; + }); + } + + /** + * Strip outputs from the given staged notebooks. + * + * @param notebooks - Array of notebook paths to clean + * + * @returns A promise resolving when the operation completes + * + * @throws {Git.NotInRepository} If the current path is not a Git repository + * @throws {Git.GitResponseError} If the server response is not ok + * @throws {ServerConnection.NetworkError} If the request cannot be made + */ + async stripNotebooksOutputs(notebooks: string[]): Promise { + const path = await this._getPathRepository(); + + await this._taskHandler.execute('git:strip-notebooks', async () => { + await requestAPI(URLExt.join(path, 'strip_notebooks'), 'POST', { + notebooks + }); + }); + } + /** * Get (or set) Git configuration options. *