From e1fa556ee655d5aadaa92d237a3938f46d09ede3 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 29 Apr 2025 08:28:10 +0000 Subject: [PATCH 1/2] Add JS rewriting rule to ignore import function rewriting --- CHANGELOG.md | 4 ++++ src/zimscraperlib/rewriting/js.py | 1 + tests/rewriting/test_js_rewriting.py | 13 +++++++++++++ 3 files changed, 18 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b2f89f..b06366a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Upgrade to wombat 3.8.11 (#256) +### Fixed + +- JS rewriting abusively rewrites the `import` function (#255) + ## [5.1.1] - 2025-02-17 ### Changed diff --git a/src/zimscraperlib/rewriting/js.py b/src/zimscraperlib/rewriting/js.py index fa1c664..de9b6bc 100644 --- a/src/zimscraperlib/rewriting/js.py +++ b/src/zimscraperlib/rewriting/js.py @@ -186,6 +186,7 @@ def create_js_rules() -> list[TransformationRule]: # As the rule will match first, it will prevent next rule matching `import` to # be apply to `async import`. 
(re.compile(r"async\s+import\s*\("), m2str(lambda x: x)), + (re.compile(r"[^$.]\bimport\s*\([^)]*\)\s*\{"), m2str(lambda x: x)), # esm dynamic import, if found, mark as module ( re.compile(r"[^$.]\bimport\s*\("), diff --git a/tests/rewriting/test_js_rewriting.py b/tests/rewriting/test_js_rewriting.py index 9b9b926..48c278e 100644 --- a/tests/rewriting/test_js_rewriting.py +++ b/tests/rewriting/test_js_rewriting.py @@ -271,6 +271,12 @@ def wrap_import(text: str) -> str: import { Z } from "../../../path.js"; B = await import(somefile); + +class X { + import(a, b, c) { + await import (somefile); + } +} """, expected=""" import * from "../../../example.com/file.js" @@ -282,6 +288,12 @@ def wrap_import(text: str) -> str: import { Z } from "../../path.js"; B = await ____wb_rewrite_import__(import.meta.url, somefile); + +class X { + import(a, b, c) { + await ____wb_rewrite_import__ (import.meta.url, somefile); + } +} """, ), ImportTestContent( @@ -341,6 +353,7 @@ def test_import_rewrite(rewrite_import_content: ImportTestContent): "a.window.x = 5", " postMessage({'a': 'b'})", "simport(5);", + "import(e) {", "a.import(5);", "$import(5);", "async import(val) { ... 
}", From baad1c300860fd74d72244a5d1cf3dafad76fb5c Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 29 Apr 2025 09:18:15 +0000 Subject: [PATCH 2/2] Backport more changes from wabac.js --- CHANGELOG.md | 1 + src/zimscraperlib/rewriting/js.py | 4 ++- tests/rewriting/test_js_rewriting.py | 43 ++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b06366a..4e4df3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Upgrade to wombat 3.8.11 (#256) +- Backport changes in wabac.js around JS rewriting rules (#259) ### Fixed diff --git a/src/zimscraperlib/rewriting/js.py b/src/zimscraperlib/rewriting/js.py index de9b6bc..2faf56c 100644 --- a/src/zimscraperlib/rewriting/js.py +++ b/src/zimscraperlib/rewriting/js.py @@ -154,13 +154,15 @@ def create_js_rules() -> list[TransformationRule]: return [ # rewriting `eval(...)` - invocation (re.compile(r"(?:^|\s)\beval\s*\("), replace_prefix_from(eval_str, "eval")), + (re.compile(r"\([\w]+,\s*eval\)\("), m2str(lambda _: f" {eval_str}")), # rewriting `x = eval` - no invocation (re.compile(r"[=]\s*\beval\b(?![(:.$])"), replace("eval", "self.eval")), + (re.compile(r"var\s+self"), replace("var", "let")), # rewriting `.postMessage` -> `__WB_pmw(self).postMessage` (re.compile(r"\.postMessage\b\("), add_prefix(".__WB_pmw(self)")), # rewriting `location = ` to custom expression `(...).href =` assignement ( - re.compile(r"[^$.]?\s?\blocation\b\s*[=]\s*(?![\s\d=])"), + re.compile(r"(?:^|[^$.+*/%^-])\s?\blocation\b\s*[=]\s*(?![\s\d=])"), add_suffix_non_prop(check_loc), ), # rewriting `return this` diff --git a/tests/rewriting/test_js_rewriting.py b/tests/rewriting/test_js_rewriting.py index 48c278e..6ab7177 100644 --- a/tests/rewriting/test_js_rewriting.py +++ b/tests/rewriting/test_js_rewriting.py @@ -101,6 +101,40 @@ def test_js_rewrite_post_message(simple_js_rewriter: 
JsRewriter): ) +@pytest.mark.parametrize( + "raw_js,expected", + [ + pytest.param("x = eval; x(a);", "x = self.eval; x(a);", id="case1"), + pytest.param( + " eval(a)", + " WB_wombat_runEval2((_______eval_arg, isGlobal) => { var ge = eval; " + "return isGlobal ? ge(_______eval_arg) : eval(_______eval_arg); })" + ".eval(this, (function() { return arguments })(),a)", + id="case2", + ), + pytest.param( + "$eval = eval; $eval(a);", "$eval = self.eval; $eval(a);", id="case3" + ), + pytest.param( + "foo(a, eval(data));", + "foo(a, WB_wombat_runEval2((_______eval_arg, isGlobal) => { var ge = eval; " + "return isGlobal ? ge(_______eval_arg) : eval(_______eval_arg); })" + ".eval(this, (function() { return arguments })(),data));", + id="case4", + ), + pytest.param( + "return(1, eval)(data);", + "return WB_wombat_runEval2((_______eval_arg, isGlobal) => { var ge = eval; " + "return isGlobal ? ge(_______eval_arg) : eval(_______eval_arg); })" + ".eval(this, (function() { return arguments })(),data);", + id="case5", + ), + ], +) +def test_js_rewrite_evals(simple_js_rewriter: JsRewriter, raw_js: str, expected: str): + assert simple_js_rewriter.rewrite(raw_js) == expected + + class WrappedTestContent(ContentForTests): def __init__( @@ -173,11 +207,20 @@ def wrap_script(text: str) -> str: input_="this. location = 'http://example.com/'", expected="this. location = 'http://example.com/'", ), + WrappedTestContent( + input_="abc-location = http://example.com/", + expected="abc-location = http://example.com/", + ), + WrappedTestContent( + input_="func(location = 0)", + expected="func(location = 0)", + ), WrappedTestContent( input_="if (self.foo) { console.log('blah') }", expected="if (self.foo) { console.log('blah') }", ), WrappedTestContent(input_="window.x = 5", expected="window.x = 5"), + WrappedTestContent(input_=" var self ", expected=" let self "), ] ) def rewrite_wrapped_content(request: pytest.FixtureRequest):