From 62e5d1be7a0ea1619da324b938fe583985259e07 Mon Sep 17 00:00:00 2001 From: Anas Khan <83116240+anxkhn@users.noreply.github.com> Date: Mon, 29 Jun 2026 07:29:55 +0530 Subject: [PATCH] feat(jinja): add b64decode/b64encode builtin filters Core SQLMesh shipped no builtin Jinja filters, so base64-encoded secrets (e.g. a BigQuery service-account key stored in an env var) could not be decoded directly in config YAML. Add b64decode/b64encode and register them on the shared environment factory so they are available both in config YAML and in models, mirroring ansible's filters. Closes #5754 Signed-off-by: Anas Khan <83116240+anxkhn@users.noreply.github.com> --- docs/guides/configuration.md | 10 ++++++++++ sqlmesh/utils/jinja.py | 32 +++++++++++++++++++++++++++++++- tests/utils/test_jinja.py | 26 +++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 2 deletions(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index fd8e8dd8cd..b80afa1388 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -170,6 +170,16 @@ The examples specify a Snowflake connection whose password is stored in an envir account: ``` + !!! tip "Base64-encoded secrets" + + If a secret is distributed base64-encoded in a single environment variable (for example a BigQuery service-account key), pipe the variable through the built-in `b64decode` filter to decode it to text inline: + + ```yaml + keyfile_json: {{ env_var('BIGQUERY_KEY_B64') | b64decode }} + ``` + + A matching `b64encode` filter is also available. Both return UTF-8 text, so they are intended for string/JSON secrets rather than arbitrary binary data. + === "Python" Python accesses environment variables via the `os` library's `environ` dictionary. 
diff --git a/sqlmesh/utils/jinja.py b/sqlmesh/utils/jinja.py
index bd82cf225c..829981db1c 100644
--- a/sqlmesh/utils/jinja.py
+++ b/sqlmesh/utils/jinja.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import base64
 import importlib
 import json
 import re
@@ -28,11 +29,73 @@
 SQLMESH_JINJA_PACKAGE = "sqlmesh.utils.jinja"
 
 
+def b64decode(value: t.Union[str, bytes]) -> str:
+    """Decode a base64-encoded value and return it as UTF-8 text.
+
+    Intended for base64-encoded string/JSON secrets (for example a service-account
+    key stored in an environment variable), not arbitrary binary payloads.
+
+    Args:
+        value: The base64 payload. A ``str`` is encoded to UTF-8 bytes first; ``bytes``
+            are passed to the decoder unchanged.
+
+    Returns:
+        The decoded payload as text.
+
+    Raises:
+        binascii.Error: If the payload is incorrectly padded.
+        UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
+    """
+    payload = value.encode("utf-8") if isinstance(value, str) else value
+    return base64.b64decode(payload).decode("utf-8")
+
+
+def b64encode(value: t.Union[str, bytes]) -> str:
+    """Base64-encode a value and return the encoding as text.
+
+    Mirrors ``b64decode``; it is intended for string/JSON secrets rather than
+    arbitrary binary payloads.
+
+    Args:
+        value: The data to encode. A ``str`` is encoded to UTF-8 bytes first; ``bytes``
+            are encoded as they are.
+
+    Returns:
+        The base64 encoding of the input.
+    """
+    raw = value.encode("utf-8") if isinstance(value, str) else value
+    return base64.b64encode(raw).decode("utf-8")
+
+
+def create_builtin_filters() -> t.Dict[str, t.Callable]:
+    """Return the builtin Jinja filters, keyed by the name used in templates."""
+    return {
+        "b64decode": b64decode,
+        "b64encode": b64encode,
+    }
+
+
 def environment(**kwargs: t.Any) -> Environment:
-    extensions = kwargs.pop("extensions", [])
-    extensions.append("jinja2.ext.do")
-    extensions.append("jinja2.ext.loopcontrols")
-    return Environment(extensions=extensions, **kwargs)
+    """Create a Jinja environment with the standard extensions and builtin filters.
+
+    Args:
+        kwargs: Keyword arguments forwarded to ``jinja2.Environment``. An optional
+            ``extensions`` entry is extended with the ``do`` and ``loopcontrols``
+            extensions.
+
+    Returns:
+        The configured environment.
+    """
+    # Build a new list instead of appending in place so that a list passed in by the
+    # caller is not mutated, and so that any iterable of extensions is accepted.
+    extensions = [
+        *kwargs.pop("extensions", []),
+        "jinja2.ext.do",
+        "jinja2.ext.loopcontrols",
+    ]
+    env = Environment(extensions=extensions, **kwargs)
+    env.filters.update(create_builtin_filters())
+    return env
 
 
 ENVIRONMENT = environment()
diff --git a/tests/utils/test_jinja.py b/tests/utils/test_jinja.py
index 1cf7c1bf95..01eeb47412 100644
--- a/tests/utils/test_jinja.py
+++ b/tests/utils/test_jinja.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
-from sqlmesh.utils import AttributeDict
+from base64 import b64encode
+
+from sqlmesh.utils import AttributeDict, yaml
 from sqlmesh.utils.jinja import (
ENVIRONMENT, JinjaMacroRegistry, @@ -329,3 +331,25 @@ def test_macro_registry_to_expressions_sorted(): == "refs = {'orders': {'database': 'jaffle_shop', 'nested_list': ['a', 'b', 'c'], 'schema': 'main'}, 'payments': {'database': 'jaffle_shop', 'nested': {'baz': 'bing', 'foo': 'bar'}, 'schema': 'main'}}\n" "sources = {}" ) + + +def test_builtin_base64_filters(): + encoded = b64encode(b"secret").decode("utf-8") + + env = JinjaMacroRegistry().build_environment() + assert env.from_string("{{ value | b64decode }}").render(value=encoded) == "secret" + assert env.from_string("{{ 'secret' | b64encode }}").render() == encoded + assert env.from_string("{{ 'secret' | b64encode | b64decode }}").render() == "secret" + + # The same filters are available when rendering Jinja in config YAML files. + config = yaml.load(f'env_vars:\n TOKEN: "{{{{ "{encoded}" | b64decode }}}}"') + assert config == {"env_vars": {"TOKEN": "secret"}} + + +def test_builtin_b64decode_with_env_var(monkeypatch): + # Real-world use case: a base64-encoded secret stored in an environment variable + # is decoded inline in config YAML via env_var(...) piped through b64decode. + monkeypatch.setenv("SNOWFLAKE_PW_B64", b64encode(b"super-secret-pw").decode("utf-8")) + + config = yaml.load("password: \"{{ env_var('SNOWFLAKE_PW_B64') | b64decode }}\"") + assert config == {"password": "super-secret-pw"}