diff --git a/.gitignore b/.gitignore index 3faf7f2..6cad13e 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ pdfkit.egg-info # Tests .tox .python-version + +# OS +.DS_Store diff --git a/HISTORY.rst b/HISTORY.rst index 7470f24..fc8726a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,5 +1,13 @@ Changelog --------- + +* `2.0.1` + * Fix security vulnerability CVE-2025-26240: Implement secure meta option + allowlist filtering for ``from_string`` to prevent Local File Inclusion + (LFI) and Server-Side Request Forgery (SSRF). Add + ``allow_unsafe_meta_tags`` option to override filtering when rendering + trusted content. + * `2.0.0` * Drop support for Python <= 3.7 * `1.0.0` diff --git a/README.rst b/README.rst index 2822f0c..0142719 100644 --- a/README.rst +++ b/README.rst @@ -135,7 +135,7 @@ You can specify external CSS files when converting files or strings using *css* css = ['example.css', 'example2.css'] pdfkit.from_file('file.html', options=options, css=css) -You can also pass any options through meta tags in your HTML: +You can also pass options through meta tags in your HTML: .. code-block:: python @@ -151,6 +151,19 @@ You can also pass any options through meta tags in your HTML: pdfkit.from_string(body, 'out.pdf') #with --page-size=Legal and --orientation=Landscape +Security (CVE-2025-26240) +------------------------- + +By default, ``pdfkit.from_string`` filters the meta tags against a safe allowlist of layout and formatting options (such as ``page-size``, ``orientation``, ``margin-top``, etc.) to prevent Local File Inclusion (LFI) and Server-Side Request Forgery (SSRF) vulnerabilities (CVE-2025-26240). + +If you are rendering untrusted HTML, the default safe parsing behavior protects your system from malicious parameters like ``--enable-local-file-access``, ``--post-file``, or ``--script``. + +If you are working with trusted HTML and explicitly need to allow potentially unsafe wkhtmltopdf command-line options via HTML meta tags, you can enable them by setting ``allow_unsafe_meta_tags=True``: + +.. code-block:: python + + pdfkit.from_string(body, 'out.pdf', allow_unsafe_meta_tags=True) + Configuration ------------- diff --git a/pdfkit/__init__.py b/pdfkit/__init__.py index 027c95c..56028a3 100644 --- a/pdfkit/__init__.py +++ b/pdfkit/__init__.py @@ -4,7 +4,7 @@ """ __author__ = 'Golovanov Stanislav' -__version__ = '2.0.0' +__version__ = '2.0.1' __license__ = 'MIT' from .pdfkit import PDFKit diff --git a/pdfkit/api.py b/pdfkit/api.py index 86d9656..9309697 100644 --- a/pdfkit/api.py +++ b/pdfkit/api.py @@ -52,7 +52,7 @@ def from_file(input, output_path=None, options=None, toc=None, cover=None, css=N def from_string(input, output_path=None, options=None, toc=None, cover=None, css=None, - configuration=None, cover_first=False, verbose=False): + configuration=None, cover_first=False, verbose=False, allow_unsafe_meta_tags=False): """ Convert given string or strings to PDF document @@ -65,12 +65,14 @@ def from_string(input, output_path=None, options=None, toc=None, cover=None, css :param configuration: (optional) instance of pdfkit.configuration.Configuration() :param cover_first: (optional) if True, cover always precedes TOC :param verbose: (optional) By default '--quiet' is passed to all calls, set this to False to get wkhtmltopdf output to stdout. + :param allow_unsafe_meta_tags: (optional) if True, allow unsafe options from HTML meta tags (not recommended, CVE-2025-26240). Returns: True on success """ r = PDFKit(input, 'string', options=options, toc=toc, cover=cover, css=css, - configuration=configuration, cover_first=cover_first, verbose=verbose) + configuration=configuration, cover_first=cover_first, verbose=verbose, + allow_unsafe_meta_tags=allow_unsafe_meta_tags) return r.to_pdf(output_path) diff --git a/pdfkit/pdfkit.py b/pdfkit/pdfkit.py index 88b3c92..809b8b9 100644 --- a/pdfkit/pdfkit.py +++ b/pdfkit/pdfkit.py @@ -2,9 +2,11 @@ import re import subprocess import sys +import warnings from collections import OrderedDict from .source import Source from .configuration import Configuration +from .security import is_safe_meta_option import io import codecs @@ -32,7 +34,8 @@ def __str__(self): return self.msg def __init__(self, url_or_file, type_, options=None, toc=None, cover=None, - css=None, configuration=None, cover_first=False, verbose=False): + css=None, configuration=None, cover_first=False, verbose=False, + allow_unsafe_meta_tags=False): self.source = Source(url_or_file, type_) self.configuration = (Configuration() if configuration is None @@ -42,6 +45,8 @@ def __init__(self, url_or_file, type_, options=None, toc=None, cover=None, except AttributeError: self.wkhtmltopdf = self.configuration.wkhtmltopdf + self.allow_unsafe_meta_tags = allow_unsafe_meta_tags + self.options = OrderedDict() if self.source.isString(): self.options.update(self._find_options_in_meta(url_or_file)) @@ -292,6 +297,15 @@ def _find_options_in_meta(self, content): if re.search('name=["\']%s' % self.configuration.meta_tag_prefix, x): name = re.findall('name=["\']%s([^"\']*)' % self.configuration.meta_tag_prefix, x)[0] - found[name] = re.findall('content=["\']([^"\']*)', x)[0] + value = re.findall('content=["\']([^"\']*)', x)[0] + if self.allow_unsafe_meta_tags or is_safe_meta_option(name): + found[name] = value + else: + warnings.warn( + 'Blocked unsafe meta tag option: {}. ' + 'To allow this, set allow_unsafe_meta_tags=True, but ' + 'be aware of the security risks (CVE-2025-26240).'.format(name), + RuntimeWarning + ) return found diff --git a/pdfkit/security.py b/pdfkit/security.py new file mode 100644 index 0000000..112b848 --- /dev/null +++ b/pdfkit/security.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +import warnings + +# Safe options allowed in HTML meta tags when parsing untrusted markup. +# Anything not in this allowlist is considered unsafe/untrusted and will be ignored +# unless 'allow_unsafe_meta_tags' is explicitly enabled. +ALLOWED_META_OPTIONS = frozenset([ + # Page size & orientation + 'page-size', + 'page-width', + 'page-height', + 'orientation', + + # Margins + 'margin-top', + 'margin-right', + 'margin-bottom', + 'margin-left', + + # Document properties + 'encoding', + 'dpi', + 'grayscale', + 'lowquality', + 'image-dpi', + 'image-quality', + 'title', + 'no-pdf-compression', + 'quiet', + + # Header options (pure text / formatting, no file or URL loading) + 'header-line', + 'header-spacing', + 'header-center', + 'header-left', + 'header-right', + 'header-font-name', + 'header-font-size', + + # Footer options (pure text / formatting, no file or URL loading) + 'footer-line', + 'footer-spacing', + 'footer-center', + 'footer-left', + 'footer-right', + 'footer-font-name', + 'footer-font-size', + + # Outline options + 'outline', + 'outline-depth', + 'no-outline', + + # TOC formatting options + 'toc-header-text', + 'toc-level-indentation', + 'toc-text-size-shrink', +]) + + +def is_safe_meta_option(option_name): + """ + Checks if an option is present in the ALLOWED_META_OPTIONS allowlist. + Normalizes option names by stripping any leading dashes and lowercasing. + + :param option_name: The name of the option to check. + :return: True if the option is safe, False otherwise. + """ + if not option_name: + return False + normalized = option_name.lower().lstrip('-') + return normalized in ALLOWED_META_OPTIONS diff --git a/tests/pdfkit-tests.py b/tests/pdfkit-tests.py index 969afbd..2be501b 100644 --- a/tests/pdfkit-tests.py +++ b/tests/pdfkit-tests.py @@ -492,5 +492,79 @@ def test_issue_169_quiet_boolean_True(self): output = r.to_pdf() self.assertEqual(output[:4].decode('utf-8'), '%PDF') + +class TestPDFKitSecurity(unittest.TestCase): + """Test security-related behavior, particularly CVE-2025-26240""" + + def test_default_meta_tags_behavior_blocks_unsafe_options(self): + body = """ + +
+ + + + + + + """ + import warnings + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + r = pdfkit.PDFKit(body, 'string') + self.assertTrue(len(w) >= 3) + self.assertTrue(any("Blocked unsafe meta tag option: enable-local-file-access" in str(warn.message) for warn in w)) + self.assertTrue(any("Blocked unsafe meta tag option: post-file" in str(warn.message) for warn in w)) + self.assertTrue(any("Blocked unsafe meta tag option: script" in str(warn.message) for warn in w)) + + cmd = r.command() + self.assertNotIn('--enable-local-file-access', cmd) + self.assertNotIn('--post-file', cmd) + self.assertNotIn('--script', cmd) + self.assertIn('--page-size', cmd) + self.assertEqual(cmd[cmd.index('--page-size') + 1], 'Legal') + + def test_allow_unsafe_meta_tags_allows_everything(self): + body = """ + + + + + + + + """ + import warnings + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + r = pdfkit.PDFKit(body, 'string', allow_unsafe_meta_tags=True) + self.assertEqual(len([warn for warn in w if "Blocked unsafe meta" in str(warn.message)]), 0) + + cmd = r.command() + self.assertIn('--enable-local-file-access', cmd) + self.assertIn('--post-file', cmd) + self.assertEqual(cmd[cmd.index('--post-file') + 1], '/etc/passwd') + self.assertIn('--page-size', cmd) + self.assertEqual(cmd[cmd.index('--page-size') + 1], 'Legal') + + def test_from_string_parameter_propagation(self): + body = """ + + + + + + """ + import warnings + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + pdfkit.from_string(body, 'out.pdf') + self.assertTrue(any("Blocked unsafe meta tag option: post-file" in str(warn.message) for warn in w)) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + pdfkit.from_string(body, 'out.pdf', allow_unsafe_meta_tags=True) + self.assertEqual(len([warn for warn in w if "Blocked unsafe meta" in str(warn.message)]), 0) + + if __name__ == "__main__": unittest.main()