mirror of https://github.com/python/peps
257 lines
18 KiB
HTML
257 lines
18 KiB
HTML
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="color-scheme" content="light dark">
|
||
<title>PEP 552 – Deterministic pycs | peps.python.org</title>
|
||
<link rel="shortcut icon" href="../_static/py.png">
|
||
<link rel="canonical" href="https://peps.python.org/pep-0552/">
|
||
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
||
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
||
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
||
<meta property="og:title" content='PEP 552 – Deterministic pycs | peps.python.org'>
|
||
<meta property="og:type" content="website">
|
||
<meta property="og:url" content="https://peps.python.org/pep-0552/">
|
||
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
||
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
||
<meta property="og:image:alt" content="Python PEPs">
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="description" content="Python Enhancement Proposals (PEPs)">
|
||
<meta name="theme-color" content="#3776ab">
|
||
</head>
|
||
<body>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Following system colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="9"></circle>
|
||
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected dark colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected light colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
<script>
|
||
|
||
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
||
</script>
|
||
<section id="pep-page-section">
|
||
<header>
|
||
<h1>Python Enhancement Proposals</h1>
|
||
<ul class="breadcrumbs">
|
||
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
||
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
||
<li>PEP 552</li>
|
||
</ul>
|
||
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
||
</button>
|
||
</header>
|
||
<article>
|
||
<section id="pep-content">
|
||
<h1 class="page-title">PEP 552 – Deterministic pycs</h1>
|
||
<dl class="rfc2822 field-list simple">
|
||
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Benjamin Peterson &lt;benjamin at python.org&gt;</dd>
|
||
<dt class="field-even">Status<span class="colon">:</span></dt>
|
||
<dd class="field-even"><abbr title="Accepted and implementation complete, or no longer active">Final</abbr></dd>
|
||
<dt class="field-odd">Type<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
|
||
<dt class="field-even">Created<span class="colon">:</span></dt>
|
||
<dd class="field-even">04-Sep-2017</dd>
|
||
<dt class="field-odd">Python-Version<span class="colon">:</span></dt>
|
||
<dd class="field-odd">3.7</dd>
|
||
<dt class="field-even">Post-History<span class="colon">:</span></dt>
|
||
<dd class="field-even">07-Sep-2017</dd>
|
||
<dt class="field-odd">Resolution<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><a class="reference external" href="https://mail.python.org/pipermail/python-dev/2017-September/149649.html">Python-Dev message</a></dd>
|
||
</dl>
|
||
<hr class="docutils" />
|
||
<section id="contents">
|
||
<details><summary>Table of Contents</summary><ul class="simple">
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#credits">Credits</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
</details></section>
|
||
<section id="abstract">
|
||
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
||
<p>This PEP proposes an extension to the pyc format to make it more deterministic.</p>
|
||
</section>
|
||
<section id="rationale">
|
||
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
||
<p>A <a class="reference external" href="https://reproducible-builds.org/">reproducible build</a> is one where the same byte-for-byte output is generated
|
||
every time the same sources are built—even across different machines (naturally
|
||
subject to the requirement that they have rather similar environments
|
||
set up). Reproducibility is important for security. It is also a key concept in
|
||
content-based build systems such as <a class="reference external" href="https://bazel.build/">Bazel</a>, which are most effective when the
|
||
output files’ contents are a deterministic function of the input files’
|
||
contents.</p>
|
||
<p>The current Python pyc format is the marshaled code object of the module
|
||
prefixed by a <a class="reference external" href="https://docs.python.org/3/library/importlib.html#importlib.util.MAGIC_NUMBER">magic number</a>, the source timestamp, and the source file
|
||
size. The presence of a source timestamp means that a pyc is not a deterministic
|
||
function of the input file’s contents—it also depends on volatile metadata, the
|
||
mtime of the source. Thus, pycs are a barrier to proper reproducibility.</p>
|
||
<p>Distributors of Python code are currently stuck with the options of</p>
|
||
<ol class="arabic simple">
|
||
<li>not distributing pycs and losing the caching advantages</li>
|
||
<li>distributing pycs and losing reproducibility</li>
|
||
<li>carefully giving all Python source files a deterministic timestamp
|
||
(see, for example, <a class="reference external" href="https://github.com/python/cpython/pull/296">https://github.com/python/cpython/pull/296</a>)</li>
|
||
<li>doing a complicated mixture of 1. and 2. like generating pycs at installation
|
||
time</li>
|
||
</ol>
|
||
<p>None of these options are very attractive. This PEP proposes allowing the
|
||
timestamp to be replaced with a deterministic hash. The current timestamp
|
||
invalidation method will remain the default, though. Despite its nondeterminism,
|
||
timestamp invalidation works well for many workflows and use cases. The
|
||
hash-based pyc format can impose the cost of reading and hashing every source
|
||
file, which is more expensive than simply checking timestamps. Thus, for now, we
|
||
expect it to be used mainly by distributors and power use cases.</p>
|
||
<p>(Note there are other problems <a class="footnote-reference brackets" href="#frozensets" id="id1">[1]</a> <a class="footnote-reference brackets" href="#interning" id="id2">[2]</a> we do not
|
||
address here that can make pycs non-deterministic.)</p>
|
||
</section>
|
||
<section id="specification">
|
||
<h2><a class="toc-backref" href="#specification" role="doc-backlink">Specification</a></h2>
|
||
<p>The pyc header currently consists of 3 32-bit words. We will expand it to 4. The
|
||
first word will continue to be the magic number, versioning the bytecode and pyc
|
||
format. The second word, conceptually the new word, will be a bit field. The
|
||
interpretation of the rest of the header and invalidation behavior of the pyc
|
||
depends on the contents of the bit field.</p>
|
||
<p>If the bit field is 0, the pyc is a traditional timestamp-based pyc. I.e., the
|
||
third and fourth words will be the timestamp and file size respectively, and
|
||
invalidation will be done by comparing the metadata of the source file with that
|
||
in the header.</p>
|
||
<p>If the lowest bit of the bit field is set, the pyc is a hash-based pyc. We call
|
||
the second lowest bit the <code class="docutils literal notranslate"><span class="pre">check_source</span></code> flag. Following the bit field is a
|
||
64-bit hash of the source file. We will use a <a class="reference external" href="https://131002.net/siphash/">SipHash</a> with a hardcoded key of
|
||
the contents of the source file. Another fast hash like MD5 or <a class="reference external" href="https://blake2.net/">BLAKE2</a> would
|
||
also work. We choose SipHash because Python already has a builtin implementation
|
||
of it from <a class="pep reference internal" href="../pep-0456/" title="PEP 456 – Secure and interchangeable hash algorithm">PEP 456</a>, although an interface that allows picking the SipHash key
|
||
must be exposed to Python. Security of the hash is not a concern, though we pass
|
||
over completely-broken hashes like MD5 to ease auditing of Python in controlled
|
||
environments.</p>
|
||
<p>When Python encounters a hash-based pyc, its behavior depends on the setting of
|
||
the <code class="docutils literal notranslate"><span class="pre">check_source</span></code> flag. If the <code class="docutils literal notranslate"><span class="pre">check_source</span></code> flag is set, Python will
|
||
determine the validity of the pyc by hashing the source file and comparing the
|
||
hash with the expected hash in the pyc. If the pyc needs to be regenerated, it
|
||
will be regenerated as a hash-based pyc again with the <code class="docutils literal notranslate"><span class="pre">check_source</span></code> flag
|
||
set.</p>
|
||
<p>For hash-based pycs with the <code class="docutils literal notranslate"><span class="pre">check_source</span></code> flag unset, Python will simply load the
|
||
pyc without checking the hash of the source file. The expectation in this case
|
||
is that some external system (e.g., the local Linux distribution’s package
|
||
manager) is responsible for keeping pycs up to date, so Python itself doesn’t
|
||
have to check. Even when validation is disabled, the hash field should be set
|
||
correctly, so out-of-band consistency checkers can verify the up-to-dateness of
|
||
the pyc. Note also that the <a class="pep reference internal" href="../pep-3147/" title="PEP 3147 – PYC Repository Directories">PEP 3147</a> edict that pycs without corresponding
|
||
source files not be loaded will still be enforced for hash-based pycs.</p>
|
||
<p>The programmatic APIs of <code class="docutils literal notranslate"><span class="pre">py_compile</span></code> and <code class="docutils literal notranslate"><span class="pre">compileall</span></code> will support
|
||
generation of hash-based pycs. Principally, <code class="docutils literal notranslate"><span class="pre">py_compile</span></code> will define a new
|
||
enumeration corresponding to all the available pyc invalidation modes:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">PycInvalidationMode</span><span class="p">(</span><span class="n">Enum</span><span class="p">):</span>
|
||
<span class="n">TIMESTAMP</span>
|
||
<span class="n">CHECKED_HASH</span>
|
||
<span class="n">UNCHECKED_HASH</span>
|
||
</pre></div>
|
||
</div>
|
||
<p><code class="docutils literal notranslate"><span class="pre">py_compile.compile</span></code>, <code class="docutils literal notranslate"><span class="pre">compileall.compile_dir</span></code>, and
|
||
<code class="docutils literal notranslate"><span class="pre">compileall.compile_file</span></code> will all gain an <code class="docutils literal notranslate"><span class="pre">invalidation_mode</span></code> parameter,
|
||
which accepts a value of the <code class="docutils literal notranslate"><span class="pre">PycInvalidationMode</span></code> enumeration.</p>
|
||
<p>The <code class="docutils literal notranslate"><span class="pre">compileall</span></code> tool will be extended with a new command-line option,
|
||
<code class="docutils literal notranslate"><span class="pre">--invalidation-mode</span></code> to generate hash-based pycs with and without the
|
||
<code class="docutils literal notranslate"><span class="pre">check_source</span></code> bit set. <code class="docutils literal notranslate"><span class="pre">--invalidation-mode</span></code> will be a tristate option
|
||
taking values <code class="docutils literal notranslate"><span class="pre">timestamp</span></code> (the default), <code class="docutils literal notranslate"><span class="pre">checked-hash</span></code>, and
|
||
<code class="docutils literal notranslate"><span class="pre">unchecked-hash</span></code> corresponding to the values of <code class="docutils literal notranslate"><span class="pre">PycInvalidationMode</span></code>.</p>
|
||
<p><code class="docutils literal notranslate"><span class="pre">importlib.util</span></code> will be extended with a <code class="docutils literal notranslate"><span class="pre">source_hash(source)</span></code> function that
|
||
computes the hash used by the pyc writing code for a bytestring <strong>source</strong>.</p>
|
||
<p>Runtime configuration of hash-based pyc invalidation will be facilitated by a
|
||
new <code class="docutils literal notranslate"><span class="pre">--check-hash-based-pycs</span></code> interpreter option. This is a tristate option,
|
||
which may take 3 values: <code class="docutils literal notranslate"><span class="pre">default</span></code>, <code class="docutils literal notranslate"><span class="pre">always</span></code>, and <code class="docutils literal notranslate"><span class="pre">never</span></code>. The default
|
||
value, <code class="docutils literal notranslate"><span class="pre">default</span></code>, means the <code class="docutils literal notranslate"><span class="pre">check_source</span></code> flag in hash-based pycs
|
||
determines invalidation as described above. <code class="docutils literal notranslate"><span class="pre">always</span></code> causes the interpreter to
|
||
hash the source file for invalidation regardless of the value of the <code class="docutils literal notranslate"><span class="pre">check_source</span></code>
|
||
bit. <code class="docutils literal notranslate"><span class="pre">never</span></code> causes the interpreter to always assume hash-based pycs are
|
||
valid. When <code class="docutils literal notranslate"><span class="pre">--check-hash-based-pycs=never</span></code> is in effect, unchecked hash-based
|
||
pycs will be regenerated as unchecked hash-based pycs. Timestamp-based pycs are
|
||
unaffected by <code class="docutils literal notranslate"><span class="pre">--check-hash-based-pycs</span></code>.</p>
|
||
</section>
|
||
<section id="references">
|
||
<h2><a class="toc-backref" href="#references" role="doc-backlink">References</a></h2>
|
||
<aside class="footnote-list brackets">
|
||
<aside class="footnote brackets" id="frozensets" role="doc-footnote">
|
||
<dt class="label">[<a href="#id1">1</a>]</dt>
|
||
<dd><a class="reference external" href="http://benno.id.au/blog/2013/01/15/python-determinism">http://benno.id.au/blog/2013/01/15/python-determinism</a></dd></aside>
|
||
<aside class="footnote brackets" id="interning" role="doc-footnote">
|
||
<dt class="label">[<a href="#id2">2</a>]</dt>
|
||
<dd><a class="reference external" href="http://bugzilla.opensuse.org/show_bug.cgi?id=1049186">http://bugzilla.opensuse.org/show_bug.cgi?id=1049186</a></dd></aside>
|
||
</aside>
|
||
</section>
|
||
<section id="credits">
|
||
<h2><a class="toc-backref" href="#credits" role="doc-backlink">Credits</a></h2>
|
||
<p>The author would like to thank Gregory P. Smith, Christian Heimes, and Steve
|
||
Dower for useful conversations on the topic of this PEP.</p>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document has been placed in the public domain.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils" />
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-0552.rst">https://github.com/python/peps/blob/main/peps/pep-0552.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-0552.rst">2023-09-09 17:39:29 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#credits">Credits</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-0552.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |