mirror of https://github.com/python/peps
1490 lines
100 KiB
HTML
1490 lines
100 KiB
HTML
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="color-scheme" content="light dark">
|
||
<title>PEP 458 – Secure PyPI downloads with signed repository metadata | peps.python.org</title>
|
||
<link rel="shortcut icon" href="../_static/py.png">
|
||
<link rel="canonical" href="https://peps.python.org/pep-0458/">
|
||
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
||
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
||
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
||
<meta property="og:title" content='PEP 458 – Secure PyPI downloads with signed repository metadata | peps.python.org'>
|
||
<meta property="og:type" content="website">
|
||
<meta property="og:url" content="https://peps.python.org/pep-0458/">
|
||
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
||
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
||
<meta property="og:image:alt" content="Python PEPs">
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="description" content="Python Enhancement Proposals (PEPs)">
|
||
<meta name="theme-color" content="#3776ab">
|
||
</head>
|
||
<body>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Following system colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="9"></circle>
|
||
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected dark colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected light colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
<script>
|
||
|
||
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
||
</script>
|
||
<section id="pep-page-section">
|
||
<header>
|
||
<h1>Python Enhancement Proposals</h1>
|
||
<ul class="breadcrumbs">
|
||
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
||
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
||
<li>PEP 458</li>
|
||
</ul>
|
||
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
||
</button>
|
||
</header>
|
||
<article>
|
||
<section id="pep-content">
|
||
<h1 class="page-title">PEP 458 – Secure PyPI downloads with signed repository metadata</h1>
|
||
<dl class="rfc2822 field-list simple">
|
||
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Trishank Karthik Kuppusamy &lt;karthik at trishank.com&gt;,
|
||
Vladimir Diaz &lt;vladimir.diaz at nyu.edu&gt;,
|
||
Marina Moore &lt;mm9693 at nyu.edu&gt;,
|
||
Lukas Puehringer &lt;lukas.puehringer at nyu.edu&gt;,
|
||
Joshua Lock &lt;jlock at vmware.com&gt;,
|
||
Lois Anne DeLong &lt;lad278 at nyu.edu&gt;,
|
||
Justin Cappos &lt;jcappos at nyu.edu&gt;</dd>
|
||
<dt class="field-even">Sponsor<span class="colon">:</span></dt>
|
||
<dd class="field-even">Alyssa Coghlan &lt;ncoghlan at gmail.com&gt;</dd>
|
||
<dt class="field-odd">BDFL-Delegate<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Donald Stufft &lt;donald at stufft.io&gt;</dd>
|
||
<dt class="field-even">Discussions-To<span class="colon">:</span></dt>
|
||
<dd class="field-even"><a class="reference external" href="https://discuss.python.org/t/pep-458-secure-pypi-downloads-with-package-signing/2648">Discourse thread</a></dd>
|
||
<dt class="field-odd">Status<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><abbr title="Normative proposal accepted for implementation">Accepted</abbr></dd>
|
||
<dt class="field-even">Type<span class="colon">:</span></dt>
|
||
<dd class="field-even"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
|
||
<dt class="field-odd">Topic<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><a class="reference external" href="../topic/packaging/">Packaging</a></dd>
|
||
<dt class="field-even">Created<span class="colon">:</span></dt>
|
||
<dd class="field-even">27-Sep-2013</dd>
|
||
<dt class="field-odd">Post-History<span class="colon">:</span></dt>
|
||
<dd class="field-odd">06-Jan-2019, 13-Nov-2019</dd>
|
||
<dt class="field-even">Resolution<span class="colon">:</span></dt>
|
||
<dd class="field-even"><a class="reference external" href="https://discuss.python.org/t/pep-458-secure-pypi-downloads-with-package-signing/2648/115">Discourse message</a></dd>
|
||
</dl>
|
||
<hr class="docutils" />
|
||
<section id="contents">
|
||
<details><summary>Table of Contents</summary><ul class="simple">
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#proposed-tuf-integration">Proposed TUF Integration</a></li>
|
||
<li><a class="reference internal" href="#non-goals">Non-goals</a></li>
|
||
<li><a class="reference internal" href="#pep-status">PEP Status</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a></li>
|
||
<li><a class="reference internal" href="#threat-model">Threat Model</a></li>
|
||
<li><a class="reference internal" href="#definitions">Definitions</a></li>
|
||
<li><a class="reference internal" href="#overview-of-tuf">Overview of TUF</a></li>
|
||
<li><a class="reference internal" href="#integrating-pypi-with-tuf">Integrating PyPI with TUF</a><ul>
|
||
<li><a class="reference internal" href="#what-additional-repository-files-are-required-on-pypi">What Additional Repository Files are Required on PyPI?</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#pypi-and-tuf-metadata">PyPI and TUF Metadata</a><ul>
|
||
<li><a class="reference internal" href="#signing-metadata-and-repository-management">Signing Metadata and Repository Management</a></li>
|
||
<li><a class="reference internal" href="#how-to-establish-initial-trust-in-the-pypi-root-keys">How to Establish Initial Trust in the PyPI Root Keys</a></li>
|
||
<li><a class="reference internal" href="#minimum-security-model">Minimum Security Model</a></li>
|
||
<li><a class="reference internal" href="#metadata-expiry-times">Metadata Expiry Times</a></li>
|
||
<li><a class="reference internal" href="#metadata-scalability">Metadata Scalability</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#pypi-and-key-requirements">PyPI and Key Requirements</a><ul>
|
||
<li><a class="reference internal" href="#number-and-type-of-keys-recommended">Number and Type Of Keys Recommended</a></li>
|
||
<li><a class="reference internal" href="#managing-online-keys">Managing online keys</a></li>
|
||
<li><a class="reference internal" href="#managing-offline-keys">Managing offline keys</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#how-should-metadata-be-generated">How Should Metadata be Generated?</a><ul>
|
||
<li><a class="reference internal" href="#consistent-snapshots">Consistent Snapshots</a></li>
|
||
<li><a class="reference internal" href="#producing-consistent-snapshots">Producing Consistent Snapshots</a></li>
|
||
<li><a class="reference internal" href="#cleaning-up-old-metadata">Cleaning up old metadata</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#revoking-trust-in-projects-and-distributions">Revoking Trust in Projects and Distributions</a></li>
|
||
<li><a class="reference internal" href="#key-compromise-analysis">Key Compromise Analysis</a><ul>
|
||
<li><a class="reference internal" href="#in-the-event-of-a-key-compromise">In the Event of a Key Compromise</a></li>
|
||
<li><a class="reference internal" href="#auditing-snapshots">Auditing Snapshots</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#managing-future-changes-to-the-update-process">Managing Future Changes to the Update Process</a><ul>
|
||
<li><a class="reference internal" href="#hash-algorithm-transition-plan">Hash Algorithm Transition Plan</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#appendix-a-repository-attacks-prevented-by-tuf">Appendix A: Repository Attacks Prevented by TUF</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#acknowledgements">Acknowledgements</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
</details></section>
|
||
<section id="abstract">
|
||
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
||
<p>This PEP describes changes to the PyPI infrastructure that are needed to ensure
|
||
that users get valid packages from PyPI. These changes should have minimal
|
||
impact on other parts of the ecosystem. The PEP focuses on communication between
|
||
PyPI and users, and so does not require any action by package developers.
|
||
Developers will upload packages using the current process, and PyPI will
|
||
automatically generate signed repository metadata for these packages.</p>
|
||
<p>In order for the security mechanism to be
|
||
effective, additional work will need to be done by PyPI consumers (like pip) to
|
||
verify the signatures and metadata provided by PyPI. This verification can be
|
||
transparent to users (unless it fails) and provides an automatic security
|
||
mechanism. There is documentation for how to consume TUF metadata in the TUF
|
||
repository. However, changes to PyPI consumers are not a pre-requisite for
|
||
publishing the metadata from PyPI, and can be done
|
||
according to the timelines and priorities of individual projects.</p>
|
||
</section>
|
||
<section id="proposed-tuf-integration">
|
||
<h2><a class="toc-backref" href="#proposed-tuf-integration" role="doc-backlink">Proposed TUF Integration</a></h2>
|
||
<p>This PEP proposes how The Update Framework <a class="footnote-reference brackets" href="#id69" id="id1">[2]</a> (<a class="reference external" href="https://www.linuxfoundation.org/cloud-containers-virtualization/2017/10/cncf-host-two-security-projects-notary-tuf-specification/">TUF</a>) should be integrated with the
|
||
Python Package Index (PyPI <a class="footnote-reference brackets" href="#id68" id="id2">[1]</a>). TUF was designed to be a flexible
|
||
security add-on to a software updater or package manager. A full implementation
|
||
of the framework
|
||
integrates best security practices, such as separating role responsibilities,
|
||
adopting the many-man rule for signing packages, keeping signing keys offline,
|
||
and revocation of expired or compromised signing keys. As a result, attackers
|
||
would need to steal multiple signing keys, which are stored independently,
|
||
in order to compromise the role responsible for specifying a repository’s available
|
||
files. Or, alternatively, a role
|
||
responsible for indicating the latest snapshot of the repository may also have to be
|
||
compromised.</p>
|
||
<p>The initial integration proposed in this PEP will allow modern package managers,
|
||
such as pip <a class="footnote-reference brackets" href="#id70" id="id4">[3]</a>, to be more secure against attacks on PyPI mirrors and PyPI’s
|
||
own content distribution network, and to better protect users from such attacks.
|
||
Specifically, this PEP describes how PyPI processes
|
||
should be adapted to generate and incorporate TUF metadata (i.e., the minimum
|
||
security model). This minimum security model supports verification of PyPI
|
||
distributions that are signed with keys stored on PyPI. Distributions that are
|
||
uploaded by developers are signed by PyPI, requiring no action from developers
|
||
(other than uploading the distribution), and are immediately available for
|
||
download. The minimum security model also minimizes PyPI administrative
|
||
responsibilities by automating much of the signing process.</p>
|
||
<p>There is no discussion in <em>this</em> PEP of support for project distributions that
|
||
are signed by developers (maximum security model). This possible future extension
|
||
is covered in detail in <a class="pep reference internal" href="../pep-0480/" title="PEP 480 – Surviving a Compromise of PyPI: End-to-end signing of packages">PEP 480</a>. The maximum security model requires more PyPI
|
||
administrative work (though no added work for clients), and also proposes
|
||
an easy-to-use key management solution for developers/publishers, ideas on how
|
||
to interface with a potential future build farm on PyPI infrastructure, and the
|
||
feasibility of end-to-end signing.</p>
|
||
<p>While it does provide implementation recommendations, this PEP does not
|
||
prescribe exactly how package managers, such as pip, should be adapted
|
||
to install or update projects from PyPI with TUF metadata. Package managers
|
||
interested in adopting TUF on the client side may consult its <a class="reference external" href="https://theupdateframework.readthedocs.io/en/stable/api/tuf.ngclient.html">library
|
||
documentation</a>, which was created for this purpose.</p>
|
||
</section>
|
||
<section id="non-goals">
|
||
<h2><a class="toc-backref" href="#non-goals" role="doc-backlink">Non-goals</a></h2>
|
||
<p>This PEP does not eliminate any existing features from PyPI. In particular, it
|
||
does not replace existing support for OpenPGP signatures. Developers can continue
|
||
to upload detached OpenPGP signatures along with distributions. In the future,
|
||
<a class="pep reference internal" href="../pep-0480/" title="PEP 480 – Surviving a Compromise of PyPI: End-to-end signing of packages">PEP 480</a> may allow developers to directly sign TUF metadata using their OpenPGP keys.</p>
|
||
</section>
|
||
<section id="pep-status">
|
||
<h2><a class="toc-backref" href="#pep-status" role="doc-backlink">PEP Status</a></h2>
|
||
<p>Due to the amount of work required to implement this PEP, in early
|
||
2019 it was deferred until appropriate funding could be secured to
|
||
implement the PEP. The Python Software Foundation secured this funding
|
||
<a class="footnote-reference brackets" href="#id86" id="id6">[22]</a> and new PEP coauthors restarted PEP <a class="reference external" href="https://discuss.python.org/t/pep-458-secure-pypi-downloads-with-package-signing/2648/">discussion</a>.</p>
|
||
</section>
|
||
<section id="motivation">
|
||
<h2><a class="toc-backref" href="#motivation" role="doc-backlink">Motivation</a></h2>
|
||
<p>Attacks on software repositories are common, even in organizations with very
|
||
good security <a class="reference external" href="https://github.com/theupdateframework/pip/wiki/Attacks-on-software-repositories">practices</a>. The resulting repository compromise allows an
|
||
attacker to edit all files stored on the repository and sign these files using
|
||
any keys stored on the repository (online keys). In many signing schemes (like
|
||
TLS), this access allows the attacker to replace files on the repository and
|
||
make it look like these files are coming from PyPI. Without a way to revoke and
|
||
replace the trusted private key, it is very challenging to recover from a
|
||
repository compromise. In addition to the dangers of repository compromise,
|
||
software repositories are vulnerable to an attacker on the network (MITM)
|
||
intercepting and changing files. These and other attacks on software
|
||
repositories are detailed <a class="reference external" href="https://theupdateframework.github.io/security.html">here</a>.</p>
|
||
<p>This PEP, together with the follow-up proposal in <a class="pep reference internal" href="../pep-0480/" title="PEP 480 – Surviving a Compromise of PyPI: End-to-end signing of packages">PEP 480</a>, aims to protect users
|
||
of PyPI from compromises of the integrity, consistency, and freshness properties
|
||
of PyPI packages, and enhances compromise resilience by mitigating key risk and
|
||
providing mechanisms to recover from a compromise of PyPI or its signing keys.</p>
|
||
<p>On January 5, 2013, the Python Software Foundation (PSF) announced that <a class="footnote-reference brackets" href="#id71" id="id10">[4]</a> a security
|
||
breach had occurred on the
|
||
python.org wikis for Python and Jython. As a result, all of the wiki data was destroyed.
|
||
Fortunately, the PyPI infrastructure was not affected by this breach.
|
||
However, the incident is a reminder that PyPI needed to take defensive steps to
|
||
protect users as much as possible in the event of a compromise. Attacks on
|
||
software repositories happen all the time <a class="footnote-reference brackets" href="#id72" id="id11">[5]</a>. The PSF must accept the
|
||
possibility of security breaches and prepare PyPI accordingly because it is a
|
||
valuable resource used by thousands, if not millions, of people.</p>
|
||
<p>Before the wiki attack, PyPI used MD5 hashes to tell package managers, such as
|
||
pip, whether or not a distribution file was corrupted in transit. However, the absence
|
||
of SSL made it hard for package managers to verify transport integrity to PyPI.
|
||
It was therefore easy to launch a man-in-the-middle attack between pip and
|
||
PyPI, and arbitrarily change the content of distributions. As a result, users could be tricked into
|
||
installing malicious distributions. After the wiki
|
||
attack, several steps were proposed (some of which were implemented) to deliver
|
||
a much higher level of security than was previously the case. These steps included
|
||
requiring SSL to
|
||
communicate with PyPI <a class="footnote-reference brackets" href="#id73" id="id12">[6]</a>, restricting project names <a class="footnote-reference brackets" href="#id74" id="id13">[7]</a>, and migrating from
|
||
MD5 to SHA-2 hashes <a class="footnote-reference brackets" href="#id75" id="id14">[8]</a>.</p>
|
||
<p>Though necessary, these steps are insufficient to protect distributions because attacks are still
|
||
possible through other avenues. For example, a public mirror is trusted to
|
||
honestly mirror PyPI, but some mirrors may misbehave, whether by accident or through
|
||
malicious intervention.
|
||
Package managers such as pip are supposed to use signatures from PyPI to verify
|
||
distribution files downloaded from a <a class="pep reference internal" href="../pep-0381/" title="PEP 381 – Mirroring infrastructure for PyPI">public mirror</a>, but none are known to actually
|
||
do so <a class="footnote-reference brackets" href="#id76" id="id15">[10]</a>. Therefore, it would be wise to add more security measures to
|
||
detect attacks from public mirrors or content delivery networks <a class="footnote-reference brackets" href="#id77" id="id16">[11]</a> (CDNs).</p>
|
||
<p>Even though official mirrors have been <a class="pep reference internal" href="../pep-0449/" title="PEP 449 – Removal of the PyPI Mirror Auto Discovery and Naming Scheme">deprecated on PyPI</a>, a
|
||
wide variety of other attack vectors on package managers remain <a class="footnote-reference brackets" href="#id78" id="id17">[13]</a>. These attacks
|
||
can crash client systems, cause obsolete distributions to be installed, or even
|
||
allow an attacker to execute arbitrary code. In <a class="reference external" href="https://mail.python.org/pipermail/distutils-sig/2013-September/022755.html">September 2013</a>, a post was
|
||
made to the Distutils mailing list showing that the latest version of pip (at
|
||
the time) was susceptible to such attacks, and how TUF could protect users
|
||
against them <a class="footnote-reference brackets" href="#id79" id="id18">[14]</a>. Specifically, testing was done to see how pip would
|
||
respond to these attacks with and without TUF. Attacks tested included replay
|
||
and freeze, arbitrary installation, slow retrieval, and endless data. The post
|
||
also included a demonstration of how pip would respond if PyPI were
|
||
compromised.</p>
|
||
<p>To provide compromise-resilient protection of PyPI, this PEP proposes the use of
|
||
The Update Framework <a class="footnote-reference brackets" href="#id69" id="id20">[2]</a> (TUF). TUF provides protection from a variety of
|
||
attacks on software update systems, while also providing mechanisms to recover
|
||
from a repository compromise. TUF has been used in production by a number of
|
||
organizations, including use in Cloud Native Computing Foundation’s Notary
|
||
service, which provides the infrastructure for container image signing in Docker
|
||
Registry. The TUF specification has been the subject of three independent
|
||
security <a class="reference external" href="https://theupdateframework.github.io/audits.html">audits</a>.</p>
|
||
<p>The scope of <em>this</em> PEP is protecting users from compromises of PyPI mirrors,
|
||
and PyPI’s own TLS termination and content distribution infrastructure.
|
||
Protection from compromises of PyPI itself is discussed in <a class="pep reference internal" href="../pep-0480/" title="PEP 480 – Surviving a Compromise of PyPI: End-to-end signing of packages">PEP 480</a>.</p>
|
||
</section>
|
||
<section id="threat-model">
|
||
<h2><a class="toc-backref" href="#threat-model" role="doc-backlink">Threat Model</a></h2>
|
||
<p>The threat model assumes the following:</p>
|
||
<ul class="simple">
|
||
<li>Offline keys are safe and securely stored.</li>
|
||
<li>Attackers <em>cannot</em> compromise PyPI’s trusted keys stored online.</li>
|
||
<li>Attackers can respond to client requests.</li>
|
||
</ul>
|
||
<p>An attacker is considered successful if it can cause a client to install (or
|
||
leave installed) something other than the most up-to-date version of a
|
||
software distribution file. If the attacker is preventing the installation
|
||
of updates, they do not want clients to realize there is anything wrong.</p>
|
||
<p>This threat model describes the minimum security model. The maximum security
|
||
model described in <a class="pep reference internal" href="../pep-0480/" title="PEP 480 – Surviving a Compromise of PyPI: End-to-end signing of packages">PEP 480</a> also assumes that attackers can compromise PyPI’s
|
||
online keys.</p>
|
||
</section>
|
||
<section id="definitions">
|
||
<h2><a class="toc-backref" href="#definitions" role="doc-backlink">Definitions</a></h2>
|
||
<p>The keywords “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”,
|
||
“SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be
|
||
interpreted as described in <span class="target" id="index-0"></span><a class="rfc reference external" href="https://datatracker.ietf.org/doc/html/rfc2119.html"><strong>RFC 2119</strong></a>.</p>
|
||
<p>This PEP focuses only on integrating TUF into PyPI. However, the reader is
|
||
encouraged to review TUF design principles <a class="footnote-reference brackets" href="#id69" id="id22">[2]</a> and SHOULD be
|
||
familiar with the TUF specification <a class="footnote-reference brackets" href="#id81" id="id23">[16]</a>.</p>
|
||
<p>The following terms used in this PEP are defined in the Python Packaging
|
||
Glossary <a class="footnote-reference brackets" href="#id82" id="id24">[17]</a>: <em>project</em>, <em>release</em>, <em>distribution</em>.</p>
|
||
<p>Additional terms used in this PEP are defined as follows:</p>
|
||
<ul class="simple">
|
||
<li>Role: TUF specifies one <em>root</em> role and multiple other roles to which the
|
||
<em>root</em> role delegates responsibilities, directly or indirectly. The term
|
||
<em>top-level</em> role refers to the <em>root</em> role and any role specified directly by
|
||
the <em>root</em> role, i.e. <em>timestamp</em>, <em>snapshot</em> and <em>targets</em> roles. Each role
|
||
has a single metadata file that it is trusted to provide.</li>
|
||
<li>Distribution file: A versioned archive file that contains Python packages,
|
||
modules, and other resource files that are used to distribute a release. The
|
||
terms <em>distribution file</em>, <em>distribution package</em> <a class="footnote-reference brackets" href="#id82" id="id25">[17]</a>, or simply
|
||
<em>distribution</em> or <em>package</em> may be used interchangeably in this PEP.</li>
|
||
<li>Simple index: The HTML page that contains internal links to distribution
|
||
files.</li>
|
||
<li>Target files: As a rule of thumb, target files are all files on PyPI whose
|
||
integrity should be guaranteed with TUF. Typically, this includes
|
||
distribution files and PyPI metadata, such as simple indices.</li>
|
||
<li>Metadata: Metadata are signed files that describe roles, other metadata, and
|
||
target files. If not specified otherwise, metadata means TUF-specific
|
||
metadata.</li>
|
||
<li>Repository: A repository is a source for named metadata and target
|
||
files. Clients request metadata and target files stored on a repository.</li>
|
||
<li>Consistent snapshot: A set of TUF metadata and target files that capture the
|
||
complete state of all projects on PyPI as they existed at some fixed point in
|
||
time.</li>
|
||
<li>Developer: Either the owner or maintainer of a project who is allowed to
|
||
update the TUF metadata, as well as target files for a project.</li>
|
||
<li>Online key: A private cryptographic key that MUST be stored on the PyPI
|
||
server infrastructure. This is usually to allow automated signing with the
|
||
key. However, an attacker who compromises the PyPI infrastructure will be
|
||
able to read these keys.</li>
|
||
<li>Offline key: A private cryptographic key that MUST be stored independent of
|
||
the PyPI server infrastructure. This prevents automated signing with the
|
||
key. An attacker who compromises the PyPI infrastructure will not be able to
|
||
immediately read these keys.</li>
|
||
<li>Threshold signature scheme: A role can increase its resilience to key
|
||
compromises by specifying that at least t out of n keys are REQUIRED to sign
|
||
its metadata. A compromise of t-1 keys is insufficient to compromise the
|
||
role itself. Saying that a role requires (t, n) keys denotes the threshold
|
||
signature property.</li>
|
||
</ul>
|
||
</section>
|
||
<section id="overview-of-tuf">
|
||
<h2><a class="toc-backref" href="#overview-of-tuf" role="doc-backlink">Overview of TUF</a></h2>
|
||
<p>At its highest level, TUF provides applications with a secure method for
|
||
knowing about and obtaining new versions of files. On the
|
||
surface, this all sounds simple. The basic steps for updating applications are:</p>
|
||
<ul class="simple">
|
||
<li>Knowing an update exists.</li>
|
||
<li>Downloading a correct copy of the latest version of the updated file.</li>
|
||
</ul>
|
||
<p>The problem is that updating applications is only simple when there are no
|
||
malicious activities in the picture. If an attacker is trying to interfere with
|
||
these seemingly simple steps, there is plenty they can do.</p>
|
||
<p>Assume a software updater takes the approach of most systems (at least the ones
|
||
that try to be secure). It downloads both the file it wants and a cryptographic
|
||
signature of the file. The software updater already knows which key it trusts
|
||
to make the signature. It checks that the signature is correct and was made by
|
||
this trusted key. Unfortunately, the software updater is still at risk in many
|
||
ways, including the following scenarios:</p>
|
||
<ul class="simple">
|
||
<li>An attacker keeps giving the software updater the same update file, so it
|
||
never realizes there is an update.</li>
|
||
<li>An attacker gives the software updater an older, insecure version of a file
|
||
that it already has, so it downloads that one and blindly uses it thinking it
|
||
is newer.</li>
|
||
<li>An attacker gives the software updater a newer version of a file, but
|
||
not the newest one. The file is newer to the software updater, but it
|
||
may be insecure and exploitable by the attacker.</li>
|
||
<li>An attacker compromises the key used to sign these files and now the software
|
||
updater downloads a malicious file that is properly signed.</li>
|
||
</ul>
|
||
<p>TUF is designed to address these attacks, and others, by adding signed metadata
|
||
(text files that describe the repository’s files) to the repository and
|
||
referencing the metadata files during the update procedure. Repository files
|
||
are verified against the information included in the metadata before they are
|
||
handed off to the software update system. The framework also provides
|
||
multi-signature trust, explicit and implicit revocation of cryptographic keys,
|
||
responsibility separation of the metadata, and minimized key risk. For a full
|
||
list and outline of the repository attacks and software updater weaknesses
|
||
addressed by TUF, see Appendix A.</p>
|
||
</section>
|
||
<section id="integrating-pypi-with-tuf">
|
||
<h2><a class="toc-backref" href="#integrating-pypi-with-tuf" role="doc-backlink">Integrating PyPI with TUF</a></h2>
|
||
<p>A software update system must complete two main tasks to integrate with TUF.
|
||
First, the repository on the server side MUST be modified to provide signed
|
||
TUF metadata. This PEP is concerned with the first part of the integration,
|
||
and the changes on PyPI required to support software updates with TUF.</p>
|
||
<p>Second, it must add the framework to the client side of the update system. For
|
||
example, TUF MAY be integrated with the pip package manager. Thus, new versions
|
||
of pip going forward SHOULD use TUF by default to download and verify distributions
|
||
from PyPI before installing them. However, there may be unforeseen issues that
|
||
might prevent users from installing or updating distributions, including pip itself,
|
||
via TUF. Therefore, pip SHOULD provide an option, e.g.,
|
||
<code class="docutils literal notranslate"><span class="pre">--unsafely-disable-package-verification</span></code>, in order to work around such issues
|
||
until they are resolved. Note, the proposed option name is purposefully long,
|
||
because a user must be helped to understand that the action is unsafe and not
|
||
generally recommended.</p>
|
||
<p>We assume that pip would use TUF to verify distributions downloaded only from PyPI.
|
||
pip MAY support TAP <a class="reference external" href="https://github.com/theupdateframework/taps/blob/master/tap4.md">4</a> in order to use TUF to also verify distributions downloaded
|
||
from <a class="pep reference internal" href="../pep-0470/" title="PEP 470 – Removing External Hosting Support on PyPI">elsewhere</a>.</p>
|
||
<section id="what-additional-repository-files-are-required-on-pypi">
|
||
<h3><a class="toc-backref" href="#what-additional-repository-files-are-required-on-pypi" role="doc-backlink">What Additional Repository Files are Required on PyPI?</a></h3>
|
||
<p>In order for package managers like pip to download and verify distributions with
|
||
TUF, a few extra files MUST be added to PyPI. These extra repository files are
|
||
called TUF metadata, and they contain such information as which keys can be trusted,
|
||
the <a class="reference external" href="https://en.wikipedia.org/wiki/Cryptographic_hash_function">cryptographic hashes</a> of files, signatures, metadata version numbers, and
|
||
the date after which the metadata should be considered expired.</p>
|
||
<p>When a package manager wants to check for updates, it asks TUF to do the work.
|
||
That is, a package manager never has to deal with this additional metadata or
|
||
understand what’s going on underneath. If TUF reports back that there are
|
||
updates available, a package manager can then ask TUF to download these files
|
||
from PyPI. TUF downloads them and checks them against the TUF metadata that it
|
||
also downloads from the repository. If the downloaded target files are
|
||
trustworthy, TUF then hands them over to the package manager.</p>
|
||
<p>The <a class="reference external" href="https://theupdateframework.github.io/specification/latest/#document-formats">Document formats</a> section of the TUF specification provides information
|
||
about each type of required metadata and its expected content. The next
|
||
section covers the different kinds of metadata RECOMMENDED for PyPI.</p>
|
||
<p>In addition, all target files SHOULD be available on disk at least two times.
|
||
Once under their original filename, to provide backwards compatibility, and
|
||
once with their SHA-512 hash included in their
|
||
filename. This is required to produce <a class="reference internal" href="#consistent-snapshots">Consistent Snapshots</a>.</p>
|
||
<p>Depending on the file system used, different data deduplication mechanisms MAY
|
||
be employed to avoid storage increase from hard copies of target files.</p>
|
||
</section>
|
||
</section>
|
||
<section id="pypi-and-tuf-metadata">
|
||
<h2><a class="toc-backref" href="#pypi-and-tuf-metadata" role="doc-backlink">PyPI and TUF Metadata</a></h2>
|
||
<p>TUF metadata provides information that clients can use to make update
|
||
decisions. For example, a <em>targets</em> metadata lists the available target files
|
||
on PyPI and includes the required signatures, cryptographic hash, and
|
||
file sizes for each. Different metadata files provide different information and are
|
||
signed by separate roles. The <em>root</em> role indicates what metadata belongs to
|
||
each role. The concept of roles allows TUF to delegate responsibilities
|
||
to multiple roles, thus minimizing the impact of any one compromised role.</p>
|
||
<p>TUF requires four top-level roles. These are <em>root</em>, <em>timestamp</em>, <em>snapshot</em>,
|
||
and <em>targets</em>. The <em>root</em> role specifies the public cryptographic keys of the
|
||
top-level roles (including its own). The <em>timestamp</em> role references the
|
||
latest <em>snapshot</em> and can signify when a new snapshot of the repository is
|
||
available. The <em>snapshot</em> role indicates the latest version of all the TUF
|
||
metadata files (other than <em>timestamp</em>). The <em>targets</em> role lists the file
|
||
paths of available target files together with their cryptographic hashes.
|
||
The file paths must be specified relative to a base URL. This allows the
|
||
actual target files to be served from anywhere, as long as the base URL
|
||
can be accessed by the client. Each top-level role will serve its
|
||
responsibilities without exception. Table 1 provides an overview of the
|
||
roles used in TUF.</p>
|
||
<table class="docutils align-default">
|
||
<tbody>
|
||
<tr class="row-odd"><td colspan="2">Roles and Responsibilities</td>
|
||
</tr>
|
||
<tr class="row-even"><td>root</td>
|
||
<td>The root role is the locus of trust for the entire
|
||
repository. The root role signs the root.json metadata
|
||
file. This file indicates which keys are authorized for
|
||
each of the top-level roles, including for the root role
|
||
itself. The roles “root”, “snapshot”, “timestamp” and
|
||
“targets” must be specified and each has a list of public
|
||
keys.</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>targets</td>
|
||
<td>The targets role is responsible for indicating which
|
||
target files are available from the repository. More
|
||
precisely, it shares the responsibility of providing
|
||
information about the content of updates. The targets
|
||
role signs targets.json metadata, and can delegate trust
|
||
for repository files to other roles (delegated roles).</td>
|
||
</tr>
|
||
<tr class="row-even"><td>delegated roles</td>
|
||
<td>If the top-level targets role performs delegation, the
|
||
resulting delegated roles can then provide their own
|
||
metadata files. The format of the metadata files provided
|
||
by delegated targets roles is the same as that of
|
||
targets.json. As with targets.json, the latest version of
|
||
metadata files belonging to delegated roles are described
|
||
in the snapshot role’s metadata.</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>snapshot</td>
|
||
<td>The snapshot role is responsible for ensuring that
|
||
clients see a consistent repository state. It provides
|
||
repository state information by indicating the latest
|
||
versions of the top-level targets and delegated targets
|
||
metadata files on the repository in snapshot.json. root
|
||
and timestamp are not listed in snapshot.json, because
|
||
timestamp signs for its freshness, after snapshot.json
|
||
has been created, and root, which has all top-level keys,
|
||
is required ahead of time to trust any of the top-level
|
||
roles.</td>
|
||
</tr>
|
||
<tr class="row-even"><td>timestamp</td>
|
||
<td>The timestamp role is responsible for providing
|
||
information about the timeliness of available updates.
|
||
Timeliness information is made available by frequently
|
||
signing a new timestamp.json file that has a short
|
||
expiration time. This file indicates the latest version
|
||
of snapshot.json.</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>Table 1: An overview of the TUF roles.</p>
|
||
<p>Unless otherwise specified, this PEP RECOMMENDS that every metadata or
|
||
target file be hashed using the SHA2-512 function of
|
||
the <a class="reference external" href="https://en.wikipedia.org/wiki/SHA-2">SHA-2</a> family. SHA-2 has native and well-tested Python 2 and 3
|
||
support (allowing for verification of these hashes without additional,
|
||
non-Python dependencies). If stronger security guarantees are
|
||
required, then both SHA2-256 and SHA2-512 or both SHA2-256 and <a class="reference external" href="https://en.wikipedia.org/wiki/SHA-3">SHA3-256</a>
|
||
MAY be used instead. SHA2-256 and SHA3-256
|
||
are based on very different designs from each other, providing extra protection
|
||
against <a class="reference external" href="https://en.wikipedia.org/wiki/Collision_attack">collision attacks</a>. However, SHA-3
|
||
requires installing additional, non-Python dependencies for <a class="reference external" href="https://pip.pypa.io/en/latest/development/release-process/#python-2-support">Python 2</a>.</p>
|
||
<section id="signing-metadata-and-repository-management">
|
||
<h3><a class="toc-backref" href="#signing-metadata-and-repository-management" role="doc-backlink">Signing Metadata and Repository Management</a></h3>
|
||
<p>The top-level <em>root</em> role signs for the keys of the top-level <em>timestamp</em>,
|
||
<em>snapshot</em>, <em>targets</em>, and <em>root</em> roles. The <em>timestamp</em> role signs for every
|
||
new snapshot of the repository metadata. The <em>snapshot</em> role signs for <em>root</em>,
|
||
<em>targets</em>, and all delegated targets roles. The delegated targets role <em>bins</em>
|
||
further delegates to the <em>bin-n</em> roles, which sign for all distribution files
|
||
belonging to registered PyPI projects.</p>
|
||
<p>Figure 1 provides an overview of the roles available within PyPI, which
|
||
includes the top-level roles and the roles delegated to by <em>targets</em>. The figure
|
||
also indicates the types of keys used to sign each role, and which roles are
|
||
trusted to sign for files available on PyPI. The next two sections cover the
|
||
details of signing repository files and the types of keys used for each role.</p>
|
||
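<img alt="Overview of the TUF roles on PyPI: the top-level roles, the roles delegated to by targets, and the types of keys used to sign each role" class="invert-in-dark-mode" src="../_images/pep-0458-1.png" />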
|
||
<p>Figure 1: An overview of the role metadata available on PyPI.</p>
|
||
<p>The roles that change most frequently are <em>timestamp</em>, <em>snapshot</em> and roles
|
||
delegated to by <em>bins</em> (i.e., <em>bin-n</em>). The <em>timestamp</em> and <em>snapshot</em>
|
||
metadata MUST be updated whenever <em>root</em>, <em>targets</em> or delegated metadata are
|
||
updated. Observe, though, that <em>root</em> and <em>targets</em> metadata are much less
|
||
likely to be updated as often as delegated metadata. Similarly, the <em>bins</em> role
|
||
will only be updated when a <em>bin-n</em> role is added, updated, or removed. Therefore, <em>timestamp</em>,
|
||
<em>snapshot</em>, and <em>bin-n</em> metadata will most likely be updated frequently (possibly every
|
||
minute) due to delegated metadata being updated frequently in order to support
|
||
continuous delivery of projects. Continuous delivery is a set of processes
|
||
that PyPI uses to produce snapshots that can safely coexist and be deleted
|
||
independent of other snapshots <a class="footnote-reference brackets" href="#id83" id="id33">[18]</a>.</p>
|
||
<p>Every year, PyPI administrators SHOULD sign for <em>root</em> and <em>targets</em> role keys.
|
||
Automation will continuously sign for a timestamped snapshot of all projects. A
|
||
repository <a class="reference external" href="https://theupdateframework.readthedocs.io/en/stable/api/tuf.api.html">Metadata API</a> is available that can be used to <a class="reference external" href="https://github.com/theupdateframework/python-tuf/blob/v0.20.0/examples/repo_example/basic_repo.py">manage a TUF
|
||
repository</a>.</p>
|
||
<p>In standard operation, the <em>bin-n</em> metadata will be updated and signed as new
|
||
distributions are uploaded to PyPI. However, there will also need to be a
|
||
one-time online initialization mechanism to create and sign <em>bin-n</em> metadata for
|
||
all existing distributions that are part of the PyPI repository every time PyPI
|
||
is re-initialized.</p>
|
||
</section>
|
||
<section id="how-to-establish-initial-trust-in-the-pypi-root-keys">
|
||
<h3><a class="toc-backref" href="#how-to-establish-initial-trust-in-the-pypi-root-keys" role="doc-backlink">How to Establish Initial Trust in the PyPI Root Keys</a></h3>
|
||
<p>Package managers like pip MUST ship the <em>root</em> metadata file with the
|
||
installation files that users initially download. This includes information
|
||
about the keys trusted for all top-level roles (including the root keys themselves).
|
||
Package managers must also bundle a TUF client library. Any new version of <em>root</em>
|
||
metadata that the TUF client library may download is verified against the root keys
|
||
initially bundled with the package manager. If a root key is compromised,
|
||
but a threshold of keys are still secured, then PyPI administrators MUST push new
|
||
<em>root</em> metadata that revokes trust in the compromised keys. If a threshold of root
|
||
keys are compromised, then the <em>root</em> metadata MUST be updated out-of-band.
|
||
(However, the threshold of root keys should be chosen so that this event is extremely
|
||
unlikely.) Package managers do not necessarily need to be updated immediately if root
|
||
keys are revoked or added between new releases of the package manager, as the TUF update
|
||
process automatically handles cases where a threshold of previous <em>root</em> keys sign
|
||
for new <em>root</em> keys (assuming no backwards-incompatibility in the TUF specification
|
||
used). So, for example, if a package manager was initially shipped with version 1 of
|
||
the <em>root</em> metadata, and a threshold of <em>root</em> keys in version 1 signed version 2 of
|
||
the <em>root</em> metadata, and a threshold of <em>root</em> keys in version 2 signed version 3 of
|
||
the <em>root</em> metadata, then the package manager should be able to transparently update
|
||
its copy of the <em>root</em> metadata from version 1 to 3 using its TUF client library.</p>
|
||
<p>Thus, to repeat, the latest good copy of <em>root</em> metadata and a TUF client library MUST
|
||
be included in any new version of pip shipped with CPython (via ensurepip). The TUF
|
||
client library inside the package manager then loads the <em>root</em> metadata and downloads
|
||
the rest of the roles, including updating the <em>root</em> metadata if it has changed.
|
||
An <a class="reference external" href="https://github.com/theupdateframework/specification/blob/master/tuf-spec.md#5-detailed-workflows">outline of the update process</a> is available.</p>
|
||
</section>
|
||
<section id="minimum-security-model">
|
||
<h3><a class="toc-backref" href="#minimum-security-model" role="doc-backlink">Minimum Security Model</a></h3>
|
||
<p>There are two security models to consider when integrating TUF into PyPI. The
|
||
one proposed in this PEP is the minimum security model, which supports
|
||
verification of PyPI distributions signed with private cryptographic
|
||
keys stored on PyPI. Distributions uploaded by developers are signed by PyPI
|
||
and immediately available for download. A possible future extension to this
|
||
PEP, discussed in <a class="pep reference internal" href="../pep-0480/" title="PEP 480 – Surviving a Compromise of PyPI: End-to-end signing of packages">PEP 480</a>, proposes the maximum security model and allows
|
||
a developer to sign for their project. Developer keys are not stored online:
|
||
therefore, projects are safe from PyPI compromises.</p>
|
||
<p>The minimum security model requires no action from a developer and protects
|
||
against malicious CDNs <a class="footnote-reference brackets" href="#id84" id="id37">[19]</a> and public mirrors. To support continuous
|
||
delivery of uploaded distributions, PyPI signs for projects with an online key.
|
||
This level of security prevents projects from being accidentally or
|
||
deliberately tampered with by a mirror or a CDN because neither will
|
||
have any of the keys required to sign for projects. However, it does not
|
||
protect projects from attackers who have compromised PyPI, since they can
|
||
then manipulate TUF metadata using the keys stored online.</p>
|
||
<p>This PEP proposes that the <em>bin-n</em> roles sign for all PyPI projects with online
|
||
keys. These <em>bin-n</em> roles MUST all be delegated by the upper-level <em>bins</em> role,
|
||
which is signed with an offline key, and in turn MUST be delegated by the
|
||
top-level <em>targets</em> role, which is also signed with an offline key.
|
||
This means that when a package manager such as pip (i.e., using TUF) downloads
|
||
a distribution file from a project on PyPI, it will consult the <em>targets</em> role about
|
||
the TUF metadata for that distribution file. If ultimately no <em>bin-n</em> roles
|
||
delegated by <em>targets</em> via <em>bins</em> specify the distribution file, then it is
|
||
considered to be non-existent on PyPI.</p>
|
||
<p>Note, the reason why <em>targets</em> does not directly delegate to <em>bin-n</em>, but
|
||
instead uses the intermediary <em>bins</em> role, is so that other delegations can
|
||
easily be added or removed, without affecting the <em>bins</em>-to-<em>bin-n</em> mapping.
|
||
This is crucial for the implementation of <a class="pep reference internal" href="../pep-0480/" title="PEP 480 – Surviving a Compromise of PyPI: End-to-end signing of packages">PEP 480</a>.</p>
|
||
</section>
|
||
<section id="metadata-expiry-times">
|
||
<h3><a class="toc-backref" href="#metadata-expiry-times" role="doc-backlink">Metadata Expiry Times</a></h3>
|
||
<p>The metadata for the <em>root</em>, <em>targets</em>, and <em>bins</em> roles SHOULD each expire in
|
||
one year, because these metadata files are expected to change very rarely.</p>
|
||
<p>The <em>timestamp</em>, <em>snapshot</em>, and <em>bin-n</em> metadata SHOULD each expire in one day
|
||
because a CDN or mirror SHOULD synchronize itself with PyPI every day.
|
||
Furthermore, this generous time frame also takes into account client clocks
|
||
that are highly skewed or adrift.</p>
|
||
</section>
|
||
<section id="metadata-scalability">
|
||
<h3><a class="toc-backref" href="#metadata-scalability" role="doc-backlink">Metadata Scalability</a></h3>
|
||
<p>As the number of projects and distributions on a repository grows, TUF metadata will need to
|
||
grow correspondingly. For example, consider the <em>bins</em> role. In August 2013,
|
||
it was found that the size of the <em>bins</em> metadata was about 42MB if the <em>bins</em>
|
||
role itself signed for about 220K PyPI targets (which are simple indices and
|
||
distributions). This PEP does not delve into the details, but TUF features a
|
||
so-called <a class="reference external" href="https://github.com/theupdateframework/python-tuf/blob/v0.20.0/examples/repo_example/hashed_bin_delegation.py">“hashed bin delegation”</a> scheme that splits a large targets metadata file
|
||
into many small ones. This allows a TUF client updater to intelligently
|
||
download only a small number of TUF metadata files in order to update any
|
||
project signed for by the <em>bins</em> role. For example, applying this scheme to
|
||
the previous repository resulted in pip downloading between 1.3KB and 111KB to
|
||
install or upgrade a PyPI project via TUF.</p>
|
||
<p>Based on our findings as of the time this document was updated for
|
||
implementation (Nov 7 2019), summarized in Tables 2-3, PyPI SHOULD
|
||
split all targets in the <em>bins</em> role by delegating them to 16,384
|
||
<em>bin-n</em> roles (see C10 in Table 2). Each <em>bin-n</em> role would sign
|
||
for the PyPI targets whose SHA2-512 hashes fall into that bin
|
||
(see Figure 1 and <a class="reference internal" href="#consistent-snapshots">Consistent Snapshots</a>). It was found
|
||
that this number of bins would result in a 5-9% metadata overhead
|
||
(relative to the average size of downloaded distribution files; see V13 and
|
||
V15 in Table 3) for returning users, and a 69% overhead for new
|
||
users who are installing pip for the first time (see V17 in Table 3).</p>
|
||
<p>A few assumptions used in calculating these metadata overhead percentages:</p>
|
||
<ol class="arabic simple">
|
||
<li>We are ignoring root, timestamp, and top-level targets metadata.</li>
|
||
<li>pip will always be bundled with the latest good copy of metadata for all
|
||
roles.</li>
|
||
</ol>
|
||
<table class="docutils align-default">
|
||
<tbody>
|
||
<tr class="row-odd"><td>Name</td>
|
||
<td>Description</td>
|
||
<td>Value</td>
|
||
</tr>
|
||
<tr class="row-even"><td>C1</td>
|
||
<td># of bytes in a SHA2-512 hexadecimal digest</td>
|
||
<td>128</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>C2</td>
|
||
<td># of bytes for a SHA2-512 public key ID</td>
|
||
<td>64</td>
|
||
</tr>
|
||
<tr class="row-even"><td>C3</td>
|
||
<td># of bytes for an Ed25519 signature</td>
|
||
<td>128</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>C4</td>
|
||
<td># of bytes for an Ed25519 public key</td>
|
||
<td>64</td>
|
||
</tr>
|
||
<tr class="row-even"><td>C5</td>
|
||
<td># of bytes for a target relative file path</td>
|
||
<td>256</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>C6</td>
|
||
<td># of bytes to encode a target file size</td>
|
||
<td>7</td>
|
||
</tr>
|
||
<tr class="row-even"><td>C7</td>
|
||
<td># of bytes to encode a version number</td>
|
||
<td>6</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>C8</td>
|
||
<td># of targets (simple indices and distributions)</td>
|
||
<td>2,273,539</td>
|
||
</tr>
|
||
<tr class="row-even"><td>C9</td>
|
||
<td>Average # of bytes for a downloaded distribution</td>
|
||
<td>2,184,393</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>C10</td>
|
||
<td># of bins</td>
|
||
<td>16,384</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>C8 was computed by querying the number of release files.
|
||
C9 was derived by taking the average between a rough estimate of the average
|
||
size of release files <em>downloaded</em> over the past 31 days (1,628,321 bytes),
|
||
and the average size of release files on disk (2,740,465 bytes).
|
||
Ee Durbin helped to provide these numbers on November 7, 2019.</p>
|
||
<p>Table 2: A list of constants used to calculate metadata overhead.</p>
|
||
<table class="docutils align-default">
|
||
<tbody>
|
||
<tr class="row-odd"><td>Name</td>
|
||
<td>Description</td>
|
||
<td>Formula</td>
|
||
<td>Value</td>
|
||
</tr>
|
||
<tr class="row-even"><td>V1</td>
|
||
<td>Length of a path hash prefix</td>
|
||
<td>math.ceil(math.log(C10, 16))</td>
|
||
<td>4</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>V2</td>
|
||
<td>Total # of path hash prefixes</td>
|
||
<td>16**V1</td>
|
||
<td>65,536</td>
|
||
</tr>
|
||
<tr class="row-even"><td>V3</td>
|
||
<td>Avg # of targets per bin</td>
|
||
<td>math.ceil(C8/C10)</td>
|
||
<td>139</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>V4</td>
|
||
<td>Avg size of SHA-512 hashes per bin</td>
|
||
<td>V3*C1</td>
|
||
<td>17,792</td>
|
||
</tr>
|
||
<tr class="row-even"><td>V5</td>
|
||
<td>Avg size of target paths per bin</td>
|
||
<td>V3*C5</td>
|
||
<td>35,584</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>V6</td>
|
||
<td>Avg size of lengths per bin</td>
|
||
<td>V3*C6</td>
|
||
<td>973</td>
|
||
</tr>
|
||
<tr class="row-even"><td>V7</td>
|
||
<td>Avg size of bin-n metadata (bytes)</td>
|
||
<td>V4+V5+V6</td>
|
||
<td>54,349</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>V8</td>
|
||
<td>Total size of public key IDs in bins</td>
|
||
<td>C10*C2</td>
|
||
<td>1,048,576</td>
|
||
</tr>
|
||
<tr class="row-even"><td>V9</td>
|
||
<td>Total size of path hash prefixes in bins</td>
|
||
<td>V1*V2</td>
|
||
<td>262,144</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>V10</td>
|
||
<td>Est. size of bins metadata (bytes)</td>
|
||
<td>V8+V9</td>
|
||
<td>1,310,720</td>
|
||
</tr>
|
||
<tr class="row-even"><td>V11</td>
|
||
<td>Est. size of snapshot metadata (bytes)</td>
|
||
<td>C10*C7</td>
|
||
<td>98,304</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>V12</td>
|
||
<td>Est. size of metadata overhead per distribution per returning user (same snapshot)</td>
|
||
<td>2*V7</td>
|
||
<td>108,698</td>
|
||
</tr>
|
||
<tr class="row-even"><td>V13</td>
|
||
<td>Est. metadata overhead per distribution per returning user (same snapshot)</td>
|
||
<td>round((V12/C9)*100)</td>
|
||
<td>5%</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>V14</td>
|
||
<td>Est. size of metadata overhead per distribution per returning user (diff snapshot)</td>
|
||
<td>V12+V11</td>
|
||
<td>207,002</td>
|
||
</tr>
|
||
<tr class="row-even"><td>V15</td>
|
||
<td>Est. metadata overhead per distribution per returning user (diff snapshot)</td>
|
||
<td>round((V14/C9)*100)</td>
|
||
<td>9%</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>V16</td>
|
||
<td>Est. size of metadata overhead per distribution per new user</td>
|
||
<td>V14+V10</td>
|
||
<td>1,517,722</td>
|
||
</tr>
|
||
<tr class="row-even"><td>V17</td>
|
||
<td>Est. metadata overhead per distribution per new user</td>
|
||
<td>round((V16/C9)*100)</td>
|
||
<td>69%</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>Table 3: Estimated metadata overheads for new and returning users.</p>
|
||
<p>The interested reader may find an interactive version of the metadata overhead
|
||
calculator <a class="reference external" href="https://docs.google.com/spreadsheets/d/11_XkeHrf4GdhMYVqpYWsug6JNz5ZK6HvvmDZX0__K2I/edit?usp=sharing">here</a>.</p>
|
||
<p>This number of bins SHOULD increase when the metadata overhead for returning
|
||
users exceeds 50%. Presently, this SHOULD happen when the number of targets
|
||
increases at least 10x from over 2M to over 22M, at which point the metadata
|
||
overhead for returning and new users would be around 50-54% and 114%
|
||
respectively, assuming that the number of bins stays fixed. If the number of
|
||
bins is increased, then the cost for all users would effectively be the cost
|
||
for new users, because their cost would be dominated by the (once-in-a-while)
|
||
cost of downloading the large number of delegations in the <code class="docutils literal notranslate"><span class="pre">bins</span></code> metadata.
|
||
If the cost for new users should prove to be too much, primarily due to the
|
||
overhead of downloading the <code class="docutils literal notranslate"><span class="pre">bins</span></code> metadata, then this subject SHOULD be
|
||
revisited before that happens.</p>
|
||
<p>Note that changes to the number of bins on the server are transparent to the
|
||
client. The package manager will be required to download a fresh set of
|
||
metadata, as though it were a new user, but this operation will not require any
|
||
explicit code logic or user interaction in order to do so.</p>
|
||
<p>It is possible to make TUF metadata more compact by representing it in a binary
|
||
format, as opposed to the JSON text format. Nevertheless, a sufficiently large
|
||
number of projects and distributions will introduce scalability challenges at
|
||
some point, and therefore the <em>bins</em> role will still need delegations (as
|
||
outlined in Figure 1) in order to address the problem. The JSON format is an
|
||
open and well-known standard for data interchange, which is already supported by
|
||
the TUF reference implementation, and is therefore the data format recommended by
|
||
this PEP. However, due to the large number of delegations, compressed
|
||
versions of all metadata SHOULD also be made available to clients via the
|
||
existing Warehouse mechanisms for HTTP compression. In addition, the JSON
|
||
metadata could be compressed before being sent to clients. The TUF reference
|
||
implementation does not currently support downloading compressed JSON metadata,
|
||
but this could be added to reduce the metadata size.</p>
|
||
</section>
|
||
</section>
|
||
<section id="pypi-and-key-requirements">
|
||
<h2><a class="toc-backref" href="#pypi-and-key-requirements" role="doc-backlink">PyPI and Key Requirements</a></h2>
|
||
<p>In this section, the kinds of keys required to sign for TUF roles on PyPI are
|
||
examined. TUF is agnostic with respect to choices of digital signature
|
||
algorithms. However, this PEP RECOMMENDS that all digital signatures be
|
||
produced with the Ed25519 algorithm <a class="footnote-reference brackets" href="#id80" id="id40">[15]</a>. Ed25519 has native and
|
||
well-tested Python support (allowing for verification of signatures without
|
||
additional, non-Python dependencies), uses small keys, and is supported
|
||
by modern HSM and authentication token hardware.</p>
|
||
<section id="number-and-type-of-keys-recommended">
|
||
<h3><a class="toc-backref" href="#number-and-type-of-keys-recommended" role="doc-backlink">Number and Type Of Keys Recommended</a></h3>
|
||
<p>The <em>root</em> role key is critical for security and should very rarely be used.
|
||
It is primarily used for key revocation, and it is the locus of trust for all
|
||
of PyPI. The <em>root</em> role signs for the keys that are authorized for each of
|
||
the top-level roles (including its own). Keys belonging to the <em>root</em> role are
|
||
intended to be very well-protected and used with the least frequency of all
|
||
keys. It is RECOMMENDED that the PSF board determine the current set of trusted
|
||
root key holders, each of whom will own a (strong) root key.
|
||
A majority of them can then constitute a quorum to revoke or endow trust in all
|
||
top-level keys. Alternatively, the system administrators of PyPI could be
|
||
given responsibility for signing for the <em>root</em> role. Therefore, the <em>root</em>
|
||
role SHOULD require (t, n) keys, where n is the number of key holders determined
|
||
by the PSF board, and t > 1 (so that at least two members must sign the <em>root</em>
|
||
role).</p>
|
||
<p>The <em>targets</em> role will be used only to sign for the static delegation of all
|
||
targets to the <em>bins</em> role. Since these target delegations must be secured
|
||
against attacks in the event of a compromise, the keys for the <em>targets</em> role
|
||
MUST be offline and independent of other keys. For simplicity of key
|
||
management, without sacrificing security, it is RECOMMENDED that the keys of
|
||
the <em>targets</em> role be permanently discarded as soon as they have been created
|
||
and used to sign for the role. Therefore, the <em>targets</em> role SHOULD require
|
||
(2, 2) keys. Again, this is because the keys are going to be permanently
|
||
discarded, and more offline keys will not help resist key recovery
|
||
attacks <a class="footnote-reference brackets" href="#id85" id="id41">[20]</a> unless the diversity of cryptographic algorithms is maintained.</p>
|
||
<p>For similar reasons, the keys for the <em>bins</em> role SHOULD be set up similar to
|
||
the keys for the <em>targets</em> role.</p>
|
||
<p>In order to support continuous delivery, the keys for the <em>timestamp</em>,
|
||
<em>snapshot</em>, and all <em>bin-n</em> roles MUST be online. There is little benefit in
|
||
requiring all of these roles to use different online keys, since attackers
|
||
would presumably be able to compromise all of them if they compromise PyPI.
|
||
Therefore, it is reasonable to use one online key for all of them.</p>
|
||
</section>
|
||
<section id="managing-online-keys">
|
||
<h3><a class="toc-backref" href="#managing-online-keys" role="doc-backlink">Managing online keys</a></h3>
|
||
<p>The online key shared by the <em>timestamp</em>, <em>snapshot</em>, and all <em>bin-n</em> roles
|
||
MAY be stored, encrypted or not, on the Python infrastructure. For example,
|
||
the key MAY be kept on a self-hosted key management service (e.g. Hashicorp
|
||
<a class="reference external" href="https://www.vaultproject.io/">Vault</a>), or a third-party one (e.g. AWS <a class="reference external" href="https://aws.amazon.com/kms/">KMS</a>, Google Cloud <a class="reference external" href="https://cloud.google.com/kms/">KMS</a>, or Azure
|
||
Key <a class="reference external" href="https://docs.microsoft.com/en-us/azure/key-vault/basic-concepts">Vault</a>).</p>
|
||
<p>Some of these key management services allow keys to be stored on Hardware
|
||
Security Modules (HSMs) (e.g., Hashicorp <a class="reference external" href="https://www.vaultproject.io/docs/enterprise/hsm/index.html">Vault</a>, AWS <a class="reference external" href="https://aws.amazon.com/cloudhsm/">CloudHSM</a>, Google
|
||
Cloud <a class="reference external" href="https://cloud.google.com/hsm/">HSM</a>, Azure Key <a class="reference external" href="https://docs.microsoft.com/en-us/azure/key-vault/key-vault-hsm-protected-keys">Vault</a>). This prevents attackers from exfiltrating
|
||
the online private key (albeit not from using it, although their actions
|
||
may now be cryptographically auditable). However, this requires modifying
|
||
the reference TUF implementation to support HSMs (<a class="reference external" href="https://github.com/secure-systems-lab/securesystemslib/pull/170">WIP</a>).</p>
|
||
<p>Regardless of where and how this online key is kept, its use SHOULD be
|
||
carefully logged, monitored, and audited, ideally in such a manner that
|
||
attackers who compromise PyPI are unable to immediately turn off this logging,
|
||
monitoring, and auditing.</p>
|
||
</section>
|
||
<section id="managing-offline-keys">
|
||
<h3><a class="toc-backref" href="#managing-offline-keys" role="doc-backlink">Managing offline keys</a></h3>
|
||
<p>As explained in the previous section, the <em>root</em>, <em>targets</em>, and <em>bins</em> role
|
||
keys MUST be offline for maximum security. These keys will be offline in the
|
||
sense that their private keys MUST NOT be stored on PyPI, though some of them
|
||
MAY be online in the private infrastructure of the project.</p>
|
||
<p>There SHOULD be an offline key ceremony to generate, back up, and store these
|
||
keys in such a manner that the private keys can be read only by the Python
|
||
administrators when necessary (e.g., when rotating the keys for the
|
||
top-level TUF roles). Thus, keys SHOULD be generated, preferably in a physical
|
||
location where side-channel <a class="reference external" href="https://en.wikipedia.org/wiki/Side-channel_attack">attacks</a> are not a concern, using:</p>
|
||
<ol class="arabic simple">
|
||
<li>A trusted, <a class="reference external" href="https://en.wikipedia.org/wiki/Air_gap_(networking)">airgapped</a> computer with a true random number <a class="reference external" href="https://en.wikipedia.org/wiki/Hardware_random_number_generator">generator</a>, and
|
||
with no <strong>data</strong> persisting after the ceremony</li>
|
||
<li>A trusted operating system</li>
|
||
<li>A trusted set of third-party packages (such as updated versions of
|
||
cryptographic libraries or the TUF reference implementation, where the
|
||
versions provided by the trusted operating system are not recent enough)</li>
|
||
</ol>
|
||
<p>In order to avoid the persistence of sensitive data (e.g., private keys) other than
|
||
on backup media after the ceremony, offline keys SHOULD be generated
|
||
encrypted using strong passwords, either on (in decreasing order of trust):
|
||
private HSMs (e.g., <a class="reference external" href="https://www.yubico.com/products/yubihsm/">YubiHSM</a>), cloud-based HSMs (e.g., those listed above),
|
||
in volatile memory (e.g., RAM), or in nonvolatile memory
|
||
(e.g., SSD or microSD). If keys must be generated on nonvolatile memory,
|
||
then this memory MUST be irrecoverably destroyed after having securely
|
||
backed up the keys.</p>
|
||
<p>Passwords used to encrypt keys SHOULD be stored somewhere durable and
|
||
trustworthy to which only Python admins have access.</p>
|
||
<p>In order to minimize <a class="reference external" href="https://en.wikipedia.org/wiki/Operations_security">OPSEC</a> errors during the ceremony, scripts SHOULD be
|
||
written, for execution on the trusted key-generation computer, to automate
|
||
tedious steps of the ceremony, such as:</p>
|
||
<ul class="simple">
|
||
<li>Exporting to <a class="reference external" href="https://en.wikipedia.org/wiki/Sneakernet">sneakernet</a> all code and data (previous TUF metadata and <em>root</em>
|
||
keys) required to generate new keys and replace old ones</li>
|
||
<li>Tightening the firewall, updating the entire operating system in order to
|
||
fix security vulnerabilities, and airgapping the computer</li>
|
||
<li>Exporting <em>all</em> new TUF metadata and keys to encrypted backup media.
|
||
This backup provides a complete copy of the data required to restore the PyPI
|
||
TUF repository</li>
|
||
<li>Exporting <em>only</em> new TUF metadata and online keys to encrypted backup media.
|
||
This backup provides all online data for import into the PyPI infrastructure
|
||
and is useful, e.g., when the online data needs to be restored from a previous
|
||
archived state</li>
|
||
<li>Printing and saving cryptographic hashes of new TUF metadata. This printed copy
|
||
provides an additional offline paper backup, which can be used as a comparison
|
||
in the case of a compromise</li>
|
||
</ul>
|
||
<p>Note the one-time keys for the <em>targets</em> and <em>bins</em> roles MAY be safely
|
||
generated, used, and deleted during the offline key ceremony. Furthermore,
|
||
the <em>root</em> keys MAY not be generated during the offline key ceremony itself.
|
||
Instead, a threshold t of n Python administrators, as discussed above, MAY
|
||
independently sign the <em>root</em> metadata <strong>after</strong> the offline key ceremony used
|
||
to generate all other keys.</p>
|
||
</section>
|
||
</section>
|
||
<section id="how-should-metadata-be-generated">
|
||
<h2><a class="toc-backref" href="#how-should-metadata-be-generated" role="doc-backlink">How Should Metadata be Generated?</a></h2>
|
||
<p>Project developers expect the distributions they upload to PyPI to be
|
||
immediately available for download. Unfortunately, there will be problems when
|
||
many readers and writers simultaneously access the same metadata and
|
||
target files. That is, there needs to be a way to ensure consistency of
|
||
metadata and target files when multiple developers simultaneously change these
|
||
files. There are also issues with consistency on PyPI
|
||
without TUF, but the problem is more severe with signed metadata that MUST keep
|
||
track of the files available on PyPI in real-time.</p>
|
||
<p>Suppose that PyPI generates a <em>snapshot</em> that indicates the latest version of
|
||
every metadata, except <em>timestamp</em>, at version 1 and a client requests this
|
||
<em>snapshot</em> from PyPI. While the client is busy downloading this <em>snapshot</em>,
|
||
PyPI then timestamps a new snapshot at, say, version 2. Without ensuring
|
||
consistency of metadata, the client would find itself with a copy of <em>snapshot</em>
|
||
that disagrees with what is available on PyPI. The result would be indistinguishable from
|
||
arbitrary metadata injected by an attacker. The problem would also occur with
|
||
mirrors attempting to sync with PyPI.</p>
|
||
<section id="consistent-snapshots">
|
||
<h3><a class="toc-backref" href="#consistent-snapshots" role="doc-backlink">Consistent Snapshots</a></h3>
|
||
<p>To keep TUF metadata on PyPI consistent with the highly volatile target files,
|
||
consistent snapshots SHOULD be used. Each consistent snapshot captures the
|
||
state of all known projects at a given time and MAY safely coexist with any
|
||
other snapshot, or be deleted independently, without affecting any other
|
||
snapshot.</p>
|
||
<p>To maintain consistent snapshots, all TUF metadata MUST, when written to disk,
|
||
include a version number in their filename:</p>
|
||
<blockquote>
|
||
<div><dl class="simple">
|
||
<dt>VERSION_NUMBER.ROLENAME.json,</dt><dd>where VERSION_NUMBER is an incrementing integer, and ROLENAME is one of the
|
||
top-level metadata roles – <em>root</em>, <em>snapshot</em> or <em>targets</em> – or one of
|
||
the delegated targets roles – <em>bins</em> or <em>bin-n</em>.</dd>
|
||
</dl>
|
||
</div></blockquote>
|
||
<p>The only exception is the <em>timestamp</em> metadata file, whose version would not be known
|
||
in advance when a client performs an update. The <em>timestamp</em> metadata
|
||
lists the
|
||
version of the <em>snapshot</em> metadata, which in turn lists the versions of the
|
||
<em>targets</em> and delegated targets metadata, all as part of a given consistent
|
||
snapshot.</p>
|
||
<p>In normal usage, version number overflow is unlikely to occur. An 8-byte integer,
|
||
for instance, can be incremented once per millisecond and last almost 300 million
|
||
years. If an attacker increases the version number arbitrarily, the repository
|
||
can recover by revoking the compromised keys and resetting the version number as
|
||
described in the TUF <a class="reference external" href="https://github.com/theupdateframework/specification/blame/4b82990afdc6c6d77aa9d43e0632f01bb9e7752c/tuf-spec.md#L1112-L1120">specification</a>.</p>
|
||
<p>The <em>targets</em> or delegated targets metadata refer to the actual target
|
||
files, including their cryptographic hashes as specified above.
|
||
Thus, to mark a target file as part of a consistent snapshot it MUST, when
|
||
written to disk, include its hash in its filename:</p>
|
||
<blockquote>
|
||
<div><dl class="simple">
|
||
<dt>HASH.FILENAME</dt><dd>where HASH is the <a class="reference external" href="https://docs.python.org/3.7/library/hashlib.html#hashlib.hash.hexdigest">hex digest</a> of the hash of the file contents and
|
||
FILENAME is the original filename.</dd>
|
||
</dl>
|
||
</div></blockquote>
|
||
<p>This means that there MAY be multiple copies of every target file, one for each
|
||
of the cryptographic hash functions specified above.</p>
|
||
<p>Assuming infinite disk space, strictly incrementing version numbers, and no
|
||
<a class="reference external" href="https://en.wikipedia.org/wiki/Collision_(computer_science)">hash collisions</a>, a client may safely read from one snapshot while PyPI
|
||
produces another snapshot.</p>
|
||
<p>Clients, such as pip, that use the TUF protocol MUST be modified to download
|
||
every metadata and target file, except for <em>timestamp</em> metadata. This is done
|
||
by including, in the file request, the version of the file (for metadata),
|
||
or the cryptographic hash of the file (for target files) in the filename.</p>
|
||
<p>In this simple but effective manner, PyPI is able to capture a consistent
|
||
snapshot of all projects and the associated metadata at a given time. The next
|
||
subsection provides implementation details of this idea.</p>
|
||
<p>Note: This PEP does not prohibit using advanced file systems or tools to
|
||
produce consistent snapshots. There are two important reasons for proposing a simple solution in this PEP.
|
||
First, the solution does not mandate that PyPI
|
||
use any particular file system or tool. Second, the generic file-system based
|
||
approach allows mirrors to use extant file transfer tools, such as rsync, to
|
||
efficiently transfer consistent snapshots from PyPI.</p>
|
||
</section>
|
||
<section id="producing-consistent-snapshots">
|
||
<h3><a class="toc-backref" href="#producing-consistent-snapshots" role="doc-backlink">Producing Consistent Snapshots</a></h3>
|
||
<p>When a new distribution file is uploaded to PyPI, PyPI MUST update the
|
||
responsible <em>bin-n</em> metadata. Remember that all target files are sorted into
|
||
bins by their filename hashes. PyPI MUST also update <em>snapshot</em> to account for
|
||
the updated <em>bin-n</em> metadata, and <em>timestamp</em> to account for the updated
|
||
<em>snapshot</em> metadata. These updates SHOULD be handled by an automated <em>snapshot
|
||
process</em>.</p>
|
||
<p>File uploads MAY be handled in parallel; however, consistent snapshots MUST be
|
||
produced in a strictly sequential manner. Furthermore, as long as distribution
|
||
files are self-contained, a consistent snapshot MAY be produced for each
|
||
uploaded file. To do so, upload processes place new distribution files into a
|
||
concurrency-safe FIFO queue and the snapshot process reads from that queue one
|
||
file at a time and performs the following tasks:</p>
|
||
<p>First, it adds the new file path to the relevant <em>bin-n</em> metadata, increments
|
||
its version number, signs it with the <em>bin-n</em> role key, and writes it to
|
||
<em>VERSION_NUMBER.bin-N.json</em>.</p>
|
||
<p>Then, it takes the most recent <em>snapshot</em> metadata, updates its <em>bin-n</em>
|
||
metadata version numbers, increments its own version number, signs it with the
|
||
<em>snapshot</em> role key, and writes it to <em>VERSION_NUMBER.snapshot.json</em>.</p>
|
||
<p>And finally, the snapshot process takes the most recent <em>timestamp</em> metadata,
|
||
updates its <em>snapshot</em> metadata hash and version number, increments its own
|
||
version number, sets a new expiration time, signs it with the <em>timestamp</em> role
|
||
key, and writes it to <em>timestamp.json</em>.</p>
|
||
<p>When updating <em>bin-n</em> metadata for a consistent snapshot, the snapshot process
|
||
SHOULD also include any new or updated hashes of simple index pages in the
|
||
relevant <em>bin-n</em> metadata. Note that simple index pages may be generated
|
||
dynamically on API calls, so it is important that their output remains stable
|
||
throughout the validity of a consistent snapshot.</p>
|
||
<p>Since the snapshot process MUST generate consistent snapshots in a strictly
|
||
sequential manner, it constitutes a bottleneck. Fortunately, the operation of
|
||
signing is fast enough that this may be done a thousand or more times per
|
||
second.</p>
|
||
<p>Moreover, PyPI MAY serve distribution files to clients before the corresponding
|
||
consistent snapshot metadata is generated. In that case the client software
|
||
SHOULD inform the user that full TUF protection is not yet available but will
|
||
be shortly.</p>
|
||
<p>PyPI SHOULD use a <a class="reference external" href="https://en.wikipedia.org/wiki/Transaction_log">transaction log</a> to record upload processes and the
|
||
snapshot queue for auditing and to recover from errors after a server failure.</p>
|
||
</section>
|
||
<section id="cleaning-up-old-metadata">
|
||
<h3><a class="toc-backref" href="#cleaning-up-old-metadata" role="doc-backlink">Cleaning up old metadata</a></h3>
|
||
<p>To avoid running out of disk space due to the constant production of new
|
||
consistent snapshots, PyPI SHOULD regularly delete old consistent snapshots,
|
||
i.e. metadata and target files that were obsoleted some reasonable time in
|
||
the past, such as 1 hour.</p>
|
||
<p>In order to preserve the latest consistent snapshot, PyPI MAY use a
|
||
“mark-and-sweep” algorithm. That is, walk from the root of the latest
|
||
consistent snapshot, i.e. <em>timestamp</em> over <em>snapshot</em> over <em>targets</em> and
|
||
delegated targets down to the target files, marking all visited files, and
|
||
delete all unmarked files. The last few consistent snapshots may be preserved
|
||
in a similar fashion.</p>
|
||
<p>Deleting a consistent snapshot will cause clients to see nothing except HTTP
|
||
404 responses to any request for a file within that consistent snapshot.
|
||
Clients SHOULD then retry their requests (as before) with the latest consistent
|
||
snapshot.</p>
|
||
<p>Note that <em>root</em> metadata, even though versioned, is not part of any consistent
|
||
snapshot. PyPI MUST NOT delete old versions of <em>root</em> metadata. This guarantees
|
||
that clients can update to the latest <em>root</em> role keys, no matter how outdated
|
||
their local <em>root</em> metadata is.</p>
|
||
</section>
|
||
</section>
|
||
<section id="revoking-trust-in-projects-and-distributions">
|
||
<h2><a class="toc-backref" href="#revoking-trust-in-projects-and-distributions" role="doc-backlink">Revoking Trust in Projects and Distributions</a></h2>
|
||
<p>From time to time either a project or a distribution will need to be revoked.
|
||
To revoke trust in either a project or a distribution, the associated bin-n
|
||
role can simply remove the corresponding targets and re-sign the bin-n
|
||
metadata. This operation requires only the online bin-n key.</p>
|
||
</section>
|
||
<section id="key-compromise-analysis">
|
||
<h2><a class="toc-backref" href="#key-compromise-analysis" role="doc-backlink">Key Compromise Analysis</a></h2>
|
||
<p>This PEP has covered the minimum security model, the TUF roles that should be
|
||
added to support continuous delivery of distributions, and how to generate and
|
||
sign the metadata for each role. The remaining sections discuss how PyPI
|
||
SHOULD audit repository metadata, and the methods PyPI can use to detect and
|
||
recover from a PyPI compromise.</p>
|
||
<p>Table 4 summarizes a few of the attacks possible when a threshold number of
|
||
private cryptographic keys (belonging to any of the PyPI roles) are
|
||
compromised. The leftmost column lists the roles (or a combination of roles)
|
||
that have been compromised, and the columns to its right show whether the
|
||
compromised roles leave clients susceptible to malicious updates, a freeze
|
||
attack, or metadata inconsistency attacks. Note that if the timestamp, snapshot,
|
||
and bin-n roles are stored in the same online location, a compromise of one
|
||
means they will all be compromised. Therefore, the table considers these
|
||
roles together. A version of this table that considers these roles separately
|
||
is included in <a class="pep reference internal" href="../pep-0480/" title="PEP 480 – Surviving a Compromise of PyPI: End-to-end signing of packages">PEP 480</a>.</p>
|
||
<table class="docutils align-default">
|
||
<thead>
|
||
<tr class="row-odd"><th class="head">Role Compromise</th>
|
||
<th class="head">Malicious Updates</th>
|
||
<th class="head">Freeze Attack</th>
|
||
<th class="head">Metadata Inconsistency Attacks</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="row-even"><td>targets
|
||
<strong>OR</strong>
|
||
bins</td>
|
||
<td colspan="3">NO
|
||
timestamp and snapshot need to cooperate</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>timestamp
|
||
<strong>AND</strong>
|
||
snapshot
|
||
<strong>AND</strong>
|
||
bin-n</td>
|
||
<td colspan="3">YES
|
||
limited by earliest root, targets, or bins metadata expiry time</td>
|
||
</tr>
|
||
<tr class="row-even"><td>root</td>
|
||
<td colspan="3">YES</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>Table 4: Attacks possible by compromising certain combinations of role keys.
|
||
In <a class="reference external" href="https://mail.python.org/pipermail/distutils-sig/2013-September/022755.html">September 2013</a>, it was shown how the latest version (at the time) of pip
|
||
was susceptible to these attacks and how TUF could protect users against them
|
||
<a class="footnote-reference brackets" href="#id79" id="id61">[14]</a>.</p>
|
||
<p>Note that compromising <em>targets</em> or <em>bins</em>
|
||
does not immediately allow an attacker to serve malicious
|
||
updates. The attacker must also compromise the <em>timestamp</em> and <em>snapshot</em>
|
||
roles, which are both online and therefore more likely to be compromised.
|
||
This means that, in order to launch any attack, one must not only be able to
|
||
act as a man-in-the-middle, but also compromise the <em>timestamp</em> key (or
|
||
compromise the <em>root</em> keys and sign a new <em>timestamp</em> key). To launch any
|
||
attack other than a freeze attack, one must also compromise the <em>snapshot</em> key.
|
||
In practice, this PEP recommends storing the <em>snapshot</em>, <em>timestamp</em>, and
|
||
<em>bin-n</em> keys together, or even using the same key for all of these roles.
|
||
Because of this, the attacker only needs to compromise this single server to
|
||
perform any of the attacks listed above. Note that clients are still protected
|
||
against compromises of non-signing infrastructure such as CDNs or mirrors.
|
||
Moreover, the offline <em>root</em> key will
|
||
allow the repository to recover from an attack by revoking the online key(s).</p>
|
||
<p>The maximum security model shows how TUF mitigates online key compromises by
|
||
introducing additional roles for end-to-end signing. Details about how to generate
|
||
developer keys and sign uploaded distributions are provided in <a class="pep reference internal" href="../pep-0480/" title="PEP 480 – Surviving a Compromise of PyPI: End-to-end signing of packages">PEP 480</a>.</p>
|
||
<section id="in-the-event-of-a-key-compromise">
|
||
<h3><a class="toc-backref" href="#in-the-event-of-a-key-compromise" role="doc-backlink">In the Event of a Key Compromise</a></h3>
|
||
<p>A key compromise means that a threshold of keys (belonging to the metadata
|
||
roles on PyPI), as well as the PyPI infrastructure, have been compromised and
|
||
used to sign new metadata on PyPI.</p>
|
||
<p>If a threshold number of <em>timestamp</em>, <em>snapshot</em>, <em>targets</em>, <em>bins</em> or <em>bin-n</em>
|
||
keys have been compromised, then PyPI MUST take the following steps:</p>
|
||
<ol class="arabic simple">
|
||
<li>Revoke the <em>timestamp</em>, <em>snapshot</em> and <em>targets</em> role keys from
|
||
the <em>root</em> role. This is done by replacing the compromised <em>timestamp</em>,
|
||
<em>snapshot</em> and <em>targets</em> keys with newly issued keys.</li>
|
||
<li>Revoke the <em>bins</em> keys from the <em>targets</em> role by replacing their keys with
|
||
newly issued keys. Sign the new <em>targets</em> role metadata and discard the new
|
||
keys (because, as explained earlier, this increases the security of
|
||
<em>targets</em> metadata).</li>
|
||
<li>All targets of the <em>bin-n</em> roles SHOULD be compared with the last known
|
||
good consistent snapshot in which none of the <em>timestamp</em>, <em>snapshot</em>,
|
||
<em>bins</em> or <em>bin-n</em> keys
|
||
were known to have been compromised. Added, updated or deleted targets in
|
||
the compromised consistent snapshot that do not match the last known good
|
||
consistent snapshot MAY be restored to their previous versions. After
|
||
ensuring the integrity of all <em>bin-n</em> targets, their keys should be renewed
|
||
in the <em>bins</em> metadata.</li>
|
||
<li>The <em>bins</em> and <em>bin-n</em> metadata MUST have their version numbers incremented,
|
||
expiry times suitably extended, and signatures renewed.</li>
|
||
<li>A new timestamped consistent snapshot MUST be issued.</li>
|
||
</ol>
|
||
<p>Following these steps would preemptively protect all of these roles, even if
|
||
only one of them may have been compromised.</p>
|
||
<p>If a threshold number of <em>root</em> keys have been compromised, then PyPI MUST take
|
||
the above steps and also replace all <em>root</em> keys in the <em>root</em> role.</p>
|
||
<p>It is also RECOMMENDED that PyPI sufficiently document compromises with
|
||
security bulletins. These security bulletins will be most informative when
|
||
users of pip-with-TUF are unable to install or update a project because the
|
||
keys for the <em>timestamp</em>, <em>snapshot</em> or <em>root</em> roles are no longer valid. They
|
||
could then visit the PyPI web site to consult security bulletins that would
|
||
help to explain why they are no longer able to install or update, and then take
|
||
action accordingly. When a threshold number of <em>root</em> keys have not been
|
||
revoked due to a compromise, then new <em>root</em> metadata may be safely updated
|
||
because a threshold number of existing <em>root</em> keys will be used to sign for the
|
||
integrity of the new <em>root</em> metadata. TUF clients will be able to verify the
|
||
integrity of the new <em>root</em> metadata with a threshold number of previously
|
||
known <em>root</em> keys. This will be the common case. Otherwise, in the worst
|
||
case, in which a threshold number of <em>root</em> keys have been revoked due to a
|
||
compromise, an end-user may choose to update new <em>root</em> metadata with
|
||
<a class="reference external" href="https://en.wikipedia.org/wiki/Out-of-band#Authentication">out-of-band</a> mechanisms.</p>
|
||
</section>
|
||
<section id="auditing-snapshots">
|
||
<h3><a class="toc-backref" href="#auditing-snapshots" role="doc-backlink">Auditing Snapshots</a></h3>
|
||
<p>If a malicious party compromises PyPI, they can sign arbitrary files with any
|
||
of the online keys. The roles with offline keys (i.e., <em>root</em>, <em>targets</em> and <em>bins</em>)
|
||
are still protected. To safely recover from a repository compromise, snapshots
|
||
should be audited to ensure files are only restored to trusted versions.</p>
|
||
<p>When a repository compromise has been detected, the integrity of three types of
|
||
information must be validated:</p>
|
||
<ol class="arabic simple">
|
||
<li>If the online keys of the repository have been compromised, they can be
|
||
revoked by having the <em>targets</em> role sign new metadata delegating to a new
|
||
key.</li>
|
||
<li>If the role metadata on the repository has been changed, this would impact
|
||
the metadata that is signed by online keys. Any role information created
|
||
since the last period should be discarded. As a result, developers of new
|
||
projects will need to re-register their projects.</li>
|
||
<li>If the target files themselves may have been tampered with, they can be
|
||
validated using the stored hash information for target files that existed
|
||
at the time of the last period.</li>
|
||
</ol>
|
||
<p>In order to safely restore snapshots in the event of a compromise, PyPI SHOULD
|
||
maintain a small number of its own mirrors to copy PyPI snapshots according to
|
||
some schedule. The mirroring protocol can be used immediately for this
|
||
purpose. The mirrors must be secured and isolated such that they are
|
||
responsible only for mirroring PyPI. The mirrors can be checked against one
|
||
another to detect accidental or malicious failures.</p>
|
||
<p>Another approach is to generate the cryptographic hash of <em>snapshot</em>
|
||
periodically and tweet it. Perhaps a user comes forward with the actual
|
||
metadata and the repository maintainers can verify the metadata file’s cryptographic
|
||
hash. Alternatively, PyPI may periodically archive its own versions of
|
||
<em>snapshot</em> rather than rely on externally provided metadata. In this case,
|
||
PyPI SHOULD take the cryptographic hash of every target file on the
|
||
repository and store this data on an offline device. If any target file
|
||
hash has changed, this indicates an attack.</p>
|
||
<p>As for attacks that serve different versions of metadata, or freeze
|
||
a distribution at a specific version, they can be handled by TUF with techniques
|
||
like implicit key revocation and metadata mismatch detection <a class="footnote-reference brackets" href="#id69" id="id64">[2]</a>.</p>
|
||
</section>
|
||
</section>
|
||
<section id="managing-future-changes-to-the-update-process">
|
||
<h2><a class="toc-backref" href="#managing-future-changes-to-the-update-process" role="doc-backlink">Managing Future Changes to the Update Process</a></h2>
|
||
<p>If breaking changes are made to the update process, PyPI should implement these
|
||
changes without disrupting existing clients. For general guidance on how to do
|
||
so, see the ongoing discussion in the TAP <a class="reference external" href="https://github.com/theupdateframework/taps/pull/107">repository</a>.</p>
|
||
<p>Note that the changes to PyPI from this PEP will be backwards compatible. The
|
||
location of target files and simple indices are not changed in this PEP, so any
|
||
existing PyPI clients will still be able to perform updates using these files.
|
||
This PEP adds the ability for clients to use TUF metadata to improve the
|
||
security of the update process.</p>
|
||
<section id="hash-algorithm-transition-plan">
|
||
<h3><a class="toc-backref" href="#hash-algorithm-transition-plan" role="doc-backlink">Hash Algorithm Transition Plan</a></h3>
|
||
<p>If the algorithm used to hash target and metadata files becomes vulnerable, it
|
||
SHOULD be replaced by a stronger hash algorithm.</p>
|
||
<p>The TUF metadata format allows listing digests from different hash algorithms
|
||
alongside each other, together with an algorithm identifier, so that clients
|
||
can seamlessly switch between algorithms.</p>
|
||
<p>However, once support for an old algorithm is turned off, clients that don’t
|
||
support the new algorithm will only be able to install or update packages,
|
||
including the client itself, by disabling TUF verification. To allow clients to
|
||
transition without temporarily losing TUF security guarantees, we recommend
|
||
the following procedure.</p>
|
||
<ol class="arabic simple">
|
||
<li>Implement new algorithm in Warehouse.</li>
|
||
<li>Regenerate existing, unexpired TUF metadata to include hashes using both the
|
||
old and new algorithms. All new metadata going forward shall list both hash
|
||
algorithms.
|
||
Note, only TUF metadata that lists hash digests for target files or other
|
||
metadata needs to be renewed, that is <em>bin-n</em>, <em>snapshot</em> and <em>timestamp</em>.
|
||
Thus, only online keys are required to sign renewed metadata.</li>
|
||
<li>Announce transition on high-visibility channels, such as <a class="reference external" href="https://discuss.python.org/c/packaging">packaging on
|
||
Python Discourse</a> and the <a class="reference external" href="https://mail.python.org/mailman3/lists/pypi-announce.python.org/">PyPI changes mailing list</a>.</li>
|
||
<li>Give popular clients such as pip and bandersnatch the chance to adopt new
|
||
hash algorithm.</li>
|
||
<li>Give end-users the chance to update clients.</li>
|
||
<li>Get rough consensus from PyPI maintainers to remove the old hash algorithm.</li>
|
||
<li>Remove Warehouse support for old algorithm and only support new algorithm.</li>
|
||
</ol>
|
||
</section>
|
||
</section>
|
||
<section id="appendix-a-repository-attacks-prevented-by-tuf">
|
||
<h2><a class="toc-backref" href="#appendix-a-repository-attacks-prevented-by-tuf" role="doc-backlink">Appendix A: Repository Attacks Prevented by TUF</a></h2>
|
||
<ul class="simple">
|
||
<li><strong>Arbitrary software installation</strong>: An attacker installs anything it wants
|
||
on the client system. That is, an attacker can provide arbitrary files in
|
||
response to download requests and the files will not be detected as
|
||
illegitimate.</li>
|
||
<li><strong>Rollback attacks</strong>: An attacker presents a software update system with
|
||
files older than those the client has already seen. This causes the client to use
|
||
outdated files.</li>
|
||
<li><strong>Indefinite freeze attacks</strong>: An attacker continues to present a software
|
||
update system with the same files the client has already seen. The result is
|
||
that the client does not know that new files are available.</li>
|
||
<li><strong>Endless data attacks</strong>: An attacker responds to a file download request
|
||
with an endless stream of data, causing harm to clients (e.g., a disk
|
||
partition filling up or memory exhaustion).</li>
|
||
<li><strong>Slow retrieval attacks</strong>: An attacker responds to clients with a very slow
|
||
stream of data that essentially results in the client never continuing the
|
||
update process.</li>
|
||
<li><strong>Extraneous dependencies attacks</strong>: An attacker indicates to clients that in
|
||
order to install the software they want, they also need to install
|
||
unrelated software. This unrelated software can be from a trusted source
|
||
but may have known vulnerabilities that are exploitable by the attacker.</li>
|
||
<li><strong>Mix-and-match attacks</strong>: An attacker presents clients with a view of a
|
||
repository that includes files that never existed together on the repository
|
||
at the same time. This can result in, for example, outdated versions of
|
||
dependencies being installed.</li>
|
||
<li><strong>Wrong software installation</strong>: An attacker provides a client with a trusted
|
||
file that is not the one the client wanted.</li>
|
||
<li><strong>Malicious mirrors preventing updates</strong>: An attacker in control of one
|
||
repository mirror is able to prevent users from obtaining updates from
|
||
other, good mirrors.</li>
|
||
<li><strong>Vulnerability to key compromises</strong>: An attacker who is able to compromise a
|
||
single key or fewer than a given threshold of keys can compromise clients.
|
||
This includes relying on a single online key, such as only being protected
|
||
by SSL, or a single offline key, as most software update systems use
|
||
to sign files.</li>
|
||
</ul>
|
||
</section>
|
||
<section id="references">
|
||
<h2><a class="toc-backref" href="#references" role="doc-backlink">References</a></h2>
|
||
<aside class="footnote-list brackets">
|
||
<aside class="footnote brackets" id="id68" role="doc-footnote">
|
||
<dt class="label" id="id68">[<a href="#id2">1</a>]</dt>
|
||
<dd><a class="reference external" href="https://pypi.python.org">https://pypi.python.org</a></aside>
|
||
<aside class="footnote brackets" id="id69" role="doc-footnote">
|
||
<dt class="label" id="id69">[2]<em> (<a href='#id1'>1</a>, <a href='#id20'>2</a>, <a href='#id22'>3</a>, <a href='#id64'>4</a>) </em></dt>
|
||
<dd><a class="reference external" href="https://theupdateframework.github.io/papers/survivable-key-compromise-ccs2010.pdf">https://theupdateframework.github.io/papers/survivable-key-compromise-ccs2010.pdf</a></aside>
|
||
<aside class="footnote brackets" id="id70" role="doc-footnote">
|
||
<dt class="label" id="id70">[<a href="#id4">3</a>]</dt>
|
||
<dd><a class="reference external" href="http://www.pip-installer.org">http://www.pip-installer.org</a></aside>
|
||
<aside class="footnote brackets" id="id71" role="doc-footnote">
|
||
<dt class="label" id="id71">[<a href="#id10">4</a>]</dt>
|
||
<dd><a class="reference external" href="https://wiki.python.org/moin/WikiAttack2013">https://wiki.python.org/moin/WikiAttack2013</a></aside>
|
||
<aside class="footnote brackets" id="id72" role="doc-footnote">
|
||
<dt class="label" id="id72">[<a href="#id11">5</a>]</dt>
|
||
<dd><a class="reference external" href="https://github.com/theupdateframework/pip/wiki/Attacks-on-software-repositories">https://github.com/theupdateframework/pip/wiki/Attacks-on-software-repositories</a></aside>
|
||
<aside class="footnote brackets" id="id73" role="doc-footnote">
|
||
<dt class="label" id="id73">[<a href="#id12">6</a>]</dt>
|
||
<dd><a class="reference external" href="https://mail.python.org/pipermail/distutils-sig/2013-April/020596.html">https://mail.python.org/pipermail/distutils-sig/2013-April/020596.html</a></aside>
|
||
<aside class="footnote brackets" id="id74" role="doc-footnote">
|
||
<dt class="label">[<a href="#id13">7</a>]</dt>
|
||
<dd><a class="reference external" href="https://mail.python.org/pipermail/distutils-sig/2013-May/020701.html">https://mail.python.org/pipermail/distutils-sig/2013-May/020701.html</a></aside>
|
||
<aside class="footnote brackets" id="id75" role="doc-footnote">
|
||
<dt class="label">[<a href="#id14">8</a>]</dt>
|
||
<dd><a class="reference external" href="https://mail.python.org/pipermail/distutils-sig/2013-July/022008.html">https://mail.python.org/pipermail/distutils-sig/2013-July/022008.html</a></aside>
|
||
<aside class="footnote brackets" id="id76" role="doc-footnote">
|
||
<dt class="label">[<a href="#id15">10</a>]</dt>
|
||
<dd><a class="reference external" href="https://mail.python.org/pipermail/distutils-sig/2013-September/022773.html">https://mail.python.org/pipermail/distutils-sig/2013-September/022773.html</a></aside>
|
||
<aside class="footnote brackets" id="id77" role="doc-footnote">
|
||
<dt class="label">[<a href="#id16">11</a>]</dt>
|
||
<dd><a class="reference external" href="https://mail.python.org/pipermail/distutils-sig/2013-May/020848.html">https://mail.python.org/pipermail/distutils-sig/2013-May/020848.html</a></aside>
|
||
<aside class="footnote brackets" id="id78" role="doc-footnote">
|
||
<dt class="label">[<a href="#id17">13</a>]</dt>
|
||
<dd><a class="reference external" href="https://theupdateframework.github.io/papers/attacks-on-package-managers-ccs2008.pdf">https://theupdateframework.github.io/papers/attacks-on-package-managers-ccs2008.pdf</a></aside>
|
||
<aside class="footnote brackets" id="id79" role="doc-footnote">
|
||
<dt class="label">[14]<em> (<a href='#id18'>1</a>, <a href='#id61'>2</a>) </em></dt>
|
||
<dd><a class="reference external" href="https://mail.python.org/pipermail/distutils-sig/2013-September/022755.html">https://mail.python.org/pipermail/distutils-sig/2013-September/022755.html</a></aside>
|
||
<aside class="footnote brackets" id="id80" role="doc-footnote">
|
||
<dt class="label">[<a href="#id40">15</a>]</dt>
|
||
<dd><a class="reference external" href="http://ed25519.cr.yp.to/">http://ed25519.cr.yp.to/</a></aside>
|
||
<aside class="footnote brackets" id="id81" role="doc-footnote">
|
||
<dt class="label">[<a href="#id23">16</a>]</dt>
|
||
<dd><a class="reference external" href="https://github.com/theupdateframework/specification/blob/master/tuf-spec.md">https://github.com/theupdateframework/specification/blob/master/tuf-spec.md</a></aside>
|
||
<aside class="footnote brackets" id="id82" role="doc-footnote">
|
||
<dt class="label">[17]<em> (<a href='#id24'>1</a>, <a href='#id25'>2</a>) </em></dt>
|
||
<dd><a class="reference external" href="https://packaging.python.org/glossary">https://packaging.python.org/glossary</a></aside>
|
||
<aside class="footnote brackets" id="id83" role="doc-footnote">
|
||
<dt class="label">[<a href="#id33">18</a>]</dt>
|
||
<dd><a class="reference external" href="https://en.wikipedia.org/wiki/Continuous_delivery">https://en.wikipedia.org/wiki/Continuous_delivery</a></aside>
|
||
<aside class="footnote brackets" id="id84" role="doc-footnote">
|
||
<dt class="label">[<a href="#id37">19</a>]</dt>
|
||
<dd><a class="reference external" href="https://mail.python.org/pipermail/distutils-sig/2013-August/022154.html">https://mail.python.org/pipermail/distutils-sig/2013-August/022154.html</a></aside>
|
||
<aside class="footnote brackets" id="id85" role="doc-footnote">
|
||
<dt class="label">[<a href="#id41">20</a>]</dt>
|
||
<dd><a class="reference external" href="https://en.wikipedia.org/wiki/Key-recovery_attack">https://en.wikipedia.org/wiki/Key-recovery_attack</a></aside>
|
||
<aside class="footnote brackets" id="id86" role="doc-footnote">
|
||
<dt class="label">[<a href="#id6">22</a>]</dt>
|
||
<dd><a class="reference external" href="https://pyfound.blogspot.com/2019/09/pypi-security-q4-2019-request-for.html">https://pyfound.blogspot.com/2019/09/pypi-security-q4-2019-request-for.html</a></aside>
|
||
</aside>
|
||
</section>
|
||
<section id="acknowledgements">
|
||
<h2><a class="toc-backref" href="#acknowledgements" role="doc-backlink">Acknowledgements</a></h2>
|
||
<p>This material is based upon work supported by the National Science Foundation
|
||
under Grants No. CNS-1345049 and CNS-0959138. Any opinions, findings, and
|
||
conclusions or recommendations expressed in this material are those of the
|
||
author(s) and do not necessarily reflect the views of the National Science
|
||
Foundation.</p>
|
||
<p>We thank Alyssa Coghlan, Daniel Holth, Donald Stufft, and the distutils-sig
|
||
community in general for helping us to think about how to usably and
|
||
efficiently integrate TUF with PyPI.</p>
|
||
<p>Roger Dingledine, Sebastian Hahn, Nick Mathewson, Martin Peck and Justin Samuel
|
||
helped us to design TUF from its predecessor, Thandy of the Tor project.</p>
|
||
<p>We appreciate the efforts of Konstantin Andrianov, Geremy Condra, Zane Fisher,
|
||
Justin Samuel, Tian Tian, Santiago Torres, John Ward, and Yuyu Zheng in
|
||
developing TUF.</p>
|
||
<p>Vladimir Diaz, Monzur Muhammad, Sai Teja Peddinti, Sumana Harihareswara,
|
||
Ee Durbin and Dustin Ingram helped us to review this PEP.</p>
|
||
<p>Zane Fisher helped us to review and transcribe this PEP.</p>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document has been placed in the public domain.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils" />
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-0458.rst">https://github.com/python/peps/blob/main/peps/pep-0458.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-0458.rst">2023-10-11 12:05:51 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#proposed-tuf-integration">Proposed TUF Integration</a></li>
|
||
<li><a class="reference internal" href="#non-goals">Non-goals</a></li>
|
||
<li><a class="reference internal" href="#pep-status">PEP Status</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a></li>
|
||
<li><a class="reference internal" href="#threat-model">Threat Model</a></li>
|
||
<li><a class="reference internal" href="#definitions">Definitions</a></li>
|
||
<li><a class="reference internal" href="#overview-of-tuf">Overview of TUF</a></li>
|
||
<li><a class="reference internal" href="#integrating-pypi-with-tuf">Integrating PyPI with TUF</a><ul>
|
||
<li><a class="reference internal" href="#what-additional-repository-files-are-required-on-pypi">What Additional Repository Files are Required on PyPI?</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#pypi-and-tuf-metadata">PyPI and TUF Metadata</a><ul>
|
||
<li><a class="reference internal" href="#signing-metadata-and-repository-management">Signing Metadata and Repository Management</a></li>
|
||
<li><a class="reference internal" href="#how-to-establish-initial-trust-in-the-pypi-root-keys">How to Establish Initial Trust in the PyPI Root Keys</a></li>
|
||
<li><a class="reference internal" href="#minimum-security-model">Minimum Security Model</a></li>
|
||
<li><a class="reference internal" href="#metadata-expiry-times">Metadata Expiry Times</a></li>
|
||
<li><a class="reference internal" href="#metadata-scalability">Metadata Scalability</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#pypi-and-key-requirements">PyPI and Key Requirements</a><ul>
|
||
<li><a class="reference internal" href="#number-and-type-of-keys-recommended">Number and Type Of Keys Recommended</a></li>
|
||
<li><a class="reference internal" href="#managing-online-keys">Managing online keys</a></li>
|
||
<li><a class="reference internal" href="#managing-offline-keys">Managing offline keys</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#how-should-metadata-be-generated">How Should Metadata be Generated?</a><ul>
|
||
<li><a class="reference internal" href="#consistent-snapshots">Consistent Snapshots</a></li>
|
||
<li><a class="reference internal" href="#producing-consistent-snapshots">Producing Consistent Snapshots</a></li>
|
||
<li><a class="reference internal" href="#cleaning-up-old-metadata">Cleaning up old metadata</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#revoking-trust-in-projects-and-distributions">Revoking Trust in Projects and Distributions</a></li>
|
||
<li><a class="reference internal" href="#key-compromise-analysis">Key Compromise Analysis</a><ul>
|
||
<li><a class="reference internal" href="#in-the-event-of-a-key-compromise">In the Event of a Key Compromise</a></li>
|
||
<li><a class="reference internal" href="#auditing-snapshots">Auditing Snapshots</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#managing-future-changes-to-the-update-process">Managing Future Changes to the Update Process</a><ul>
|
||
<li><a class="reference internal" href="#hash-algorithm-transition-plan">Hash Algorithm Transition Plan</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#appendix-a-repository-attacks-prevented-by-tuf">Appendix A: Repository Attacks Prevented by TUF</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#acknowledgements">Acknowledgements</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-0458.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |