mirror of https://github.com/python/peps
1305 lines
123 KiB
HTML
1305 lines
123 KiB
HTML
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="color-scheme" content="light dark">
|
||
<title>PEP 635 – Structural Pattern Matching: Motivation and Rationale | peps.python.org</title>
|
||
<link rel="shortcut icon" href="../_static/py.png">
|
||
<link rel="canonical" href="https://peps.python.org/pep-0635/">
|
||
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
||
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
||
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
||
<meta property="og:title" content='PEP 635 – Structural Pattern Matching: Motivation and Rationale | peps.python.org'>
|
||
<meta property="og:type" content="website">
|
||
<meta property="og:url" content="https://peps.python.org/pep-0635/">
|
||
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
||
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
||
<meta property="og:image:alt" content="Python PEPs">
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="description" content="Python Enhancement Proposals (PEPs)">
|
||
<meta name="theme-color" content="#3776ab">
|
||
</head>
|
||
<body>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Following system colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="9"></circle>
|
||
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected dark colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected light colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
<script>
|
||
|
||
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
||
</script>
|
||
<section id="pep-page-section">
|
||
<header>
|
||
<h1>Python Enhancement Proposals</h1>
|
||
<ul class="breadcrumbs">
|
||
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
||
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
||
<li>PEP 635</li>
|
||
</ul>
|
||
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
||
</button>
|
||
</header>
|
||
<article>
|
||
<section id="pep-content">
|
||
<h1 class="page-title">PEP 635 – Structural Pattern Matching: Motivation and Rationale</h1>
|
||
<dl class="rfc2822 field-list simple">
|
||
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Tobias Kohn &lt;kohnt at tobiaskohn.ch&gt;,
|
||
Guido van Rossum &lt;guido at python.org&gt;</dd>
|
||
<dt class="field-even">BDFL-Delegate<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p></p></dd>
|
||
<dt class="field-odd">Discussions-To<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><a class="reference external" href="https://mail.python.org/archives/list/python-dev@python.org/">Python-Dev list</a></dd>
|
||
<dt class="field-even">Status<span class="colon">:</span></dt>
|
||
<dd class="field-even"><abbr title="Accepted and implementation complete, or no longer active">Final</abbr></dd>
|
||
<dt class="field-odd">Type<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><abbr title="Non-normative PEP containing background, guidelines or other information relevant to the Python ecosystem">Informational</abbr></dd>
|
||
<dt class="field-even">Created<span class="colon">:</span></dt>
|
||
<dd class="field-even">12-Sep-2020</dd>
|
||
<dt class="field-odd">Python-Version<span class="colon">:</span></dt>
|
||
<dd class="field-odd">3.10</dd>
|
||
<dt class="field-even">Post-History<span class="colon">:</span></dt>
|
||
<dd class="field-even">22-Oct-2020, 08-Feb-2021</dd>
|
||
<dt class="field-odd">Resolution<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><a class="reference external" href="https://mail.python.org/archives/list/python-committers@python.org/message/SQC2FTLFV5A7DV7RCEAR2I2IKJKGK7W3">Python-Committers message</a></dd>
|
||
</dl>
|
||
<hr class="docutils" />
|
||
<section id="contents">
|
||
<details><summary>Table of Contents</summary><ul class="simple">
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a><ul>
|
||
<li><a class="reference internal" href="#pattern-matching-and-oo">Pattern Matching and OO</a></li>
|
||
<li><a class="reference internal" href="#patterns-and-functional-style">Patterns and Functional Style</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a><ul>
|
||
<li><a class="reference internal" href="#overview-and-terminology">Overview and Terminology</a></li>
|
||
<li><a class="reference internal" href="#the-match-statement">The Match Statement</a><ul>
|
||
<li><a class="reference internal" href="#match-semantics">Match Semantics</a></li>
|
||
<li><a class="reference internal" href="#guards">Guards</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#patterns">Patterns</a><ul>
|
||
<li><a class="reference internal" href="#as-patterns">AS Patterns</a></li>
|
||
<li><a class="reference internal" href="#or-patterns">OR Patterns</a></li>
|
||
<li><a class="reference internal" href="#literal-patterns">Literal Patterns</a></li>
|
||
<li><a class="reference internal" href="#capture-patterns">Capture Patterns</a></li>
|
||
<li><a class="reference internal" href="#wildcard-pattern">Wildcard Pattern</a></li>
|
||
<li><a class="reference internal" href="#value-patterns">Value Patterns</a></li>
|
||
<li><a class="reference internal" href="#group-patterns">Group Patterns</a></li>
|
||
<li><a class="reference internal" href="#sequence-patterns">Sequence Patterns</a></li>
|
||
<li><a class="reference internal" href="#mapping-patterns">Mapping Patterns</a></li>
|
||
<li><a class="reference internal" href="#class-patterns">Class Patterns</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#history-and-context">History and Context</a></li>
|
||
<li><a class="reference internal" href="#backwards-compatibility">Backwards Compatibility</a></li>
|
||
<li><a class="reference internal" href="#security-implications">Security Implications</a></li>
|
||
<li><a class="reference internal" href="#reference-implementation">Reference Implementation</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
</details></section>
|
||
<section id="abstract">
|
||
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
||
<p>This PEP provides the motivation and rationale for <a class="pep reference internal" href="../pep-0634/" title="PEP 634 – Structural Pattern Matching: Specification">PEP 634</a>
|
||
(“Structural Pattern Matching: Specification”). First-time readers
|
||
are encouraged to start with <a class="pep reference internal" href="../pep-0636/" title="PEP 636 – Structural Pattern Matching: Tutorial">PEP 636</a>, which provides a gentler
|
||
introduction to the concepts, syntax and semantics of patterns.</p>
|
||
</section>
|
||
<section id="motivation">
|
||
<h2><a class="toc-backref" href="#motivation" role="doc-backlink">Motivation</a></h2>
|
||
<p>(Structural) pattern matching syntax is found in many languages, from
|
||
Haskell, Erlang and Scala to Elixir and Ruby. (A proposal for
|
||
JavaScript is also under consideration.)</p>
|
||
<p>Python already supports a limited form of this through sequence
|
||
unpacking assignments, which the new proposal leverages.</p>
|
||
<p>Several other common Python idioms are also relevant:</p>
|
||
<ul>
|
||
<li>The <code class="docutils literal notranslate"><span class="pre">if</span> <span class="pre">...</span> <span class="pre">elif</span> <span class="pre">...</span> <span class="pre">elif</span> <span class="pre">...</span> <span class="pre">else</span></code> idiom is often used to find
|
||
out the type or shape of an object in an ad-hoc fashion, using one
|
||
or more checks like <code class="docutils literal notranslate"><span class="pre">isinstance(x,</span> <span class="pre">cls)</span></code>, <code class="docutils literal notranslate"><span class="pre">hasattr(x,</span> <span class="pre">"attr")</span></code>,
|
||
<code class="docutils literal notranslate"><span class="pre">len(x)</span> <span class="pre">==</span> <span class="pre">n</span></code> or <code class="docutils literal notranslate"><span class="pre">"key"</span> <span class="pre">in</span> <span class="pre">x</span></code> as guards to select an applicable
|
||
block. The block can then assume <code class="docutils literal notranslate"><span class="pre">x</span></code> supports the interface
|
||
checked by the guard. For example:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
|
||
<span class="n">host</span><span class="p">,</span> <span class="n">port</span> <span class="o">=</span> <span class="n">x</span>
|
||
<span class="n">mode</span> <span class="o">=</span> <span class="s2">"http"</span>
|
||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span>
|
||
<span class="n">host</span><span class="p">,</span> <span class="n">port</span><span class="p">,</span> <span class="n">mode</span> <span class="o">=</span> <span class="n">x</span>
|
||
<span class="c1"># Etc.</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Code like this is more elegantly rendered using <code class="docutils literal notranslate"><span class="pre">match</span></code>:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">match</span> <span class="n">x</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="n">host</span><span class="p">,</span> <span class="n">port</span><span class="p">:</span>
|
||
<span class="n">mode</span> <span class="o">=</span> <span class="s2">"http"</span>
|
||
<span class="k">case</span> <span class="n">host</span><span class="p">,</span> <span class="n">port</span><span class="p">,</span> <span class="n">mode</span><span class="p">:</span>
|
||
<span class="k">pass</span>
|
||
<span class="c1"># Etc.</span>
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
<li>AST traversal code often looks for nodes matching a given pattern,
|
||
for example the code to detect a node of the shape “A + B * C” might
|
||
look like this:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">if</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="n">BinOp</span><span class="p">)</span> <span class="ow">and</span> <span class="n">node</span><span class="o">.</span><span class="n">op</span> <span class="o">==</span> <span class="s2">"+"</span>
|
||
<span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="o">.</span><span class="n">right</span><span class="p">,</span> <span class="n">BinOp</span><span class="p">)</span> <span class="ow">and</span> <span class="n">node</span><span class="o">.</span><span class="n">right</span><span class="o">.</span><span class="n">op</span> <span class="o">==</span> <span class="s2">"*"</span><span class="p">):</span>
|
||
<span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span> <span class="o">=</span> <span class="n">node</span><span class="o">.</span><span class="n">left</span><span class="p">,</span> <span class="n">node</span><span class="o">.</span><span class="n">right</span><span class="o">.</span><span class="n">left</span><span class="p">,</span> <span class="n">node</span><span class="o">.</span><span class="n">right</span><span class="o">.</span><span class="n">right</span>
|
||
<span class="c1"># Handle a + b*c</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Using <code class="docutils literal notranslate"><span class="pre">match</span></code> this becomes more readable:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">match</span> <span class="n">node</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="n">BinOp</span><span class="p">(</span><span class="s2">"+"</span><span class="p">,</span> <span class="n">a</span><span class="p">,</span> <span class="n">BinOp</span><span class="p">(</span><span class="s2">"*"</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)):</span>
|
||
<span class="c1"># Handle a + b*c</span>
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
<p>We believe that adding pattern matching to Python will enable Python
|
||
users to write cleaner, more readable code for examples like those
|
||
above, and many others.</p>
|
||
<p>For a more academic discussion of this proposal, see <a class="footnote-reference brackets" href="#id6" id="id1">[1]</a>.</p>
|
||
<section id="pattern-matching-and-oo">
|
||
<h3><a class="toc-backref" href="#pattern-matching-and-oo" role="doc-backlink">Pattern Matching and OO</a></h3>
|
||
<p>Pattern matching is complementary to the object-oriented paradigm.
|
||
Using OO and inheritance we can easily define a method on a base class
|
||
that defines default behavior for a specific operation on that class,
|
||
and we can override this default behavior in subclasses. We can also
|
||
use the Visitor pattern to separate actions from data.</p>
|
||
<p>But this is not sufficient for all situations. For example, a code
|
||
generator may consume an AST, and have many operations where the
|
||
generated code needs to vary based not just on the class of a node,
|
||
but also on the value of some class attributes, like the <code class="docutils literal notranslate"><span class="pre">BinOp</span></code>
|
||
example above. The Visitor pattern is insufficiently flexible for
|
||
this: it can only select based on the class.</p>
|
||
<p>See a <a class="reference external" href="https://github.com/gvanrossum/patma/blob/be5969442d0584005492134c3b24eea408709db2/examples/expr.py#L231">complete example</a>.</p>
|
||
<p>Like the Visitor pattern, pattern matching allows for a strict separation
|
||
of concerns: specific actions or data processing is independent of the
|
||
class hierarchy or manipulated objects. When dealing with predefined or
|
||
even built-in classes, in particular, it is often impossible to add further
|
||
methods to the individual classes. Pattern matching not only relieves the
|
||
programmer or class designer from the burden of the boilerplate code needed
|
||
for the Visitor pattern, but is also flexible enough to directly work with
|
||
built-in types. It naturally distinguishes between sequences of different
|
||
lengths, which might all share the same class despite obviously differing
|
||
structures. Moreover, pattern matching automatically takes inheritance
|
||
into account: a class <em>D</em> inheriting from <em>C</em> will be handled by a pattern
|
||
that targets <em>C</em> by default.</p>
|
||
<p>Object oriented programming is geared towards single-dispatch: it is a
|
||
single instance (or the type thereof) that determines which method is to
|
||
be called. This leads to a somewhat artificial situation in case of binary
|
||
operators where both objects might play an equal role in deciding which
|
||
implementation to use (Python addresses this through the use of reversed
|
||
binary methods). Pattern matching is structurally better suited to handle
|
||
such situations of multi-dispatch, where the action to be taken depends on
|
||
the types of several objects to equal parts.</p>
|
||
</section>
|
||
<section id="patterns-and-functional-style">
|
||
<h3><a class="toc-backref" href="#patterns-and-functional-style" role="doc-backlink">Patterns and Functional Style</a></h3>
|
||
<p>Many Python applications and libraries are not written in a consistent
|
||
OO style – unlike Java, Python encourages defining functions at the
|
||
top-level of a module, and for simple data structures, tuples (or
|
||
named tuples or lists) and dictionaries are often used exclusively or
|
||
mixed with classes or data classes.</p>
|
||
<p>Pattern matching is particularly suitable for picking apart such data
|
||
structures. As an extreme example, it’s easy to write code that picks
|
||
apart a JSON data structure using <code class="docutils literal notranslate"><span class="pre">match</span></code>:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">match</span> <span class="n">json_pet</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="p">{</span><span class="s2">"type"</span><span class="p">:</span> <span class="s2">"cat"</span><span class="p">,</span> <span class="s2">"name"</span><span class="p">:</span> <span class="n">name</span><span class="p">,</span> <span class="s2">"pattern"</span><span class="p">:</span> <span class="n">pattern</span><span class="p">}:</span>
|
||
<span class="k">return</span> <span class="n">Cat</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">pattern</span><span class="p">)</span>
|
||
<span class="k">case</span> <span class="p">{</span><span class="s2">"type"</span><span class="p">:</span> <span class="s2">"dog"</span><span class="p">,</span> <span class="s2">"name"</span><span class="p">:</span> <span class="n">name</span><span class="p">,</span> <span class="s2">"breed"</span><span class="p">:</span> <span class="n">breed</span><span class="p">}:</span>
|
||
<span class="k">return</span> <span class="n">Dog</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">breed</span><span class="p">)</span>
|
||
<span class="k">case</span><span class="w"> </span><span class="k">_</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Not a suitable pet"</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Functional programming generally prefers a declarative style with a focus
|
||
on relationships in data. Side effects are avoided whenever possible.
|
||
Pattern matching thus naturally fits and highly supports functional
|
||
programming style.</p>
|
||
</section>
|
||
</section>
|
||
<section id="rationale">
|
||
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
||
<p>This section provides the rationale for individual design decisions.
|
||
It takes the place of “Rejected ideas” in the standard PEP format.
|
||
It is organized in sections corresponding to the specification (<a class="pep reference internal" href="../pep-0634/" title="PEP 634 – Structural Pattern Matching: Specification">PEP 634</a>).</p>
|
||
<section id="overview-and-terminology">
|
||
<h3><a class="toc-backref" href="#overview-and-terminology" role="doc-backlink">Overview and Terminology</a></h3>
|
||
<p>Much of the power of pattern matching comes from the nesting of subpatterns.
|
||
That the success of a pattern match depends directly on the success of
|
||
its subpatterns is thus a cornerstone of the design. However, although a
|
||
pattern like <code class="docutils literal notranslate"><span class="pre">P(Q(),</span> <span class="pre">R())</span></code> succeeds only if both subpatterns <code class="docutils literal notranslate"><span class="pre">Q()</span></code>
|
||
and <code class="docutils literal notranslate"><span class="pre">R()</span></code> succeed (i.e. the success of pattern <code class="docutils literal notranslate"><span class="pre">P</span></code> depends on <code class="docutils literal notranslate"><span class="pre">Q</span></code>
|
||
and <code class="docutils literal notranslate"><span class="pre">R</span></code>), the pattern <code class="docutils literal notranslate"><span class="pre">P</span></code> is checked first. If <code class="docutils literal notranslate"><span class="pre">P</span></code> fails, neither
|
||
<code class="docutils literal notranslate"><span class="pre">Q()</span></code> nor <code class="docutils literal notranslate"><span class="pre">R()</span></code> will be tried (this is a direct consequence of the
|
||
fact that if <code class="docutils literal notranslate"><span class="pre">P</span></code> fails, there are no subjects to match against <code class="docutils literal notranslate"><span class="pre">Q()</span></code>
|
||
and <code class="docutils literal notranslate"><span class="pre">R()</span></code> in the first place).</p>
|
||
<p>Also note that patterns bind names to values rather than performing an
|
||
assignment. This reflects the fact that patterns aim to not have side
|
||
effects, which also means that Capture or AS patterns cannot assign a
|
||
value to an attribute or subscript. We thus consistently use the term
|
||
‘bind’ instead of ‘assign’ to emphasise this subtle difference between
|
||
traditional assignments and name binding in patterns.</p>
|
||
</section>
|
||
<section id="the-match-statement">
|
||
<h3><a class="toc-backref" href="#the-match-statement" role="doc-backlink">The Match Statement</a></h3>
|
||
<p>The match statement evaluates an expression to produce a subject, finds the
|
||
first pattern that matches the subject, and executes the associated block
|
||
of code. Syntactically, the match statement thus takes an expression and
|
||
a sequence of case clauses, where each case clause comprises a pattern and
|
||
a block of code.</p>
|
||
<p>Since case clauses comprise a block of code, they adhere to the existing
|
||
indentation scheme with the syntactic structure of
|
||
<code class="docutils literal notranslate"><span class="pre">&lt;keyword&gt;</span> <span class="pre">...:</span> <span class="pre">&lt;(indented)</span> <span class="pre">block&gt;</span></code>, which resembles a compound
|
||
statement. The keyword <code class="docutils literal notranslate"><span class="pre">case</span></code> reflects its widespread use in
|
||
pattern matching languages, ignoring those languages that use other
|
||
syntactic means such as a symbol like <code class="docutils literal notranslate"><span class="pre">|</span></code>, because it would not fit
|
||
established Python structures. The syntax of patterns following the
|
||
keyword is discussed below.</p>
|
||
<p>Given that the case clauses follow the structure of a compound statement,
|
||
the match statement itself naturally becomes a compound statement
|
||
as well, following the same syntactic structure. This naturally leads to
|
||
<code class="docutils literal notranslate"><span class="pre">match</span> <span class="pre">&lt;expr&gt;:</span> <span class="pre">&lt;case_clause&gt;+</span></code>. Note that the match statement determines
|
||
a quasi-scope in which the evaluated subject is kept alive (although not in
|
||
a local variable), similar to how a with statement might keep a resource
|
||
alive during execution of its block. Furthermore, control flows from the
|
||
match statement to a case clause and then leaves the block of the match
|
||
statement. The block of the match statement thus has both syntactic and
|
||
semantic meaning.</p>
|
||
<p>Various suggestions have sought to eliminate or avoid the naturally arising
|
||
“double indentation” of a case clause’s code block. Unfortunately, all such
|
||
proposals of <em>flat indentation schemes</em> come at the expense of violating
|
||
Python’s established structural paradigm, leading to additional syntactic
|
||
rules:</p>
|
||
<ul>
|
||
<li><em>Unindented case clauses.</em>
|
||
The idea is to align case clauses with the <code class="docutils literal notranslate"><span class="pre">match</span></code>, i.e.:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">match</span> <span class="n">expression</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="n">pattern_1</span><span class="p">:</span>
|
||
<span class="o">...</span>
|
||
<span class="k">case</span> <span class="n">pattern_2</span><span class="p">:</span>
|
||
<span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This may look awkward to the eye of a Python programmer, because
|
||
everywhere else a colon is followed by an indent. The <code class="docutils literal notranslate"><span class="pre">match</span></code> would
|
||
neither follow the syntactic scheme of simple nor composite statements
|
||
but rather establish a category of its own.</p>
|
||
</li>
|
||
<li><em>Putting the expression on a separate line after “match”.</em>
|
||
The idea is to use the expression yielding the subject as a statement
|
||
to avoid the singularity of <code class="docutils literal notranslate"><span class="pre">match</span></code> having no actual block despite
|
||
the colons:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">match</span><span class="p">:</span>
|
||
<span class="n">expression</span>
|
||
<span class="k">case</span> <span class="n">pattern_1</span><span class="p">:</span>
|
||
<span class="o">...</span>
|
||
<span class="k">case</span> <span class="n">pattern_2</span><span class="p">:</span>
|
||
<span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This was ultimately rejected because the first block would be another
|
||
novelty in Python’s grammar: a block whose only content is a single
|
||
expression rather than a sequence of statements. Attempts to amend this
|
||
issue by adding or repurposing yet another keyword along the lines of
|
||
<code class="docutils literal notranslate"><span class="pre">match:</span> <span class="pre">return</span> <span class="pre">expression</span></code> did not yield any satisfactory solution.</p>
|
||
</li>
|
||
</ul>
|
||
<p>Although flat indentation would save some horizontal space, the cost of
|
||
increased complexity or unusual rules is too high. It would also complicate
|
||
life for simple-minded code editors. Finally, the horizontal space issue can
|
||
be alleviated by allowing “half-indent” (i.e. two spaces instead of four)
|
||
for match statements (though we do not recommend this).</p>
|
||
<p>In sample programs using <code class="docutils literal notranslate"><span class="pre">match</span></code>, written as part of the development of this
|
||
PEP, a noticeable improvement in code brevity is observed, more than making
|
||
up for the additional indentation level.</p>
|
||
<p><em>Statement vs. Expression.</em> Some suggestions centered around the idea of
|
||
making <code class="docutils literal notranslate"><span class="pre">match</span></code> an expression rather than a statement. However, this
|
||
would fit poorly with Python’s statement-oriented nature and lead to
|
||
unusually long and complex expressions and the need to invent new
|
||
syntactic constructs or break well established syntactic rules. An
|
||
obvious consequence of <code class="docutils literal notranslate"><span class="pre">match</span></code> as an expression would be that case
|
||
clauses could no longer have arbitrary blocks of code attached, but only
|
||
a single expression. Overall, the strong limitations could in no way
|
||
offset the slight simplification in some special use cases.</p>
|
||
<p><em>Hard vs. Soft Keyword.</em> There were options to make match a hard keyword,
|
||
or choose a different keyword. Although using a hard keyword would simplify
|
||
life for simple-minded syntax highlighters, we decided not to use a hard
|
||
keyword for several reasons:</p>
|
||
<ul class="simple">
|
||
<li>Most importantly, the new parser doesn’t require us to do this. Unlike
|
||
with <code class="docutils literal notranslate"><span class="pre">async</span></code> that caused hardships with being a soft keyword for a few
|
||
releases, here we can make <code class="docutils literal notranslate"><span class="pre">match</span></code> a permanent soft keyword.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">match</span></code> is so commonly used in existing code, that it would break
|
||
almost every existing program and would put the burden of fixing code on many
|
||
people who may not even benefit from the new syntax.</li>
|
||
<li>It is hard to find an alternative keyword that would not be commonly used
|
||
in existing programs as an identifier, and would still clearly reflect the
|
||
meaning of the statement.</li>
|
||
</ul>
|
||
<p><strong>Use “as” or “|” instead of “case” for case clauses.</strong>
|
||
The pattern matching proposed here is a combination of multi-branch control
|
||
flow (in line with <code class="docutils literal notranslate"><span class="pre">switch</span></code> in Algol-derived languages or <code class="docutils literal notranslate"><span class="pre">cond</span></code> in Lisp)
|
||
and object-deconstruction as found in functional languages. While the proposed
|
||
keyword <code class="docutils literal notranslate"><span class="pre">case</span></code> highlights the multi-branch aspect, alternative keywords such
|
||
as <code class="docutils literal notranslate"><span class="pre">as</span></code> would equally be possible, highlighting the deconstruction aspect.
|
||
<code class="docutils literal notranslate"><span class="pre">as</span></code> or <code class="docutils literal notranslate"><span class="pre">with</span></code>, for instance, also have the advantage of already being
|
||
keywords in Python. However, since <code class="docutils literal notranslate"><span class="pre">case</span></code> as a keyword can only occur as a
|
||
leading keyword inside a <code class="docutils literal notranslate"><span class="pre">match</span></code> statement, it is easy for a parser to
|
||
distinguish between its use as a keyword and as a variable.</p>
|
||
<p>Other variants would use a symbol like <code class="docutils literal notranslate"><span class="pre">|</span></code> or <code class="docutils literal notranslate"><span class="pre">=></span></code>, or go entirely without
|
||
a special marker.</p>
|
||
<p>Since Python is a statement-oriented language in the tradition of Algol, and as
|
||
each composite statement starts with an identifying keyword, <code class="docutils literal notranslate"><span class="pre">case</span></code> seemed to
|
||
be most in line with Python’s style and traditions.</p>
|
||
<section id="match-semantics">
|
||
<h4><a class="toc-backref" href="#match-semantics" role="doc-backlink">Match Semantics</a></h4>
|
||
<p>The patterns of different case clauses might overlap in that more than
|
||
one case clause would match a given subject. The first-to-match rule
|
||
ensures that the selection of a case clause for a given subject is
|
||
unambiguous. Furthermore, case clauses can have increasingly general
|
||
patterns matching wider sets of subjects. The first-to-match rule
|
||
then ensures that the most precise pattern can be chosen (although it
|
||
is the programmer’s responsibility to order the case clauses correctly).</p>
|
||
<p>In a statically typed language, the match statement would be compiled to
|
||
a decision tree to select a matching pattern quickly and very efficiently.
|
||
This would, however, require that all patterns be purely declarative and
|
||
static, running against the established dynamic semantics of Python. The
|
||
proposed semantics thus represent a path incorporating the best of both
|
||
worlds: patterns are tried in a strictly sequential order so that each
|
||
case clause constitutes an actual statement. At the same time, we allow
|
||
the interpreter to cache any information about the subject or change the
|
||
order in which subpatterns are tried. In other words: if the interpreter
|
||
has found that the subject is not an instance of a class <code class="docutils literal notranslate"><span class="pre">C</span></code>, it can
|
||
directly skip case clauses testing for this again, without having to
|
||
perform repeated instance-checks. If a guard stipulates that a variable
|
||
<code class="docutils literal notranslate"><span class="pre">x</span></code> must be positive, say (i.e. <code class="docutils literal notranslate"><span class="pre">if</span> <span class="pre">x</span> <span class="pre">></span> <span class="pre">0</span></code>), the interpreter might
|
||
check this directly after binding <code class="docutils literal notranslate"><span class="pre">x</span></code> and before any further
|
||
subpatterns are considered.</p>
|
||
<p><em>Binding and scoping.</em> In many pattern matching implementations, each
|
||
case clause would establish a separate scope of its own. Variables bound
|
||
by a pattern would then only be visible inside the corresponding case block.
|
||
In Python, however, this does not make sense. Establishing separate scopes
|
||
would essentially mean that each case clause is a separate function without
|
||
direct access to the variables in the surrounding scope (without having to
|
||
resort to <code class="docutils literal notranslate"><span class="pre">nonlocal</span></code> that is). Moreover, a case clause could no longer
|
||
influence any surrounding control flow through standard statements such as
|
||
<code class="docutils literal notranslate"><span class="pre">return</span></code> or <code class="docutils literal notranslate"><span class="pre">break</span></code>. Hence, such strict scoping would lead to
|
||
unintuitive and surprising behavior.</p>
|
||
<p>A direct consequence of this is that any variable bindings outlive the
|
||
respective case or match statements. Even patterns that only match a
|
||
subject partially might bind local variables (this is, in fact, necessary
|
||
for guards to function properly). However, these semantics for variable
|
||
binding are in line with existing Python structures such as for loops and
|
||
with statements.</p>
|
||
</section>
|
||
<section id="guards">
|
||
<h4><a class="toc-backref" href="#guards" role="doc-backlink">Guards</a></h4>
|
||
<p>Some constraints cannot be adequately expressed through patterns alone.
|
||
For instance, a ‘less’ or ‘greater than’ relationship defies the usual
|
||
‘equal’ semantics of patterns. Moreover, different subpatterns are
|
||
independent and cannot refer to each other. The addition of <em>guards</em>
|
||
addresses these restrictions: a guard is an arbitrary expression attached
|
||
to a pattern, and it must evaluate to a “truthy” value for the pattern to succeed.</p>
|
||
<p>For example, <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">[x,</span> <span class="pre">y]</span> <span class="pre">if</span> <span class="pre">x</span> <span class="pre">&lt;</span> <span class="pre">y:</span></code> uses a guard (<code class="docutils literal notranslate"><span class="pre">if</span> <span class="pre">x</span> <span class="pre">&lt;</span> <span class="pre">y</span></code>) to
|
||
express a ‘less than’ relationship between two otherwise disjoint capture
|
||
patterns <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code>.</p>
|
||
<p>From a conceptual point of view, patterns describe structural constraints
|
||
on the subject in a declarative style, ideally without any side effects.
|
||
Recall, in particular, that patterns are clearly distinct from expressions,
|
||
following different objectives and semantics. Guards then enhance case
|
||
blocks in a highly controlled way with arbitrary expressions (that might
|
||
have side effects). Splitting the overall functionality into a static structural
|
||
and a dynamically evaluated part not only helps with readability, but can
|
||
also introduce dramatic potential for compiler optimizations. To keep this
|
||
clear separation, guards are only supported on the level of case clauses
|
||
and not for individual patterns.</p>
|
||
<p><strong>Example</strong> using guards:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">sort</span><span class="p">(</span><span class="n">seq</span><span class="p">):</span>
|
||
<span class="k">match</span> <span class="n">seq</span><span class="p">:</span>
|
||
<span class="k">case</span><span class="w"> </span><span class="p">[]</span> <span class="o">|</span> <span class="p">[</span><span class="k">_</span><span class="p">]:</span>
|
||
<span class="k">return</span> <span class="n">seq</span>
|
||
<span class="k">case</span> <span class="p">[</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">]</span> <span class="k">if</span> <span class="n">x</span> <span class="o">&lt;=</span> <span class="n">y</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">seq</span>
|
||
<span class="k">case</span> <span class="p">[</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">]:</span>
|
||
<span class="k">return</span> <span class="p">[</span><span class="n">y</span><span class="p">,</span> <span class="n">x</span><span class="p">]</span>
|
||
<span class="k">case</span> <span class="p">[</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">z</span><span class="p">]</span> <span class="k">if</span> <span class="n">x</span> <span class="o">&lt;=</span> <span class="n">y</span> <span class="o">&lt;=</span> <span class="n">z</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">seq</span>
|
||
<span class="k">case</span> <span class="p">[</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">z</span><span class="p">]</span> <span class="k">if</span> <span class="n">x</span> <span class="o">>=</span> <span class="n">y</span> <span class="o">>=</span> <span class="n">z</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="p">[</span><span class="n">z</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">x</span><span class="p">]</span>
|
||
<span class="k">case</span> <span class="p">[</span><span class="n">p</span><span class="p">,</span> <span class="o">*</span><span class="n">rest</span><span class="p">]:</span>
|
||
<span class="n">a</span> <span class="o">=</span> <span class="n">sort</span><span class="p">([</span><span class="n">x</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">rest</span> <span class="k">if</span> <span class="n">x</span> <span class="o">&lt;=</span> <span class="n">p</span><span class="p">])</span>
|
||
<span class="n">b</span> <span class="o">=</span> <span class="n">sort</span><span class="p">([</span><span class="n">x</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">rest</span> <span class="k">if</span> <span class="n">p</span> <span class="o">&lt;</span> <span class="n">x</span><span class="p">])</span>
|
||
<span class="k">return</span> <span class="n">a</span> <span class="o">+</span> <span class="p">[</span><span class="n">p</span><span class="p">]</span> <span class="o">+</span> <span class="n">b</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="patterns">
|
||
<span id="id2"></span><h3><a class="toc-backref" href="#patterns" role="doc-backlink">Patterns</a></h3>
|
||
<p>Patterns fulfill two purposes: they impose (structural) constraints on
|
||
the subject and they specify which data values should be extracted from
|
||
the subject and bound to variables. In iterable unpacking, which can be
|
||
seen as a prototype to pattern matching in Python, there is only one
|
||
<em>structural pattern</em> to express sequences while there is a rich set of
|
||
<em>binding patterns</em> to assign a value to a specific variable or field.
|
||
Full pattern matching differs from this in that there is more variety
|
||
in structural patterns but only a minimum of binding patterns.</p>
|
||
<p>Patterns differ from assignment targets (as in iterable unpacking) in two ways:
|
||
they impose additional constraints on the structure of the subject, and
|
||
a subject may safely fail to match a specific pattern at any point
|
||
(in iterable unpacking, this constitutes an error). The latter means that
|
||
patterns should avoid side effects wherever possible.</p>
|
||
<p>This desire to avoid side effects is one reason why capture patterns
|
||
don’t allow binding values to attributes or subscripts: if the
|
||
containing pattern were to fail in a later step, it would be hard to
|
||
revert such bindings.</p>
|
||
<p>A cornerstone of pattern matching is the possibility of arbitrarily
|
||
<em>nesting patterns</em>. The nesting allows expressing deep
|
||
tree structures (for an example of nested class patterns, see the motivation
|
||
section above) as well as alternatives.</p>
|
||
<p>Although patterns might superficially look like expressions,
|
||
it is important to keep in mind that there is a clear distinction. In fact,
|
||
no pattern is or contains an expression. It is more productive to think of
|
||
patterns as declarative elements similar to the formal parameters in a
|
||
function definition.</p>
|
||
<section id="as-patterns">
|
||
<h4><a class="toc-backref" href="#as-patterns" role="doc-backlink">AS Patterns</a></h4>
|
||
<p>Patterns fall into two categories: most patterns impose a (structural)
|
||
constraint that the subject needs to fulfill, whereas the capture pattern
|
||
binds the subject to a name without regard for the subject’s structure or
|
||
actual value. Consequently, a pattern can either express a constraint or
|
||
bind a value, but not both. AS patterns fill this gap in that they
|
||
allow the user to specify a general pattern as well as capture the subject
|
||
in a variable.</p>
|
||
<p>Typical use cases for the AS pattern include OR and Class patterns
|
||
together with a binding name as in, e.g., <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">BinOp('+'|'-'</span> <span class="pre">as</span> <span class="pre">op,</span> <span class="pre">...):</span></code>
|
||
or <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">[int()</span> <span class="pre">as</span> <span class="pre">first,</span> <span class="pre">int()</span> <span class="pre">as</span> <span class="pre">second]:</span></code>. The latter could be
|
||
understood as saying that the subject must fulfil two distinct patterns:
|
||
<code class="docutils literal notranslate"><span class="pre">[first,</span> <span class="pre">second]</span></code> as well as <code class="docutils literal notranslate"><span class="pre">[int(),</span> <span class="pre">int()]</span></code>. The AS pattern
|
||
can thus be seen as a special case of an ‘and’ pattern (see OR patterns
|
||
below for an additional discussion of ‘and’ patterns).</p>
|
||
<p>In an earlier version, the AS pattern was devised as a ‘Walrus pattern’,
|
||
written as <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">[first:=int(),</span> <span class="pre">second:=int()]</span></code>. However, using <code class="docutils literal notranslate"><span class="pre">as</span></code>
|
||
offers some advantages over <code class="docutils literal notranslate"><span class="pre">:=</span></code>:</p>
|
||
<ul class="simple">
|
||
<li>The walrus operator <code class="docutils literal notranslate"><span class="pre">:=</span></code> is used to capture the result of an expression
|
||
on the right hand side, whereas <code class="docutils literal notranslate"><span class="pre">as</span></code> generally indicates some form of
|
||
‘processing’ as in <code class="docutils literal notranslate"><span class="pre">import</span> <span class="pre">foo</span> <span class="pre">as</span> <span class="pre">bar</span></code> or <code class="docutils literal notranslate"><span class="pre">except</span> <span class="pre">E</span> <span class="pre">as</span> <span class="pre">err:</span></code>. Indeed,
|
||
the pattern <code class="docutils literal notranslate"><span class="pre">P</span> <span class="pre">as</span> <span class="pre">x</span></code> does not assign the pattern <code class="docutils literal notranslate"><span class="pre">P</span></code> to <code class="docutils literal notranslate"><span class="pre">x</span></code>, but
|
||
rather the subject that successfully matches <code class="docutils literal notranslate"><span class="pre">P</span></code>.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">as</span></code> allows for a more consistent data flow from left to right (the
|
||
attributes in Class patterns also follow a left-to-right data flow).</li>
|
||
<li>The walrus operator looks very similar to the syntax for matching attributes in the Class pattern,
|
||
potentially leading to some confusion.</li>
|
||
</ul>
|
||
<p><strong>Example</strong> using the AS pattern:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">simplify_expr</span><span class="p">(</span><span class="n">tokens</span><span class="p">):</span>
|
||
<span class="k">match</span> <span class="n">tokens</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="p">[(</span><span class="s1">'('</span><span class="o">|</span><span class="s1">'['</span><span class="p">)</span> <span class="k">as</span> <span class="n">l</span><span class="p">,</span> <span class="o">*</span><span class="n">expr</span><span class="p">,</span> <span class="p">(</span><span class="s1">')'</span><span class="o">|</span><span class="s1">']'</span><span class="p">)</span> <span class="k">as</span> <span class="n">r</span><span class="p">]</span> <span class="k">if</span> <span class="p">(</span><span class="n">l</span><span class="o">+</span><span class="n">r</span><span class="p">)</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'()'</span><span class="p">,</span> <span class="s1">'[]'</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="n">simplify_expr</span><span class="p">(</span><span class="n">expr</span><span class="p">)</span>
|
||
<span class="k">case</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="p">(</span><span class="s1">'+'</span><span class="o">|</span><span class="s1">'-'</span><span class="p">)</span> <span class="k">as</span> <span class="n">op</span><span class="p">,</span> <span class="n">right</span><span class="p">]:</span>
|
||
<span class="k">return</span> <span class="n">UnaryOp</span><span class="p">(</span><span class="n">op</span><span class="p">,</span> <span class="n">right</span><span class="p">)</span>
|
||
<span class="k">case</span> <span class="p">[(</span><span class="nb">int</span><span class="p">()</span> <span class="o">|</span> <span class="nb">float</span><span class="p">()</span> <span class="k">as</span> <span class="n">left</span><span class="p">)</span> <span class="o">|</span> <span class="n">Num</span><span class="p">(</span><span class="n">left</span><span class="p">),</span> <span class="s1">'+'</span><span class="p">,</span> <span class="p">(</span><span class="nb">int</span><span class="p">()</span> <span class="o">|</span> <span class="nb">float</span><span class="p">()</span> <span class="k">as</span> <span class="n">right</span><span class="p">)</span> <span class="o">|</span> <span class="n">Num</span><span class="p">(</span><span class="n">right</span><span class="p">)]:</span>
|
||
<span class="k">return</span> <span class="n">Num</span><span class="p">(</span><span class="n">left</span> <span class="o">+</span> <span class="n">right</span><span class="p">)</span>
|
||
<span class="k">case</span> <span class="p">[(</span><span class="nb">int</span><span class="p">()</span> <span class="o">|</span> <span class="nb">float</span><span class="p">())</span> <span class="k">as</span> <span class="n">value</span><span class="p">]:</span>
|
||
<span class="k">return</span> <span class="n">Num</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="or-patterns">
|
||
<h4><a class="toc-backref" href="#or-patterns" role="doc-backlink">OR Patterns</a></h4>
|
||
<p>The OR pattern allows you to combine ‘structurally equivalent’ alternatives
|
||
into a new pattern, i.e. several patterns can share a common handler. If any
|
||
of an OR pattern’s subpatterns matches the subject, the entire OR
|
||
pattern succeeds.</p>
|
||
<p>Statically typed languages prohibit the binding of names (capture patterns)
|
||
inside an OR pattern because of potential conflicts concerning the types of
|
||
variables. As a dynamically typed language, Python can be less restrictive
|
||
here and allow capture patterns inside OR patterns. However, each subpattern
|
||
must bind the same set of variables so as not to leave potentially undefined
|
||
names. With two alternatives <code class="docutils literal notranslate"><span class="pre">P</span> <span class="pre">|</span> <span class="pre">Q</span></code>, this means that if <em>P</em> binds the
|
||
variables <em>u</em> and <em>v</em>, <em>Q</em> must bind exactly the same variables <em>u</em> and <em>v</em>.</p>
|
||
<p>There was some discussion on whether to use the bar symbol <code class="docutils literal notranslate"><span class="pre">|</span></code> or the <code class="docutils literal notranslate"><span class="pre">or</span></code>
|
||
keyword to separate alternatives. The OR pattern does not fully fit
|
||
the existing semantics and usage of either of these two symbols. However,
|
||
<code class="docutils literal notranslate"><span class="pre">|</span></code> is the symbol of choice in all programming languages with support of
|
||
the OR pattern and is used in that capacity for regular expressions in
|
||
Python as well. It is also the traditional separator between alternatives
|
||
in formal grammars (including Python’s).
|
||
Moreover, <code class="docutils literal notranslate"><span class="pre">|</span></code> is not only used for bitwise OR, but also
|
||
for set unions and dict merging (<a class="pep reference internal" href="../pep-0584/" title="PEP 584 – Add Union Operators To dict">PEP 584</a>).</p>
|
||
<p>Other alternatives were considered as well, but none of these would allow
|
||
OR-patterns to be nested inside other patterns:</p>
|
||
<ul>
|
||
<li><em>Using a comma</em>:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">case</span> <span class="mi">401</span><span class="p">,</span> <span class="mi">403</span><span class="p">,</span> <span class="mi">404</span><span class="p">:</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Some HTTP error"</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This looks too much like a tuple – we would have to find a different way
|
||
to spell tuples, and the construct would have to be parenthesized inside
|
||
the argument list of a class pattern. In general, commas already have many
|
||
different meanings in Python; we shouldn’t add more.</p>
|
||
</li>
|
||
<li><em>Using stacked cases</em>:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">case</span> <span class="mi">401</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="mi">403</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="mi">404</span><span class="p">:</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Some HTTP error"</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This is how this would be done in <em>C</em>, using its fall-through semantics
|
||
for cases. However, we don’t want to mislead people into thinking that
|
||
match/case uses fall-through semantics (which are a common source of bugs
|
||
in <em>C</em>). Also, this would be a novel indentation pattern, which might make
|
||
it harder to support in IDEs and such (it would break the simple rule “add
|
||
an indentation level after a line ending in a colon”). Finally, this
|
||
would not support OR patterns nested inside other patterns, either.</p>
|
||
</li>
|
||
<li><em>Using “case in” followed by a comma-separated list</em>:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">case</span> <span class="ow">in</span> <span class="mi">401</span><span class="p">,</span> <span class="mi">403</span><span class="p">,</span> <span class="mi">404</span><span class="p">:</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Some HTTP error"</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This would not work for OR patterns nested inside other patterns, like:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">case</span> <span class="n">Point</span><span class="p">(</span><span class="mi">0</span><span class="o">|</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="o">|</span><span class="mi">1</span><span class="p">):</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="s2">"A corner of the unit square"</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
<p><strong>AND and NOT Patterns</strong></p>
|
||
<p>Since this proposal defines an OR-pattern (<code class="docutils literal notranslate"><span class="pre">|</span></code>) to match one of several alternatives,
|
||
why not also an AND-pattern (<code class="docutils literal notranslate"><span class="pre">&</span></code>) or even a NOT-pattern (<code class="docutils literal notranslate"><span class="pre">!</span></code>)?
|
||
Especially given that some other languages (<code class="docutils literal notranslate"><span class="pre">F#</span></code> for example) support
|
||
AND-patterns.</p>
|
||
<p>However, it is not clear how useful this would be. The semantics for matching
|
||
dictionaries, objects and sequences already incorporates an implicit ‘and’:
|
||
all attributes and elements mentioned must be present for the match to
|
||
succeed. Guard conditions can also support many of the use cases that a
|
||
hypothetical ‘and’ operator would be used for.</p>
|
||
<p>A negation of a match pattern using the operator <code class="docutils literal notranslate"><span class="pre">!</span></code> as a prefix
|
||
would match exactly if the pattern itself does not match. For
|
||
instance, <code class="docutils literal notranslate"><span class="pre">!(3</span> <span class="pre">|</span> <span class="pre">4)</span></code> would match anything except <code class="docutils literal notranslate"><span class="pre">3</span></code> or <code class="docutils literal notranslate"><span class="pre">4</span></code>.
|
||
However, there is <a class="reference external" href="https://dl.acm.org/doi/abs/10.1145/2480360.2384582">evidence from other languages</a> that this is
|
||
rarely useful, and primarily used as double negation <code class="docutils literal notranslate"><span class="pre">!!</span></code> to control
|
||
variable scopes and prevent variable bindings (which does not apply to
|
||
Python). Other use cases are better expressed using guards.</p>
|
||
<p>In the end, it was decided that this would make the syntax more complex
|
||
without adding a significant benefit. It can always be added later.</p>
|
||
<p><strong>Example</strong> using the OR pattern:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">simplify</span><span class="p">(</span><span class="n">expr</span><span class="p">):</span>
|
||
<span class="k">match</span> <span class="n">expr</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="s1">'/'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="n">expr</span>
|
||
<span class="k">case</span><span class="w"> </span><span class="p">(</span><span class="s1">'*'</span><span class="o">|</span><span class="s1">'/'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="k">_</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="mi">0</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="s1">'+'</span><span class="o">|</span><span class="s1">'-'</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="o">|</span> <span class="p">(</span><span class="s1">'+'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span> <span class="o">|</span> <span class="p">(</span><span class="s1">'*'</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span> <span class="o">|</span> <span class="p">(</span><span class="s1">'*'</span><span class="o">|</span><span class="s1">'/'</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="mi">1</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="n">x</span>
|
||
<span class="k">return</span> <span class="n">expr</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="literal-patterns">
|
||
<span id="literal-pattern"></span><h4><a class="toc-backref" href="#literal-patterns" role="doc-backlink">Literal Patterns</a></h4>
|
||
<p>Literal patterns are a convenient way for imposing constraints on the
|
||
value of a subject, rather than its type or structure. They also
|
||
allow you to emulate a switch statement using pattern matching.</p>
|
||
<p>Generally, the subject is compared to a literal pattern by means of standard
|
||
equality (<code class="docutils literal notranslate"><span class="pre">x</span> <span class="pre">==</span> <span class="pre">y</span></code> in Python syntax). Consequently, the literal patterns
|
||
<code class="docutils literal notranslate"><span class="pre">1.0</span></code> and <code class="docutils literal notranslate"><span class="pre">1</span></code> match exactly the same set of objects, i.e. <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">1.0:</span></code>
|
||
and <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">1:</span></code> are fully interchangeable. In principle, <code class="docutils literal notranslate"><span class="pre">True</span></code> would also
|
||
match the same set of objects because <code class="docutils literal notranslate"><span class="pre">True</span> <span class="pre">==</span> <span class="pre">1</span></code> holds. However, we
|
||
believe that many users would be surprised finding that <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">True:</span></code>
|
||
matched the subject <code class="docutils literal notranslate"><span class="pre">1.0</span></code>, resulting in some subtle bugs and convoluted
|
||
workarounds. We therefore adopted the rule that the three singleton
|
||
patterns <code class="docutils literal notranslate"><span class="pre">None</span></code>, <code class="docutils literal notranslate"><span class="pre">False</span></code> and <code class="docutils literal notranslate"><span class="pre">True</span></code> match by identity (<code class="docutils literal notranslate"><span class="pre">x</span> <span class="pre">is</span> <span class="pre">y</span></code> in
|
||
Python syntax) rather than equality. Hence, <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">True:</span></code> will match only
|
||
<code class="docutils literal notranslate"><span class="pre">True</span></code> and nothing else. Note that <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">1:</span></code> would still match <code class="docutils literal notranslate"><span class="pre">True</span></code>,
|
||
though, because the literal pattern <code class="docutils literal notranslate"><span class="pre">1</span></code> works by equality and not identity.</p>
|
||
<p>Early ideas to induce a hierarchy on numbers so that <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">1.0</span></code> would
|
||
match both the integer <code class="docutils literal notranslate"><span class="pre">1</span></code> and the floating point number <code class="docutils literal notranslate"><span class="pre">1.0</span></code>, whereas
|
||
<code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">1:</span></code> would only match the integer <code class="docutils literal notranslate"><span class="pre">1</span></code> were eventually dropped in
|
||
favor of the simpler and more consistent rule based on equality. Moreover, any
|
||
additional checks whether the subject is an instance of <code class="docutils literal notranslate"><span class="pre">numbers.Integral</span></code>
|
||
would come at a high runtime cost to introduce what would essentially be
|
||
a novel idea in Python. When needed, the explicit syntax <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">int(1):</span></code> can
|
||
be used.</p>
|
||
<p>Recall that literal patterns are <em>not</em> expressions, but directly
|
||
denote a specific value. From a pragmatic point of view, we want to
|
||
allow using negative and even complex values as literal patterns, but
|
||
they are not atomic literals (only unsigned real and imaginary numbers
|
||
are). E.g., <code class="docutils literal notranslate"><span class="pre">-3+4j</span></code> is syntactically an expression of the form
|
||
<code class="docutils literal notranslate"><span class="pre">BinOp(UnaryOp('-',</span> <span class="pre">3),</span> <span class="pre">'+',</span> <span class="pre">4j)</span></code>. Since expressions are not part
|
||
of patterns, we had to add explicit syntactic support for such values
|
||
without having to resort to full expressions.</p>
|
||
<p>Interpolated <em>f</em>-strings, on the
|
||
other hand, are not literal values, despite their appearance, and can
|
||
therefore not be used as literal patterns (string concatenation, however,
|
||
is supported).</p>
|
||
<p>Literal patterns not only occur as patterns in their own right, but also
|
||
as keys in <em>mapping patterns</em>.</p>
|
||
<p><strong>Range matching patterns.</strong>
|
||
This would allow patterns such as <code class="docutils literal notranslate"><span class="pre">1...6</span></code>. However, there are a host of
|
||
ambiguities:</p>
|
||
<ul class="simple">
|
||
<li>Is the range open, half-open, or closed? (I.e. is <code class="docutils literal notranslate"><span class="pre">6</span></code> included in the
|
||
above example or not?)</li>
|
||
<li>Does the range match a single number, or a range object?</li>
|
||
<li>Range matching is often used for character ranges (‘a’…‘z’) but that
|
||
won’t work in Python since there’s no character data type, just strings.</li>
|
||
<li>Range matching can be a significant performance optimization if you can
|
||
pre-build a jump table, but that’s not generally possible in Python due
|
||
to the fact that names can be dynamically rebound.</li>
|
||
</ul>
|
||
<p>Rather than creating a special-case syntax for ranges, it was decided
|
||
that allowing custom pattern objects (<code class="docutils literal notranslate"><span class="pre">InRange(0,</span> <span class="pre">6)</span></code>) would be more flexible
|
||
and less ambiguous; however those ideas have been postponed for the time
|
||
being.</p>
|
||
<p><strong>Example</strong> using Literal patterns:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">simplify</span><span class="p">(</span><span class="n">expr</span><span class="p">):</span>
|
||
<span class="k">match</span> <span class="n">expr</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="s1">'+'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="n">x</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="s1">'+'</span> <span class="o">|</span> <span class="s1">'-'</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="mi">0</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="n">x</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="s1">'and'</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="n">x</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="s1">'and'</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="kc">False</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="s1">'or'</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="n">x</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="s1">'or'</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="kc">True</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="s1">'not'</span><span class="p">,</span> <span class="p">(</span><span class="s1">'not'</span><span class="p">,</span> <span class="n">x</span><span class="p">)):</span>
|
||
<span class="k">return</span> <span class="n">x</span>
|
||
<span class="k">return</span> <span class="n">expr</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="capture-patterns">
|
||
<span id="capture-pattern"></span><h4><a class="toc-backref" href="#capture-patterns" role="doc-backlink">Capture Patterns</a></h4>
|
||
<p>Capture patterns take on the form of a name that accepts any value and binds
|
||
it to a (local) variable (unless the name is declared as <code class="docutils literal notranslate"><span class="pre">nonlocal</span></code> or
|
||
<code class="docutils literal notranslate"><span class="pre">global</span></code>). In that sense, a capture pattern is similar
|
||
to a parameter in a function definition (when the function is called, each
|
||
parameter binds the respective argument to a local variable in the function’s
|
||
scope).</p>
|
||
<p>A name used for a capture pattern must not coincide with another capture
|
||
pattern in the same pattern. This, again, is similar to parameters, which
|
||
equally require each parameter name to be unique within the list of
|
||
parameters. It differs, however, from iterable unpacking assignment, where
|
||
the repeated use of a variable name as target is permissible (e.g.,
|
||
<code class="docutils literal notranslate"><span class="pre">x,</span> <span class="pre">x</span> <span class="pre">=</span> <span class="pre">1,</span> <span class="pre">2</span></code>). The rationale for not supporting <code class="docutils literal notranslate"><span class="pre">(x,</span> <span class="pre">x)</span></code> in patterns
|
||
is its ambiguous reading: it could be seen as in iterable unpacking where
|
||
only the second binding to <code class="docutils literal notranslate"><span class="pre">x</span></code> survives. But it could be equally seen as
|
||
expressing a tuple with two equal elements (which comes with its own issues).
|
||
Should the need arise, then it is still possible to introduce support for
|
||
repeated use of names later on.</p>
|
||
<p>There were calls to explicitly mark capture patterns and thus identify them
|
||
as binding targets. According to that idea, a capture pattern would be
|
||
written as, e.g. <code class="docutils literal notranslate"><span class="pre">?x</span></code>, <code class="docutils literal notranslate"><span class="pre">$x</span></code> or <code class="docutils literal notranslate"><span class="pre">=x</span></code>. The aim of such explicit capture
|
||
markers is to let an unmarked name be a value pattern (see below).
|
||
However, this is based on the misconception that pattern matching was an
|
||
extension of <em>switch</em> statements, placing the emphasis on fast switching based
|
||
on (ordinal) values. Such a <em>switch</em> statement has indeed been proposed for
|
||
Python before (see <a class="pep reference internal" href="../pep-0275/" title="PEP 275 – Switching on Multiple Values">PEP 275</a> and <a class="pep reference internal" href="../pep-3103/" title="PEP 3103 – A Switch/Case Statement">PEP 3103</a>). Pattern matching, on the other
|
||
hand, builds a generalized concept of iterable unpacking. Binding values
|
||
extracted from a data structure is at the very core of the concept and hence
|
||
the most common use case. Explicit markers for capture patterns would thus
|
||
betray the objective of the proposed pattern matching syntax and simplify
|
||
a secondary use case at the expense of additional syntactic clutter for
|
||
core cases.</p>
|
||
<p>It has been proposed that capture patterns are not needed at all,
|
||
since the equivalent effect can be obtained by combining an AS
|
||
pattern with a wildcard pattern (e.g., <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">_</span> <span class="pre">as</span> <span class="pre">x</span></code> is equivalent
|
||
to <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">x</span></code>). However, this would be unpleasantly verbose,
|
||
especially given that we expect capture patterns to be very common.</p>
|
||
<p><strong>Example</strong> using Capture patterns:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">average</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">):</span>
|
||
<span class="k">match</span> <span class="n">args</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="p">[</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">]:</span> <span class="c1"># captures the two elements of a sequence</span>
|
||
<span class="k">return</span> <span class="p">(</span><span class="n">x</span> <span class="o">+</span> <span class="n">y</span><span class="p">)</span> <span class="o">/</span> <span class="mi">2</span>
|
||
<span class="k">case</span> <span class="p">[</span><span class="n">x</span><span class="p">]:</span> <span class="c1"># captures the only element of a sequence</span>
|
||
<span class="k">return</span> <span class="n">x</span>
|
||
<span class="k">case</span> <span class="p">[]:</span>
|
||
<span class="k">return</span> <span class="mi">0</span>
|
||
<span class="k">case</span> <span class="n">a</span><span class="p">:</span> <span class="c1"># captures the entire sequence</span>
|
||
<span class="k">return</span> <span class="nb">sum</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="o">/</span> <span class="nb">len</span><span class="p">(</span><span class="n">a</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="wildcard-pattern">
|
||
<span id="id3"></span><h4><a class="toc-backref" href="#wildcard-pattern" role="doc-backlink">Wildcard Pattern</a></h4>
|
||
<p>The wildcard pattern is a special case of a ‘capture’ pattern: it accepts
|
||
any value, but does not bind it to a variable. The idea behind this rule
|
||
is to support repeated use of the wildcard in patterns. While <code class="docutils literal notranslate"><span class="pre">(x,</span> <span class="pre">x)</span></code>
|
||
is an error, <code class="docutils literal notranslate"><span class="pre">(_,</span> <span class="pre">_)</span></code> is legal.</p>
|
||
<p>Particularly in larger (sequence) patterns, it is important to allow the
|
||
pattern to concentrate on values with actual significance while ignoring
|
||
anything else. Without a wildcard, it would become necessary to ‘invent’
|
||
a number of local variables, which would be bound but never used. Even
|
||
when sticking to naming conventions and using e.g. <code class="docutils literal notranslate"><span class="pre">_1,</span> <span class="pre">_2,</span> <span class="pre">_3</span></code> to name
|
||
irrelevant values, say, this still introduces visual clutter and can hurt
|
||
performance (compare the sequence pattern <code class="docutils literal notranslate"><span class="pre">(x,</span> <span class="pre">y,</span> <span class="pre">*z)</span></code> to <code class="docutils literal notranslate"><span class="pre">(_,</span> <span class="pre">y,</span> <span class="pre">*_)</span></code>,
|
||
where the <code class="docutils literal notranslate"><span class="pre">*z</span></code> forces the interpreter to copy a potentially very long
|
||
sequence, whereas the second version simply compiles to code along the
|
||
lines of <code class="docutils literal notranslate"><span class="pre">y</span> <span class="pre">=</span> <span class="pre">seq[1]</span></code>).</p>
|
||
<p>There has been much discussion about the choice of the underscore <code class="docutils literal notranslate"><span class="pre">_</span></code>
|
||
as a wildcard pattern, i.e. making this one name non-binding. However, the
|
||
underscore is already heavily used as an ‘ignore value’ marker in iterable
|
||
unpacking. Since the wildcard pattern <code class="docutils literal notranslate"><span class="pre">_</span></code> never binds, this use of the
|
||
underscore does not interfere with other uses such as inside the REPL or
|
||
the <code class="docutils literal notranslate"><span class="pre">gettext</span></code> module.</p>
|
||
<p>It has been proposed to use <code class="docutils literal notranslate"><span class="pre">...</span></code> (i.e., the ellipsis token) or <code class="docutils literal notranslate"><span class="pre">*</span></code>
|
||
(star) as a wildcard. However, both these look as if an arbitrary number
|
||
of items is omitted:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">case</span> <span class="p">[</span><span class="n">a</span><span class="p">,</span> <span class="o">...</span><span class="p">,</span> <span class="n">z</span><span class="p">]:</span> <span class="o">...</span>
|
||
<span class="k">case</span> <span class="p">[</span><span class="n">a</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">z</span><span class="p">]:</span> <span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Either example looks like it would match a sequence of two or more
|
||
items, capturing the first and last values. While that may be the
|
||
ultimate “wildcard”, it does not convey the desired semantics.</p>
|
||
<p>An alternative that does not suggest an arbitrary number of items
|
||
would be <code class="docutils literal notranslate"><span class="pre">?</span></code>. This is even being proposed independently from
|
||
pattern matching in <a class="pep reference internal" href="../pep-0640/" title="PEP 640 – Unused variable syntax">PEP 640</a>. We feel however that using <code class="docutils literal notranslate"><span class="pre">?</span></code> as a
|
||
special “assignment” target is likely more confusing to Python users
|
||
than using <code class="docutils literal notranslate"><span class="pre">_</span></code>. It violates Python’s (admittedly vague) principle
|
||
of using punctuation characters only in ways similar to how they are
|
||
used in common English usage or in high school math, unless the usage
|
||
is <em>very</em> well established in other programming languages (like, e.g.,
|
||
using a dot for member access).</p>
|
||
<p>The question mark fails on both counts: its use in other programming
|
||
languages is a grab-bag of usages only vaguely suggested by the idea
|
||
of a “question”. For example, it means “any character” in shell
|
||
globbing, “maybe” in regular expressions, “conditional expression” in
|
||
C and many C-derived languages, “predicate function” in Scheme,
|
||
“modify error handling” in Rust, “optional argument” and “optional
|
||
chaining” in TypeScript (the latter meaning has also been proposed for
|
||
Python by <a class="pep reference internal" href="../pep-0505/" title="PEP 505 – None-aware operators">PEP 505</a>). An as yet unnamed PEP proposes it to mark
|
||
optional types, e.g. <code class="docutils literal notranslate"><span class="pre">int?</span></code>.</p>
|
||
<p>Another common use of <code class="docutils literal notranslate"><span class="pre">?</span></code> in programming systems is “help”, for
|
||
example, in IPython and Jupyter Notebooks and many interactive
|
||
command-line utilities.</p>
|
||
<p>In addition, this would put Python in a rather unique position:
|
||
The underscore is used as a wildcard pattern in <em>every</em>
|
||
programming language with pattern matching that we could find
|
||
(including <em>C#</em>, <em>Elixir</em>, <em>Erlang</em>, <em>F#</em>, <em>Grace</em>, <em>Haskell</em>,
|
||
<em>Mathematica</em>, <em>OCaml</em>, <em>Ruby</em>, <em>Rust</em>, <em>Scala</em>, <em>Swift</em>, and <em>Thorn</em>).
|
||
Keeping in mind that many users of Python also work with other programming
|
||
languages, have prior experience when learning Python, and may move on to
|
||
other languages after having learned Python, we find that such
|
||
well-established standards are important and relevant with respect to
|
||
readability and learnability. In our view, concerns that this wildcard
|
||
means that a regular name receives special treatment are not strong
|
||
enough to introduce syntax that would make Python special.</p>
|
||
<p><em>Else blocks.</em> A case block without a guard whose pattern is a single
|
||
wildcard (i.e., <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">_:</span></code>) accepts any subject without binding it to
|
||
a variable or performing any other operation. It is thus semantically
|
||
equivalent to <code class="docutils literal notranslate"><span class="pre">else:</span></code>, if it were supported. However, adding such
|
||
an else block to the match statement syntax would not remove the need
|
||
for the wildcard pattern in other contexts. Another argument against
|
||
this is that there would be two plausible indentation levels for an
|
||
else block: aligned with <code class="docutils literal notranslate"><span class="pre">case</span></code> or aligned with <code class="docutils literal notranslate"><span class="pre">match</span></code>. The
|
||
authors have found it quite contentious which indentation level to
|
||
prefer.</p>
|
||
<p><strong>Example</strong> using the Wildcard pattern:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">is_closed</span><span class="p">(</span><span class="n">sequence</span><span class="p">):</span>
|
||
<span class="k">match</span> <span class="n">sequence</span><span class="p">:</span>
|
||
<span class="k">case</span><span class="w"> </span><span class="p">[</span><span class="k">_</span><span class="p">]:</span> <span class="c1"># any sequence with a single element</span>
|
||
<span class="k">return</span> <span class="kc">True</span>
|
||
<span class="k">case</span><span class="w"> </span><span class="p">[</span><span class="n">start</span><span class="p">,</span> <span class="o">*</span><span class="k">_</span><span class="p">,</span> <span class="n">end</span><span class="p">]:</span> <span class="c1"># a sequence with at least two elements</span>
|
||
<span class="k">return</span> <span class="n">start</span> <span class="o">==</span> <span class="n">end</span>
|
||
<span class="k">case</span><span class="w"> </span><span class="k">_</span><span class="p">:</span> <span class="c1"># anything</span>
|
||
<span class="k">return</span> <span class="kc">False</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="value-patterns">
|
||
<span id="value-pattern"></span><h4><a class="toc-backref" href="#value-patterns" role="doc-backlink">Value Patterns</a></h4>
|
||
<p>It is good programming style to use named constants for parametric values or
|
||
to clarify the meaning of particular values. Clearly, it would be preferable
|
||
to write <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">(HttpStatus.OK,</span> <span class="pre">body):</span></code> over
|
||
<code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">(200,</span> <span class="pre">body):</span></code>, for example. The main issue that arises here is how to
|
||
distinguish capture patterns (variable bindings) from value patterns. The
|
||
general discussion surrounding this issue has brought forward a plethora of
|
||
options, which we cannot all fully list here.</p>
|
||
<p>Strictly speaking, value patterns are not really necessary, but
|
||
could be implemented using guards, i.e.
|
||
<code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">(status,</span> <span class="pre">body)</span> <span class="pre">if</span> <span class="pre">status</span> <span class="pre">==</span> <span class="pre">HttpStatus.OK:</span></code>. Nonetheless, the
|
||
convenience of value patterns is unquestioned and obvious.</p>
|
||
<p>The observation that constants tend to be written in uppercase letters or
|
||
collected in enumeration-like namespaces suggests possible rules to discern
|
||
constants syntactically. However, the idea of using upper- vs. lowercase as
|
||
a marker has been met with scepticism since there is no similar precedent
|
||
in core Python (although it is common in other languages). We therefore only
|
||
adopted the rule that any dotted name (i.e., attribute access) is to be
|
||
interpreted as a value pattern, for example <code class="docutils literal notranslate"><span class="pre">HttpStatus.OK</span></code>
|
||
above. This precludes, in particular, local variables and global
|
||
variables defined in the current module from acting as constants.</p>
|
||
<p>A proposed rule to use a leading dot (e.g.
|
||
<code class="docutils literal notranslate"><span class="pre">.CONSTANT</span></code>) for that purpose was criticised because it was felt that the
|
||
dot would not be a visible-enough marker for that purpose. Partly inspired
|
||
by forms found in other programming languages, a number of different
|
||
markers/sigils were proposed (such as <code class="docutils literal notranslate"><span class="pre">^CONSTANT</span></code>, <code class="docutils literal notranslate"><span class="pre">$CONSTANT</span></code>,
|
||
<code class="docutils literal notranslate"><span class="pre">==CONSTANT</span></code>, <code class="docutils literal notranslate"><span class="pre">CONSTANT?</span></code>, or the word enclosed in backticks), although
|
||
there was no obvious or natural choice. The current proposal therefore
|
||
leaves the discussion and possible introduction of such a ‘constant’ marker
|
||
for a future PEP.</p>
|
||
<p>Distinguishing the semantics of a name based on whether it is a global
|
||
variable (i.e. the compiler would treat global variables as constants rather
|
||
than capture patterns) leads to various issues. The addition or alteration
|
||
of a global variable in the module could have unintended side effects on
|
||
patterns. Moreover, pattern matching could not be used directly inside a
|
||
module’s scope because all variables would be global, making capture
|
||
patterns impossible.</p>
|
||
<p><strong>Example</strong> using the Value pattern:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">handle_reply</span><span class="p">(</span><span class="n">reply</span><span class="p">):</span>
|
||
<span class="k">match</span> <span class="n">reply</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="n">HttpStatus</span><span class="o">.</span><span class="n">OK</span><span class="p">,</span> <span class="n">MimeType</span><span class="o">.</span><span class="n">TEXT</span><span class="p">,</span> <span class="n">body</span><span class="p">):</span>
|
||
<span class="n">process_text</span><span class="p">(</span><span class="n">body</span><span class="p">)</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="n">HttpStatus</span><span class="o">.</span><span class="n">OK</span><span class="p">,</span> <span class="n">MimeType</span><span class="o">.</span><span class="n">APPL_ZIP</span><span class="p">,</span> <span class="n">body</span><span class="p">):</span>
|
||
<span class="n">text</span> <span class="o">=</span> <span class="n">deflate</span><span class="p">(</span><span class="n">body</span><span class="p">)</span>
|
||
<span class="n">process_text</span><span class="p">(</span><span class="n">text</span><span class="p">)</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="n">HttpStatus</span><span class="o">.</span><span class="n">MOVED_PERMANENTLY</span><span class="p">,</span> <span class="n">new_URI</span><span class="p">):</span>
|
||
<span class="n">resend_request</span><span class="p">(</span><span class="n">new_URI</span><span class="p">)</span>
|
||
<span class="k">case</span> <span class="p">(</span><span class="n">HttpStatus</span><span class="o">.</span><span class="n">NOT_FOUND</span><span class="p">):</span>
|
||
<span class="k">raise</span> <span class="n">ResourceNotFound</span><span class="p">()</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="group-patterns">
|
||
<h4><a class="toc-backref" href="#group-patterns" role="doc-backlink">Group Patterns</a></h4>
|
||
<p>Allowing users to explicitly specify the grouping is particularly helpful
|
||
in case of OR patterns.</p>
|
||
</section>
|
||
<section id="sequence-patterns">
|
||
<span id="sequence-pattern"></span><h4><a class="toc-backref" href="#sequence-patterns" role="doc-backlink">Sequence Patterns</a></h4>
|
||
<p>Sequence patterns follow as closely as possible the already established
|
||
syntax and semantics of iterable unpacking. Of course, subpatterns take
|
||
the place of assignment targets (variables, attributes and subscripts).
|
||
Moreover, the sequence pattern only matches a carefully selected set of
|
||
possible subjects, whereas iterable unpacking can be applied to any
|
||
iterable.</p>
|
||
<ul class="simple">
|
||
<li>As in iterable unpacking, we do not distinguish between ‘tuple’ and
|
||
‘list’ notation. <code class="docutils literal notranslate"><span class="pre">[a,</span> <span class="pre">b,</span> <span class="pre">c]</span></code>, <code class="docutils literal notranslate"><span class="pre">(a,</span> <span class="pre">b,</span> <span class="pre">c)</span></code> and <code class="docutils literal notranslate"><span class="pre">a,</span> <span class="pre">b,</span> <span class="pre">c</span></code> are all
|
||
equivalent. While this means we have a redundant notation and checking
|
||
specifically for lists or tuples requires more effort (e.g.
|
||
<code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">list([a,</span> <span class="pre">b,</span> <span class="pre">c])</span></code>), we mimic iterable unpacking as much as
|
||
possible.</li>
|
||
<li>A starred pattern will capture a sub-sequence of arbitrary length,
|
||
again mirroring iterable unpacking. Only one starred item may be
|
||
present in any sequence pattern. In theory, patterns such as <code class="docutils literal notranslate"><span class="pre">(*_,</span> <span class="pre">3,</span> <span class="pre">*_)</span></code>
|
||
could be understood as expressing any sequence containing the value <code class="docutils literal notranslate"><span class="pre">3</span></code>.
|
||
In practice, however, this would only work for a very narrow set of use
|
||
cases and lead to inefficient backtracking or even ambiguities otherwise.</li>
|
||
<li>The sequence pattern does <em>not</em> iterate through an iterable subject. All
|
||
elements are accessed through subscripting and slicing, and the subject must
|
||
be an instance of <code class="docutils literal notranslate"><span class="pre">collections.abc.Sequence</span></code>. This includes, of course,
|
||
lists and tuples, but excludes e.g. sets and dictionaries. While it would
|
||
include strings and bytes, we make an exception for these (see below).</li>
|
||
</ul>
|
||
<p>A sequence pattern cannot just iterate through any iterable object. The
|
||
consumption of elements from the iteration would have to be undone if the
|
||
overall pattern fails, which is not feasible.</p>
|
||
<p>To identify sequences we cannot rely on <code class="docutils literal notranslate"><span class="pre">len()</span></code> and subscripting and
|
||
slicing alone, because sequences share these protocols with mappings
|
||
(e.g. <code class="docutils literal notranslate"><span class="pre">dict</span></code>) in this regard. It would be surprising if a sequence
|
||
pattern also matched dictionaries or other objects implementing
|
||
the mapping protocol (i.e. <code class="docutils literal notranslate"><span class="pre">__getitem__</span></code>). The interpreter therefore
|
||
performs an instance check to ensure that the subject in question really
|
||
is a sequence (of known type). (As an optimization of the most common
|
||
case, if the subject is exactly a list or a tuple, the instance check
|
||
can be skipped.)</p>
|
||
<p>String and bytes objects have a dual nature: they are both ‘atomic’ objects
|
||
in their own right, as well as sequences (with a strongly recursive nature
|
||
in that a string is a sequence of strings). The typical behavior and use
|
||
cases for strings and bytes are different enough from those of tuples and
|
||
lists to warrant a clear distinction. It is in fact often unintuitive and
|
||
unintended that strings pass for sequences, as evidenced by regular questions
|
||
and complaints. Strings and bytes are therefore not matched by a sequence
|
||
pattern, limiting the sequence pattern to a very specific understanding of
|
||
‘sequence’. The built-in <code class="docutils literal notranslate"><span class="pre">bytearray</span></code> type, being a mutable version of
|
||
<code class="docutils literal notranslate"><span class="pre">bytes</span></code>, also deserves an exception; but we don’t intend to
|
||
enumerate all other types that may be used to represent bytes
|
||
(e.g. some, but not all, instances of <code class="docutils literal notranslate"><span class="pre">memoryview</span></code> and <code class="docutils literal notranslate"><span class="pre">array.array</span></code>).</p>
|
||
</section>
|
||
<section id="mapping-patterns">
|
||
<span id="mapping-pattern"></span><h4><a class="toc-backref" href="#mapping-patterns" role="doc-backlink">Mapping Patterns</a></h4>
|
||
<p>Dictionaries or mappings in general are one of the most important and most
|
||
widely used data structures in Python. In contrast to sequences, mappings
|
||
are built for fast direct access to arbitrary elements identified by a key.
|
||
In most cases an element is retrieved from a dictionary by a known key
|
||
without regard for any ordering or other key-value pairs stored in the same
|
||
dictionary. Particularly common are string keys.</p>
|
||
<p>The mapping pattern reflects the common usage of dictionary lookup: it allows
|
||
the user to extract some values from a mapping by means of constant/known
|
||
keys and have the values match given subpatterns.
|
||
Extra keys in the subject are ignored even if <code class="docutils literal notranslate"><span class="pre">**rest</span></code> is not present.
|
||
This is different from sequence patterns, where extra items will cause a
|
||
match to fail. But mappings are actually different from sequences: they
|
||
have natural structural sub-typing behavior, i.e., passing a dictionary
|
||
with extra keys somewhere will likely just work.
|
||
Should it be
|
||
necessary to impose an upper bound on the mapping and ensure that no
|
||
additional keys are present, then the usual double-star-pattern <code class="docutils literal notranslate"><span class="pre">**rest</span></code>
|
||
can be used. The special case <code class="docutils literal notranslate"><span class="pre">**_</span></code> with a wildcard, however, is not
|
||
supported as it would not have any effect, but might lead to an incorrect
|
||
understanding of the mapping pattern’s semantics.</p>
|
||
<p>To avoid overly expensive matching algorithms, keys must be literals or
|
||
value patterns.</p>
|
||
<p>There is a subtle reason for using <code class="docutils literal notranslate"><span class="pre">get(key,</span> <span class="pre">default)</span></code> instead of
|
||
<code class="docutils literal notranslate"><span class="pre">__getitem__(key)</span></code> followed by a check for <code class="docutils literal notranslate"><span class="pre">KeyError</span></code>: if
|
||
the subject happens to be a <code class="docutils literal notranslate"><span class="pre">defaultdict</span></code>, calling <code class="docutils literal notranslate"><span class="pre">__getitem__</span></code>
|
||
for a non-existent key would add the key. Using <code class="docutils literal notranslate"><span class="pre">get()</span></code> avoids this
|
||
unexpected side effect.</p>
|
||
<p><strong>Example</strong> using the Mapping pattern:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">change_red_to_blue</span><span class="p">(</span><span class="n">json_obj</span><span class="p">):</span>
|
||
<span class="k">match</span> <span class="n">json_obj</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="p">{</span> <span class="s1">'color'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'red'</span> <span class="o">|</span> <span class="s1">'#FF0000'</span><span class="p">)</span> <span class="p">}:</span>
|
||
<span class="n">json_obj</span><span class="p">[</span><span class="s1">'color'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'blue'</span>
|
||
<span class="k">case</span> <span class="p">{</span> <span class="s1">'children'</span><span class="p">:</span> <span class="n">children</span> <span class="p">}:</span>
|
||
<span class="k">for</span> <span class="n">child</span> <span class="ow">in</span> <span class="n">children</span><span class="p">:</span>
|
||
<span class="n">change_red_to_blue</span><span class="p">(</span><span class="n">child</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="class-patterns">
|
||
<span id="class-pattern"></span><h4><a class="toc-backref" href="#class-patterns" role="doc-backlink">Class Patterns</a></h4>
|
||
<p>Class patterns fulfill two purposes: checking whether a given subject is
|
||
indeed an instance of a specific class, and extracting data from specific
|
||
attributes of the subject. Anecdotal evidence revealed that <code class="docutils literal notranslate"><span class="pre">isinstance()</span></code>
|
||
is one of the most often used functions in Python in terms of
|
||
static occurrences in programs. Such instance checks typically precede
|
||
a subsequent access to information stored in the object, or a possible
|
||
manipulation thereof. A typical pattern might be along the lines of:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">traverse_tree</span><span class="p">(</span><span class="n">node</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="n">Node</span><span class="p">):</span>
|
||
<span class="n">traverse_tree</span><span class="p">(</span><span class="n">node</span><span class="o">.</span><span class="n">left</span><span class="p">)</span>
|
||
<span class="n">traverse_tree</span><span class="p">(</span><span class="n">node</span><span class="o">.</span><span class="n">right</span><span class="p">)</span>
|
||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="n">Leaf</span><span class="p">):</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="n">node</span><span class="o">.</span><span class="n">value</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>In many cases class patterns occur nested, as in the example
|
||
given in the motivation:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">if</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="n">BinOp</span><span class="p">)</span> <span class="ow">and</span> <span class="n">node</span><span class="o">.</span><span class="n">op</span> <span class="o">==</span> <span class="s2">"+"</span>
|
||
<span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="o">.</span><span class="n">right</span><span class="p">,</span> <span class="n">BinOp</span><span class="p">)</span> <span class="ow">and</span> <span class="n">node</span><span class="o">.</span><span class="n">right</span><span class="o">.</span><span class="n">op</span> <span class="o">==</span> <span class="s2">"*"</span><span class="p">):</span>
|
||
<span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span> <span class="o">=</span> <span class="n">node</span><span class="o">.</span><span class="n">left</span><span class="p">,</span> <span class="n">node</span><span class="o">.</span><span class="n">right</span><span class="o">.</span><span class="n">left</span><span class="p">,</span> <span class="n">node</span><span class="o">.</span><span class="n">right</span><span class="o">.</span><span class="n">right</span>
|
||
<span class="c1"># Handle a + b*c</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The class pattern lets you concisely specify both an instance check
|
||
and relevant attributes (with possible further constraints). It is
|
||
thereby very tempting to write, e.g., <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">Node(left,</span> <span class="pre">right):</span></code> in the
|
||
first case above and <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">Leaf(value):</span></code> in the second. While this
|
||
indeed works well for languages with strict algebraic data types, it is
|
||
problematic with the structure of Python objects.</p>
|
||
<p>When dealing with general Python objects, we face a potentially very large
|
||
number of unordered attributes: an instance of <code class="docutils literal notranslate"><span class="pre">Node</span></code> contains a large
|
||
number of attributes (most of which are ‘special methods’ such as
|
||
<code class="docutils literal notranslate"><span class="pre">__repr__</span></code>). Moreover, the interpreter cannot reliably deduce the
|
||
ordering of attributes. For an object that
|
||
represents a circle, say, there is no inherently obvious ordering of the
|
||
attributes <code class="docutils literal notranslate"><span class="pre">x</span></code>, <code class="docutils literal notranslate"><span class="pre">y</span></code> and <code class="docutils literal notranslate"><span class="pre">radius</span></code>.</p>
|
||
<p>We envision two possibilities for dealing with this issue: either explicitly
|
||
name the attributes of interest, or provide an additional mapping that tells
|
||
the interpreter which attributes to extract and in which order. Both
|
||
approaches are supported. Moreover, explicitly naming the attributes of
|
||
interest lets you further specify the required structure of an object; if
|
||
an object lacks an attribute specified by the pattern, the match fails.</p>
|
||
<ul>
|
||
<li>Attributes that are explicitly named pick up the syntax of named arguments.
|
||
If an object of class <code class="docutils literal notranslate"><span class="pre">Node</span></code> has two attributes <code class="docutils literal notranslate"><span class="pre">left</span></code> and <code class="docutils literal notranslate"><span class="pre">right</span></code>
|
||
as above, the pattern <code class="docutils literal notranslate"><span class="pre">Node(left=x,</span> <span class="pre">right=y)</span></code> will extract the values of
|
||
both attributes and assign them to <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code>, respectively. The data
|
||
flow from left to right seems unusual, but is in line with mapping patterns
|
||
and has precedents such as assignments via <code class="docutils literal notranslate"><span class="pre">as</span></code> in <em>with</em>- or
|
||
<em>import</em>-statements (and indeed AS patterns).<p>Naming the attributes in question explicitly will be mostly used for more
|
||
complex cases where the positional form (below) is insufficient.</p>
|
||
</li>
|
||
<li>The class field <code class="docutils literal notranslate"><span class="pre">__match_args__</span></code> specifies a number of attributes
|
||
together with their ordering, allowing class patterns to rely on positional
|
||
sub-patterns without having to explicitly name the attributes in question.
|
||
This is particularly handy for smaller objects or instances of data classes,
|
||
where the attributes of interest are rather obvious and often have a
|
||
well-defined ordering. In a way, <code class="docutils literal notranslate"><span class="pre">__match_args__</span></code> is similar to the
|
||
declaration of formal parameters, which allows calling functions with
|
||
positional arguments rather than naming all the parameters.<p>This is a class attribute, because it needs to be looked up on the class
|
||
named in the class pattern, not on the subject instance.</p>
|
||
</li>
|
||
</ul>
|
||
<p>The syntax of class patterns is based on the idea that de-construction
|
||
mirrors the syntax of construction. This is already the case in virtually
|
||
any Python construct, be it assignment targets, function definitions or
|
||
iterable unpacking. In all these cases, we find that the syntax for
|
||
sending and that for receiving ‘data’ are virtually identical.</p>
|
||
<ul class="simple">
|
||
<li>Assignment targets such as variables, attributes and subscripts:
|
||
<code class="docutils literal notranslate"><span class="pre">foo.bar[2]</span> <span class="pre">=</span> <span class="pre">foo.bar[3]</span></code>;</li>
|
||
<li>Function definitions: a function defined with <code class="docutils literal notranslate"><span class="pre">def</span> <span class="pre">foo(x,</span> <span class="pre">y,</span> <span class="pre">z=6)</span></code>
|
||
is called as, e.g., <code class="docutils literal notranslate"><span class="pre">foo(123,</span> <span class="pre">y=45)</span></code>, where the actual arguments
|
||
provided at the call site are matched against the formal parameters
|
||
at the definition site;</li>
|
||
<li>Iterable unpacking: <code class="docutils literal notranslate"><span class="pre">a,</span> <span class="pre">b</span> <span class="pre">=</span> <span class="pre">b,</span> <span class="pre">a</span></code> or <code class="docutils literal notranslate"><span class="pre">[a,</span> <span class="pre">b]</span> <span class="pre">=</span> <span class="pre">[b,</span> <span class="pre">a]</span></code> or
|
||
<code class="docutils literal notranslate"><span class="pre">(a,</span> <span class="pre">b)</span> <span class="pre">=</span> <span class="pre">(b,</span> <span class="pre">a)</span></code>, just to name a few equivalent possibilities.</li>
|
||
</ul>
|
||
<p>Using the same syntax for reading and writing, l- and r-values, or
|
||
construction and de-construction is widely accepted for its benefits in
|
||
thinking about data, its flow and manipulation. This equally extends to
|
||
the explicit construction of instances, where class patterns <code class="docutils literal notranslate"><span class="pre">C(p,</span> <span class="pre">q)</span></code>
|
||
deliberately mirror the syntax of creating instances.</p>
|
||
<p>The special case for the built-in classes <code class="docutils literal notranslate"><span class="pre">bool</span></code>, <code class="docutils literal notranslate"><span class="pre">bytearray</span></code>
|
||
etc. (where e.g. <code class="docutils literal notranslate"><span class="pre">str(x)</span></code> captures the subject value in <code class="docutils literal notranslate"><span class="pre">x</span></code>) can
|
||
be emulated by a user-defined class as follows:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">MyClass</span><span class="p">:</span>
|
||
<span class="n">__match_args__</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"__myself__"</span><span class="p">]</span>
|
||
<span class="n">__myself__</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="k">lambda</span> <span class="bp">self</span><span class="p">:</span> <span class="bp">self</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p><strong>Type annotations for pattern variables.</strong>
|
||
The proposal was to combine patterns with type annotations:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">match</span> <span class="n">x</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="p">[</span><span class="n">a</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">b</span><span class="p">:</span> <span class="nb">str</span><span class="p">]:</span> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"An int </span><span class="si">{</span><span class="n">a</span><span class="si">}</span><span class="s2"> and a string </span><span class="si">{</span><span class="n">b</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||
<span class="k">case</span> <span class="p">[</span><span class="n">a</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">b</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">c</span><span class="p">:</span> <span class="nb">int</span><span class="p">]:</span> <span class="nb">print</span><span class="p">(</span><span class="s2">"Three ints"</span><span class="p">,</span> <span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span>
|
||
<span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This idea has a lot of problems. For one, the colon can only
|
||
be used inside of brackets or parentheses, otherwise the syntax becomes
|
||
ambiguous. And because Python disallows <code class="docutils literal notranslate"><span class="pre">isinstance()</span></code> checks
|
||
on generic types, type annotations containing generics will not
|
||
work as expected.</p>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="history-and-context">
|
||
<h2><a class="toc-backref" href="#history-and-context" role="doc-backlink">History and Context</a></h2>
|
||
<p>Pattern matching emerged in the late 1970s in the form of tuple unpacking
|
||
and as a means to handle recursive data structures such as linked lists or
|
||
trees (object-oriented languages usually use the visitor pattern for handling
|
||
recursive data structures). The early proponents of pattern matching
|
||
organised structured data in ‘tagged tuples’ rather than <code class="docutils literal notranslate"><span class="pre">struct</span></code> as in
|
||
<em>C</em> or the objects introduced later. A node in a binary tree would, for
|
||
instance, be a tuple with two elements for the left and right branches,
|
||
respectively, and a <code class="docutils literal notranslate"><span class="pre">Node</span></code> tag, written as <code class="docutils literal notranslate"><span class="pre">Node(left,</span> <span class="pre">right)</span></code>. In
|
||
Python we would probably put the tag inside the tuple as
|
||
<code class="docutils literal notranslate"><span class="pre">('Node',</span> <span class="pre">left,</span> <span class="pre">right)</span></code> or define a data class <code class="docutils literal notranslate"><span class="pre">Node</span></code> to achieve the
|
||
same effect.</p>
|
||
<p>Using modern syntax, a depth-first tree traversal would then be written as
|
||
follows:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">traverse</span><span class="p">(</span><span class="n">node</span><span class="p">):</span>
|
||
<span class="k">match</span> <span class="n">node</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="n">Node</span><span class="p">(</span><span class="n">left</span><span class="p">,</span> <span class="n">right</span><span class="p">):</span>
|
||
<span class="n">traverse</span><span class="p">(</span><span class="n">left</span><span class="p">)</span>
|
||
<span class="n">traverse</span><span class="p">(</span><span class="n">right</span><span class="p">)</span>
|
||
<span class="k">case</span> <span class="n">Leaf</span><span class="p">(</span><span class="n">value</span><span class="p">):</span>
|
||
<span class="n">handle</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The notion of handling recursive data structures with pattern matching
|
||
immediately gave rise to the idea of handling more general recursive
|
||
‘patterns’ (i.e. recursion beyond recursive data structures)
|
||
with pattern matching. Pattern matching would thus also be used to define
|
||
recursive functions such as:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">fib</span><span class="p">(</span><span class="n">arg</span><span class="p">):</span>
|
||
<span class="k">match</span> <span class="n">arg</span><span class="p">:</span>
|
||
<span class="k">case</span> <span class="mi">0</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="mi">1</span>
|
||
<span class="k">case</span> <span class="mi">1</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="mi">1</span>
|
||
<span class="k">case</span> <span class="n">n</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">fib</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> <span class="o">+</span> <span class="n">fib</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mi">2</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>As pattern matching was repeatedly integrated into new and emerging
|
||
programming languages, its syntax slightly evolved and expanded. The two
|
||
first cases in the <code class="docutils literal notranslate"><span class="pre">fib</span></code> example above could be written more succinctly
|
||
as <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">0</span> <span class="pre">|</span> <span class="pre">1:</span></code> with <code class="docutils literal notranslate"><span class="pre">|</span></code> denoting alternative patterns. Moreover, the
|
||
underscore <code class="docutils literal notranslate"><span class="pre">_</span></code> was widely adopted as a wildcard, a filler where neither
|
||
the structure nor value of parts of a pattern were of substance. Since the
|
||
underscore is already frequently used in equivalent capacity in Python’s
|
||
iterable unpacking (e.g., <code class="docutils literal notranslate"><span class="pre">_,</span> <span class="pre">_,</span> <span class="pre">third,</span> <span class="pre">*_</span> <span class="pre">=</span> <span class="pre">something</span></code>) we kept these
|
||
universal standards.</p>
|
||
<p>It is noteworthy that the concept of pattern matching has always been
|
||
closely linked to the concept of functions. The different case clauses
|
||
have always been considered as something like semi-independent functions
|
||
where pattern variables take on the role of parameters. This becomes
|
||
most apparent when pattern matching is written as an overloaded function,
|
||
along the lines of (Standard ML):</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">fun</span> <span class="n">fib</span> <span class="mi">0</span> <span class="o">=</span> <span class="mi">1</span>
|
||
<span class="o">|</span> <span class="n">fib</span> <span class="mi">1</span> <span class="o">=</span> <span class="mi">1</span>
|
||
<span class="o">|</span> <span class="n">fib</span> <span class="n">n</span> <span class="o">=</span> <span class="n">fib</span> <span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> <span class="o">+</span> <span class="n">fib</span> <span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mi">2</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Even though such a strict separation of case clauses into independent
|
||
functions does not apply in Python, we find that patterns share many
|
||
syntactic rules with parameters, such as binding arguments to unqualified
|
||
names only or that variable/parameter names must not be repeated for
|
||
a particular pattern/function.</p>
|
||
<p>With its emphasis on abstraction and encapsulation, object-oriented
|
||
programming posed a serious challenge to pattern matching. In short: in
|
||
object-oriented programming, we can no longer view objects as tagged tuples.
|
||
The arguments passed into the constructor do not necessarily specify the
|
||
attributes or fields of the objects. Moreover, there is no longer a strict
|
||
ordering of an object’s fields and some of the fields might be private and
|
||
thus inaccessible. And on top of this, the given object might actually be
|
||
an instance of a subclass with slightly different structure.</p>
|
||
<p>To address this challenge, patterns became increasingly independent of the
|
||
original tuple constructors. In a pattern like <code class="docutils literal notranslate"><span class="pre">Node(left,</span> <span class="pre">right)</span></code>,
|
||
<code class="docutils literal notranslate"><span class="pre">Node</span></code> is no longer a passive tag, but rather a function that can actively
|
||
check for any given object whether it has the right structure and extract a
|
||
<code class="docutils literal notranslate"><span class="pre">left</span></code> and <code class="docutils literal notranslate"><span class="pre">right</span></code> field. In other words: the <code class="docutils literal notranslate"><span class="pre">Node</span></code>-tag becomes a
|
||
function that transforms an object into a tuple or returns some failure
|
||
indicator if it is not possible.</p>
|
||
<p>In Python, we simply use <code class="docutils literal notranslate"><span class="pre">isinstance()</span></code> together with the <code class="docutils literal notranslate"><span class="pre">__match_args__</span></code>
|
||
field of a class to check whether an object has the correct structure and
|
||
then transform some of its attributes into a tuple. For the <code class="docutils literal notranslate"><span class="pre">Node</span></code> example
|
||
above, for instance, we would have <code class="docutils literal notranslate"><span class="pre">__match_args__</span> <span class="pre">=</span> <span class="pre">('left',</span> <span class="pre">'right')</span></code> to
|
||
indicate that these two attributes should be extracted to form the tuple.
|
||
That is, <code class="docutils literal notranslate"><span class="pre">case</span> <span class="pre">Node(x,</span> <span class="pre">y)</span></code> would first check whether a given object is an
|
||
instance of <code class="docutils literal notranslate"><span class="pre">Node</span></code> and then assign <code class="docutils literal notranslate"><span class="pre">left</span></code> to <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">right</span></code> to <code class="docutils literal notranslate"><span class="pre">y</span></code>,
|
||
respectively.</p>
|
||
<p>Paying tribute to Python’s dynamic nature with ‘duck typing’, however, we
|
||
also added a more direct way to specify the presence of, or constraints on,
|
||
specific attributes. Instead of <code class="docutils literal notranslate"><span class="pre">Node(x,</span> <span class="pre">y)</span></code> you could also write
|
||
<code class="docutils literal notranslate"><span class="pre">object(left=x,</span> <span class="pre">right=y)</span></code>, effectively eliminating the <code class="docutils literal notranslate"><span class="pre">isinstance()</span></code>
|
||
check and thus supporting any object with <code class="docutils literal notranslate"><span class="pre">left</span></code> and <code class="docutils literal notranslate"><span class="pre">right</span></code> attributes.
|
||
Or you would combine these ideas to write <code class="docutils literal notranslate"><span class="pre">Node(right=y)</span></code> so as to require
|
||
an instance of <code class="docutils literal notranslate"><span class="pre">Node</span></code> but only extract the value of the <code class="docutils literal notranslate"><span class="pre">right</span></code> attribute.</p>
|
||
</section>
|
||
<section id="backwards-compatibility">
|
||
<h2><a class="toc-backref" href="#backwards-compatibility" role="doc-backlink">Backwards Compatibility</a></h2>
|
||
<p>Through its use of “soft keywords” and the new PEG parser (<a class="pep reference internal" href="../pep-0617/" title="PEP 617 – New PEG parser for CPython">PEP 617</a>),
|
||
the proposal remains fully backwards compatible. However, 3rd party
|
||
tooling that uses an LL(1) parser to parse Python source code may be
|
||
forced to switch parser technology to be able to support those same
|
||
features.</p>
|
||
</section>
|
||
<section id="security-implications">
|
||
<h2><a class="toc-backref" href="#security-implications" role="doc-backlink">Security Implications</a></h2>
|
||
<p>We do not expect any security implications from this language feature.</p>
|
||
</section>
|
||
<section id="reference-implementation">
|
||
<h2><a class="toc-backref" href="#reference-implementation" role="doc-backlink">Reference Implementation</a></h2>
|
||
<p>A <a class="reference external" href="https://github.com/brandtbucher/cpython/tree/patma">feature-complete CPython implementation</a> is available on
|
||
GitHub.</p>
|
||
<p>An <a class="reference external" href="https://mybinder.org/v2/gh/gvanrossum/patma/master?urlpath=lab/tree/playground-622.ipynb">interactive playground</a>
|
||
based on the above implementation was created using Binder <a class="footnote-reference brackets" href="#id7" id="id4">[2]</a> and Jupyter <a class="footnote-reference brackets" href="#id8" id="id5">[3]</a>.</p>
|
||
</section>
|
||
<section id="references">
|
||
<h2><a class="toc-backref" href="#references" role="doc-backlink">References</a></h2>
|
||
<aside class="footnote-list brackets">
|
||
<aside class="footnote brackets" id="id6" role="doc-footnote">
|
||
<dt class="label">[<a href="#id1">1</a>]</dt>
|
||
<dd>Kohn et al., Dynamic Pattern Matching with Python
|
||
<a class="reference external" href="https://gvanrossum.github.io/docs/PyPatternMatching.pdf">https://gvanrossum.github.io/docs/PyPatternMatching.pdf</a></aside>
|
||
<aside class="footnote brackets" id="id7" role="doc-footnote">
|
||
<dt class="label">[<a href="#id4">2</a>]</dt>
|
||
<dd>Binder
|
||
<a class="reference external" href="https://mybinder.org">https://mybinder.org</a></aside>
|
||
<aside class="footnote brackets" id="id8" role="doc-footnote">
|
||
<dt class="label">[<a href="#id5">3</a>]</dt>
|
||
<dd>Jupyter
|
||
<a class="reference external" href="https://jupyter.org">https://jupyter.org</a></aside>
|
||
</aside>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document is placed in the public domain or under the
|
||
CC0-1.0-Universal license, whichever is more permissive.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils" />
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-0635.rst">https://github.com/python/peps/blob/main/peps/pep-0635.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-0635.rst">2023-09-09 17:39:29 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a><ul>
|
||
<li><a class="reference internal" href="#pattern-matching-and-oo">Pattern Matching and OO</a></li>
|
||
<li><a class="reference internal" href="#patterns-and-functional-style">Patterns and Functional Style</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a><ul>
|
||
<li><a class="reference internal" href="#overview-and-terminology">Overview and Terminology</a></li>
|
||
<li><a class="reference internal" href="#the-match-statement">The Match Statement</a><ul>
|
||
<li><a class="reference internal" href="#match-semantics">Match Semantics</a></li>
|
||
<li><a class="reference internal" href="#guards">Guards</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#patterns">Patterns</a><ul>
|
||
<li><a class="reference internal" href="#as-patterns">AS Patterns</a></li>
|
||
<li><a class="reference internal" href="#or-patterns">OR Patterns</a></li>
|
||
<li><a class="reference internal" href="#literal-patterns">Literal Patterns</a></li>
|
||
<li><a class="reference internal" href="#capture-patterns">Capture Patterns</a></li>
|
||
<li><a class="reference internal" href="#wildcard-pattern">Wildcard Pattern</a></li>
|
||
<li><a class="reference internal" href="#value-patterns">Value Patterns</a></li>
|
||
<li><a class="reference internal" href="#group-patterns">Group Patterns</a></li>
|
||
<li><a class="reference internal" href="#sequence-patterns">Sequence Patterns</a></li>
|
||
<li><a class="reference internal" href="#mapping-patterns">Mapping Patterns</a></li>
|
||
<li><a class="reference internal" href="#class-patterns">Class Patterns</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#history-and-context">History and Context</a></li>
|
||
<li><a class="reference internal" href="#backwards-compatibility">Backwards Compatibility</a></li>
|
||
<li><a class="reference internal" href="#security-implications">Security Implications</a></li>
|
||
<li><a class="reference internal" href="#reference-implementation">Reference Implementation</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-0635.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |