mirror of https://github.com/python/peps
296 lines
20 KiB
HTML
296 lines
20 KiB
HTML
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="color-scheme" content="light dark">
|
||
<title>PEP 278 – Universal Newline Support | peps.python.org</title>
|
||
<link rel="shortcut icon" href="../_static/py.png">
|
||
<link rel="canonical" href="https://peps.python.org/pep-0278/">
|
||
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
||
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
||
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
||
<meta property="og:title" content='PEP 278 – Universal Newline Support | peps.python.org'>
|
||
<meta property="og:type" content="website">
|
||
<meta property="og:url" content="https://peps.python.org/pep-0278/">
|
||
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
||
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
||
<meta property="og:image:alt" content="Python PEPs">
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="description" content="Python Enhancement Proposals (PEPs)">
|
||
<meta name="theme-color" content="#3776ab">
|
||
</head>
|
||
<body>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Following system colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="9"></circle>
|
||
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected dark colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected light colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
<script>
|
||
|
||
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
||
</script>
|
||
<section id="pep-page-section">
|
||
<header>
|
||
<h1>Python Enhancement Proposals</h1>
|
||
<ul class="breadcrumbs">
|
||
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
||
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
||
<li>PEP 278</li>
|
||
</ul>
|
||
<button id="colour-scheme-cycler" type="button" onclick="setColourScheme(nextColourScheme())">
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
||
</button>
|
||
</header>
|
||
<article>
|
||
<section id="pep-content">
|
||
<h1 class="page-title">PEP 278 – Universal Newline Support</h1>
|
||
<dl class="rfc2822 field-list simple">
|
||
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Jack Jansen &lt;jack at cwi.nl&gt;</dd>
|
||
<dt class="field-even">Status<span class="colon">:</span></dt>
|
||
<dd class="field-even"><abbr title="Accepted and implementation complete, or no longer active">Final</abbr></dd>
|
||
<dt class="field-odd">Type<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
|
||
<dt class="field-even">Created<span class="colon">:</span></dt>
|
||
<dd class="field-even">14-Jan-2002</dd>
|
||
<dt class="field-odd">Python-Version<span class="colon">:</span></dt>
|
||
<dd class="field-odd">2.3</dd>
|
||
<dt class="field-even">Post-History<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p></p></dd>
|
||
</dl>
|
||
<hr class="docutils" />
|
||
<section id="contents">
|
||
<details><summary>Table of Contents</summary><ul class="simple">
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#reference-implementation">Reference Implementation</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
</details></section>
|
||
<section id="abstract">
|
||
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
||
<p>This PEP discusses a way in which Python can support I/O on files
|
||
which have a newline format that is not the native format on the
|
||
platform, so that Python on each platform can read and import
|
||
files with CR (Macintosh), LF (Unix) or CR LF (Windows) line
|
||
endings.</p>
|
||
<p>It is more and more common to come across files that have an end
|
||
of line that does not match the standard on the current platform:
|
||
files downloaded over the net, remotely mounted filesystems on a
|
||
different platform, Mac OS X with its double standard of Mac and
|
||
Unix line endings, etc.</p>
|
||
<p>Many tools such as editors and compilers already handle this
|
||
gracefully; it would be good if Python did so too.</p>
|
||
</section>
|
||
<section id="specification">
|
||
<h2><a class="toc-backref" href="#specification" role="doc-backlink">Specification</a></h2>
|
||
<p>Universal newline support is enabled by default,
|
||
but can be disabled during the configure of Python.</p>
|
||
<p>In a Python with universal newline support the feature is
|
||
automatically enabled for all import statements and <code class="docutils literal notranslate"><span class="pre">execfile()</span></code>
|
||
calls. There is no special support for <code class="docutils literal notranslate"><span class="pre">eval()</span></code> or exec.</p>
|
||
<p>In a Python with universal newline support the <code class="docutils literal notranslate"><span class="pre">open()</span></code> mode
|
||
parameter can also be “U”, meaning “open for input as a text file
|
||
with universal newline interpretation”. Mode “rU” is also allowed,
|
||
for symmetry with “rb”. Mode “U” cannot be
|
||
combined with other mode flags such as “+”. Any line ending in the
|
||
input file will be seen as a <code class="docutils literal notranslate"><span class="pre">'\n'</span></code> in Python, so little other code has
|
||
to change to handle universal newlines.</p>
|
||
<p>Conversion of newlines happens in all calls that read data: <code class="docutils literal notranslate"><span class="pre">read()</span></code>,
|
||
<code class="docutils literal notranslate"><span class="pre">readline()</span></code>, <code class="docutils literal notranslate"><span class="pre">readlines()</span></code>, etc.</p>
|
||
<p>There is no special support for output to file with a different
|
||
newline convention, and so mode “wU” is also illegal.</p>
|
||
<p>A file object that has been opened in universal newline mode gets
|
||
a new attribute “newlines” which reflects the newline convention
|
||
used in the file. The value for this attribute is one of None (no
|
||
newline read yet), <code class="docutils literal notranslate"><span class="pre">"\r"</span></code>, <code class="docutils literal notranslate"><span class="pre">"\n"</span></code>, <code class="docutils literal notranslate"><span class="pre">"\r\n"</span></code> or a tuple containing all the
|
||
newline types seen.</p>
|
||
</section>
|
||
<section id="rationale">
|
||
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
||
<p>Universal newline support is implemented in C, not in Python.
|
||
This is done because we want files with a foreign newline
|
||
convention to be import-able, so a Python Lib directory can be
|
||
shared over a remote file system connection, or between MacPython
|
||
and Unix-Python on Mac OS X. For this to be feasible the
|
||
universal newline convention needs to have a reasonably small
|
||
impact on performance, which means a Python implementation is not
|
||
an option as it would bog down all imports. And because of files
|
||
with multiple newline conventions, which Visual C++ and other
|
||
Windows tools will happily produce, doing a quick check for the
|
||
newlines used in a file (handing off the import to C code if a
|
||
platform-local newline is seen) will not work. Finally, a C
|
||
implementation also allows tracebacks and such (which open the
|
||
Python source module) to be handled easily.</p>
|
||
<p>There is no output implementation of universal newlines; Python
|
||
programs are expected to handle this by themselves or write files
|
||
with platform-local convention otherwise. The reason for this is
|
||
that input is the difficult case, outputting different newlines to
|
||
a file is already easy enough in Python.</p>
|
||
<p>Also, an output implementation would be much more difficult than an
|
||
input implementation, surprisingly: a lot of output is done through
|
||
<code class="docutils literal notranslate"><span class="pre">PyXXX_Print()</span></code> methods, and at this point the file object is not
|
||
available anymore, only a <code class="docutils literal notranslate"><span class="pre">FILE</span> <span class="pre">*</span></code>. So, an output implementation would
|
||
need to somehow go from the <code class="docutils literal notranslate"><span class="pre">FILE*</span></code> to the file object, because that
|
||
is where the current newline delimiter is stored.</p>
|
||
<p>The input implementation has no such problem: there are no cases in
|
||
the Python source tree where files are partially read from C,
|
||
partially from Python, and such cases are expected to be rare in
|
||
extension modules. If such cases exist the only problem is that the
|
||
newlines attribute of the file object is not updated during the
|
||
<code class="docutils literal notranslate"><span class="pre">fread()</span></code> or <code class="docutils literal notranslate"><span class="pre">fgets()</span></code> calls that are done direct from C.</p>
|
||
<p>A partial output implementation, where strings passed to <code class="docutils literal notranslate"><span class="pre">fp.write()</span></code>
|
||
would be converted to use <code class="docutils literal notranslate"><span class="pre">fp.newlines</span></code> as their line terminator but
|
||
all other output would not, is far too surprising, in my view.</p>
|
||
<p>Because there is no output support for universal newlines there is
|
||
also no support for a mode “rU+”: the surprise factor of the
|
||
previous paragraph would hold to an even stronger degree.</p>
|
||
<p>There is no support for universal newlines in strings passed to
|
||
<code class="docutils literal notranslate"><span class="pre">eval()</span></code> or <code class="docutils literal notranslate"><span class="pre">exec</span></code>. It is envisioned that such strings always have the
|
||
standard <code class="docutils literal notranslate"><span class="pre">\n</span></code> line feed; if the strings come from a file, that file can
|
||
be read with universal newlines.</p>
|
||
<p>I think there are no special issues with unicode. utf-16 shouldn’t
|
||
pose any new problems, as such files need to be opened in binary
|
||
mode anyway. Interaction with utf-8 is fine too: values 0x0a and 0x0d
|
||
cannot occur as part of a multibyte sequence.</p>
|
||
<p>Universal newline files should work fine with iterators and
|
||
<code class="docutils literal notranslate"><span class="pre">xreadlines()</span></code> as these eventually call the normal file
|
||
readline/readlines methods.</p>
|
||
<p>While universal newlines are automatically enabled for import they
|
||
are not for opening, where you have to specifically say <code class="docutils literal notranslate"><span class="pre">open(...,</span>
|
||
<span class="pre">"U")</span></code>. This is open to debate, but here are a few reasons for this
|
||
design:</p>
|
||
<ul class="simple">
|
||
<li>Compatibility. Programs which already do their own
|
||
interpretation of <code class="docutils literal notranslate"><span class="pre">\r\n</span></code> in text files would break. Examples of such
|
||
programs would be editors which warn you when you open a file with
|
||
a different newline convention. If universal newlines were made the
|
||
default such an editor would silently convert your line endings to
|
||
the local convention on save. Programs which open binary files as
|
||
text files on Unix would also break (but it could be argued they
|
||
deserve it :-).</li>
|
||
<li>Interface clarity. Universal newlines are only supported for
|
||
input files, not for input/output files, as the semantics would
|
||
become muddy. Would you write Mac newlines if all reads so far
|
||
had encountered Mac newlines? But what if you then later read a
|
||
Unix newline?</li>
|
||
</ul>
|
||
<p>The <code class="docutils literal notranslate"><span class="pre">newlines</span></code> attribute is included so that programs that really
|
||
care about the newline convention, such as text editors, can
|
||
examine what was in a file. They can then save (a copy of) the
|
||
file with the same newline convention (or, in case of a file with
|
||
mixed newlines, ask the user what to do, or output in platform
|
||
convention).</p>
|
||
<p>Feedback is explicitly solicited on one item in the reference
|
||
implementation: whether or not the universal newlines routines
|
||
should grab the global interpreter lock. Currently they do not,
|
||
but this could be considered living dangerously, as they may
|
||
modify fields in a <code class="docutils literal notranslate"><span class="pre">FileObject</span></code>. But as these routines are
|
||
replacements for <code class="docutils literal notranslate"><span class="pre">fgets()</span></code> and <code class="docutils literal notranslate"><span class="pre">fread()</span></code> as well it may be difficult
|
||
to decide whether or not the lock is held when the routine is
|
||
called. Moreover, the only danger is that if two threads read the
|
||
same <code class="docutils literal notranslate"><span class="pre">FileObject</span></code> at the same time an extraneous newline may be seen
|
||
or the <code class="docutils literal notranslate"><span class="pre">newlines</span></code> attribute may inadvertently be set to mixed. I
|
||
would argue that if you read the same <code class="docutils literal notranslate"><span class="pre">FileObject</span></code> in two threads
|
||
simultaneously you are asking for trouble anyway.</p>
|
||
<p>Note that no globally accessible pointers are manipulated in the
|
||
<code class="docutils literal notranslate"><span class="pre">fgets()</span></code> or <code class="docutils literal notranslate"><span class="pre">fread()</span></code> replacement routines, just some integer-valued
|
||
flags, so the chances of core dumps are zero (he said:-).</p>
|
||
<p>Universal newline support can be disabled during configure because it does
|
||
have a small performance penalty, and moreover the implementation has
|
||
not been tested on all conceivable platforms yet. It might also be silly
|
||
on some platforms (WinCE or Palm devices, for instance). If universal
|
||
newline support is not enabled then file objects do not have the <code class="docutils literal notranslate"><span class="pre">newlines</span></code>
|
||
attribute, so testing whether the current Python has it can be done with a
|
||
simple:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="nb">open</span><span class="p">,</span> <span class="s1">'newlines'</span><span class="p">):</span>
|
||
    <span class="nb">print</span> <span class="s1">'We have universal newline support'</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Note that this test uses the <code class="docutils literal notranslate"><span class="pre">open()</span></code> function rather than the <code class="docutils literal notranslate"><span class="pre">file</span></code>
|
||
type so that it won’t fail for versions of Python where the <code class="docutils literal notranslate"><span class="pre">file</span></code>
|
||
type was not available (the <code class="docutils literal notranslate"><span class="pre">file</span></code> type was added to the built-in
|
||
namespace in the same release as the universal newline feature was
|
||
added).</p>
|
||
<p>Additionally, note that this test fails again on Python versions
|
||
&gt;= 2.5, when <code class="docutils literal notranslate"><span class="pre">open()</span></code> was made a function again and is not synonymous
|
||
with the <code class="docutils literal notranslate"><span class="pre">file</span></code> type anymore.</p>
|
||
</section>
|
||
<section id="reference-implementation">
|
||
<h2><a class="toc-backref" href="#reference-implementation" role="doc-backlink">Reference Implementation</a></h2>
|
||
<p>A reference implementation is available in SourceForge patch
|
||
#476814: <a class="reference external" href="https://bugs.python.org/issue476814">https://bugs.python.org/issue476814</a></p>
|
||
</section>
|
||
<section id="references">
|
||
<h2><a class="toc-backref" href="#references" role="doc-backlink">References</a></h2>
|
||
<p>None.</p>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document has been placed in the public domain.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils" />
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-0278.rst">https://github.com/python/peps/blob/main/peps/pep-0278.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-0278.rst">2023-09-09 17:39:29 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#reference-implementation">Reference Implementation</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-0278.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |