add files
This commit is contained in:
888
python-3.7.4-docs-html/howto/argparse.html
Normal file
888
python-3.7.4-docs-html/howto/argparse.html
Normal file
@@ -0,0 +1,888 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Argparse Tutorial — Python 3.7.4 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pydoctheme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/sidebar.js"></script>
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml"
|
||||
title="Search within Python 3.7.4 documentation"
|
||||
href="../_static/opensearch.xml"/>
|
||||
<link rel="author" title="About these documents" href="../about.html" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||||
<link rel="next" title="An introduction to the ipaddress module" href="ipaddress.html" />
|
||||
<link rel="prev" title="HOWTO Fetch Internet Resources Using The urllib Package" href="urllib2.html" />
|
||||
<link rel="shortcut icon" type="image/png" href="../_static/py.png" />
|
||||
<link rel="canonical" href="https://docs.python.org/3/howto/argparse.html" />
|
||||
|
||||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||||
<script type="text/javascript" src="../_static/switchers.js"></script>
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
@media only screen {
|
||||
table.full-width-table {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
</head><body>
|
||||
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="ipaddress.html" title="An introduction to the ipaddress module"
|
||||
accesskey="N">next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="urllib2.html" title="HOWTO Fetch Internet Resources Using The urllib Package"
|
||||
accesskey="P">previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<div class="section" id="argparse-tutorial">
|
||||
<h1>Argparse Tutorial<a class="headerlink" href="#argparse-tutorial" title="Permalink to this headline">¶</a></h1>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">author</dt>
|
||||
<dd class="field-odd"><p>Tshepang Lekhonkhobe</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<p id="id1">This tutorial is intended to be a gentle introduction to <a class="reference internal" href="../library/argparse.html#module-argparse" title="argparse: Command-line option and argument parsing library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">argparse</span></code></a>, the
|
||||
recommended command-line parsing module in the Python standard library.</p>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>There are two other modules that fulfill the same task, namely
|
||||
<a class="reference internal" href="../library/getopt.html#module-getopt" title="getopt: Portable parser for command line options; support both short and long option names."><code class="xref py py-mod docutils literal notranslate"><span class="pre">getopt</span></code></a> (an equivalent for <code class="xref c c-func docutils literal notranslate"><span class="pre">getopt()</span></code> from the C
|
||||
language) and the deprecated <a class="reference internal" href="../library/optparse.html#module-optparse" title="optparse: Command-line option parsing library. (deprecated)"><code class="xref py py-mod docutils literal notranslate"><span class="pre">optparse</span></code></a>.
|
||||
Note also that <a class="reference internal" href="../library/argparse.html#module-argparse" title="argparse: Command-line option and argument parsing library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">argparse</span></code></a> is based on <a class="reference internal" href="../library/optparse.html#module-optparse" title="optparse: Command-line option parsing library. (deprecated)"><code class="xref py py-mod docutils literal notranslate"><span class="pre">optparse</span></code></a>,
|
||||
and is therefore very similar in terms of usage.</p>
|
||||
</div>
|
||||
<div class="section" id="concepts">
|
||||
<h2>Concepts<a class="headerlink" href="#concepts" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Let’s show the sort of functionality that we are going to explore in this
|
||||
introductory tutorial by making use of the <strong class="command">ls</strong> command:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> ls
|
||||
<span class="go">cpython devguide prog.py pypy rm-unused-function.patch</span>
|
||||
<span class="gp">$</span> ls pypy
|
||||
<span class="go">ctypes_configure demo dotviewer include lib_pypy lib-python ...</span>
|
||||
<span class="gp">$</span> ls -l
|
||||
<span class="go">total 20</span>
|
||||
<span class="go">drwxr-xr-x 19 wena wena 4096 Feb 18 18:51 cpython</span>
|
||||
<span class="go">drwxr-xr-x 4 wena wena 4096 Feb 8 12:04 devguide</span>
|
||||
<span class="go">-rwxr-xr-x 1 wena wena 535 Feb 19 00:05 prog.py</span>
|
||||
<span class="go">drwxr-xr-x 14 wena wena 4096 Feb 7 00:59 pypy</span>
|
||||
<span class="go">-rw-r--r-- 1 wena wena 741 Feb 18 01:01 rm-unused-function.patch</span>
|
||||
<span class="gp">$</span> ls --help
|
||||
<span class="go">Usage: ls [OPTION]... [FILE]...</span>
|
||||
<span class="go">List information about the FILEs (the current directory by default).</span>
|
||||
<span class="go">Sort entries alphabetically if none of -cftuvSUX nor --sort is specified.</span>
|
||||
<span class="go">...</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>A few concepts we can learn from the four commands:</p>
|
||||
<ul class="simple">
|
||||
<li><p>The <strong class="command">ls</strong> command is useful when run without any options at all. It defaults
|
||||
to displaying the contents of the current directory.</p></li>
|
||||
<li><p>If we want more than what it provides by default, we tell it a bit more. In
|
||||
this case, we want it to display a different directory, <code class="docutils literal notranslate"><span class="pre">pypy</span></code>.
|
||||
What we did is specify what is known as a positional argument. It’s named so
|
||||
because the program should know what to do with the value, solely based on
|
||||
where it appears on the command line. This concept is more relevant
|
||||
to a command like <strong class="command">cp</strong>, whose most basic usage is <code class="docutils literal notranslate"><span class="pre">cp</span> <span class="pre">SRC</span> <span class="pre">DEST</span></code>.
|
||||
The first position is <em>what you want copied,</em> and the second
|
||||
position is <em>where you want it copied to</em>.</p></li>
|
||||
<li><p>Now, say we want to change the behaviour of the program. In our example,
|
||||
we display more info for each file instead of just showing the file names.
|
||||
The <code class="docutils literal notranslate"><span class="pre">-l</span></code> in that case is known as an optional argument.</p></li>
|
||||
<li><p>That’s a snippet of the help text. It’s very useful in that you can
|
||||
come across a program you have never used before, and can figure out
|
||||
how it works simply by reading its help text.</p></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="section" id="the-basics">
|
||||
<h2>The basics<a class="headerlink" href="#the-basics" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Let us start with a very simple example which does (almost) nothing:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Following is a result of running the code:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py
|
||||
<span class="gp">$</span> python3 prog.py --help
|
||||
<span class="go">usage: prog.py [-h]</span>
|
||||
|
||||
<span class="go">optional arguments:</span>
|
||||
<span class="go"> -h, --help show this help message and exit</span>
|
||||
<span class="gp">$</span> python3 prog.py --verbose
|
||||
<span class="go">usage: prog.py [-h]</span>
|
||||
<span class="go">prog.py: error: unrecognized arguments: --verbose</span>
|
||||
<span class="gp">$</span> python3 prog.py foo
|
||||
<span class="go">usage: prog.py [-h]</span>
|
||||
<span class="go">prog.py: error: unrecognized arguments: foo</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Here is what is happening:</p>
|
||||
<ul class="simple">
|
||||
<li><p>Running the script without any options results in nothing displayed to
|
||||
stdout. Not so useful.</p></li>
|
||||
<li><p>The second one starts to display the usefulness of the <a class="reference internal" href="../library/argparse.html#module-argparse" title="argparse: Command-line option and argument parsing library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">argparse</span></code></a>
|
||||
module. We have done almost nothing, but already we get a nice help message.</p></li>
|
||||
<li><p>The <code class="docutils literal notranslate"><span class="pre">--help</span></code> option, which can also be shortened to <code class="docutils literal notranslate"><span class="pre">-h</span></code>, is the only
|
||||
option we get for free (i.e. no need to specify it). Specifying anything
|
||||
else results in an error. But even then, we do get a useful usage message,
|
||||
also for free.</p></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="section" id="introducing-positional-arguments">
|
||||
<h2>Introducing Positional arguments<a class="headerlink" href="#introducing-positional-arguments" title="Permalink to this headline">¶</a></h2>
|
||||
<p>An example:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"echo"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">echo</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>And running the code:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py
|
||||
<span class="go">usage: prog.py [-h] echo</span>
|
||||
<span class="go">prog.py: error: the following arguments are required: echo</span>
|
||||
<span class="gp">$</span> python3 prog.py --help
|
||||
<span class="go">usage: prog.py [-h] echo</span>
|
||||
|
||||
<span class="go">positional arguments:</span>
|
||||
<span class="go"> echo</span>
|
||||
|
||||
<span class="go">optional arguments:</span>
|
||||
<span class="go"> -h, --help show this help message and exit</span>
|
||||
<span class="gp">$</span> python3 prog.py foo
|
||||
<span class="go">foo</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Here is what’s happening:</p>
|
||||
<ul class="simple">
|
||||
<li><p>We’ve added the <code class="xref py py-meth docutils literal notranslate"><span class="pre">add_argument()</span></code> method, which is what we use to specify
|
||||
which command-line options the program is willing to accept. In this case,
|
||||
I’ve named it <code class="docutils literal notranslate"><span class="pre">echo</span></code> so that it’s in line with its function.</p></li>
|
||||
<li><p>Calling our program now requires us to specify an option.</p></li>
|
||||
<li><p>The <code class="xref py py-meth docutils literal notranslate"><span class="pre">parse_args()</span></code> method actually returns some data from the
|
||||
options specified, in this case, <code class="docutils literal notranslate"><span class="pre">echo</span></code>.</p></li>
|
||||
<li><p>The variable is some form of ‘magic’ that <a class="reference internal" href="../library/argparse.html#module-argparse" title="argparse: Command-line option and argument parsing library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">argparse</span></code></a> performs for free
|
||||
(i.e. no need to specify which variable that value is stored in).
|
||||
You will also notice that its name matches the string argument given
|
||||
to the method, <code class="docutils literal notranslate"><span class="pre">echo</span></code>.</p></li>
|
||||
</ul>
|
||||
<p>Note however that, although the help display looks nice and all, it currently
|
||||
is not as helpful as it can be. For example we see that we got <code class="docutils literal notranslate"><span class="pre">echo</span></code> as a
|
||||
positional argument, but we don’t know what it does, other than by guessing or
|
||||
by reading the source code. So, let’s make it a bit more useful:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"echo"</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"echo the string you use here"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">echo</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>And we get:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py -h
|
||||
<span class="go">usage: prog.py [-h] echo</span>
|
||||
|
||||
<span class="go">positional arguments:</span>
|
||||
<span class="go"> echo echo the string you use here</span>
|
||||
|
||||
<span class="go">optional arguments:</span>
|
||||
<span class="go"> -h, --help show this help message and exit</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Now, how about doing something even more useful:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"square"</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"display a square of a given number"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="o">**</span><span class="mi">2</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Following is a result of running the code:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py <span class="m">4</span>
|
||||
<span class="go">Traceback (most recent call last):</span>
|
||||
<span class="go"> File "prog.py", line 5, in <module></span>
|
||||
<span class="go"> print(args.square**2)</span>
|
||||
<span class="go">TypeError: unsupported operand type(s) for ** or pow(): 'str' and 'int'</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>That didn’t go so well. That’s because <a class="reference internal" href="../library/argparse.html#module-argparse" title="argparse: Command-line option and argument parsing library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">argparse</span></code></a> treats the options we
|
||||
give it as strings, unless we tell it otherwise. So, let’s tell
|
||||
<a class="reference internal" href="../library/argparse.html#module-argparse" title="argparse: Command-line option and argument parsing library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">argparse</span></code></a> to treat that input as an integer:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"square"</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"display a square of a given number"</span><span class="p">,</span>
|
||||
<span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="o">**</span><span class="mi">2</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Following is a result of running the code:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py <span class="m">4</span>
|
||||
<span class="go">16</span>
|
||||
<span class="gp">$</span> python3 prog.py four
|
||||
<span class="go">usage: prog.py [-h] square</span>
|
||||
<span class="go">prog.py: error: argument square: invalid int value: 'four'</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>That went well. The program now even helpfully quits on illegal input
|
||||
before proceeding.</p>
|
||||
</div>
|
||||
<div class="section" id="introducing-optional-arguments">
|
||||
<h2>Introducing Optional arguments<a class="headerlink" href="#introducing-optional-arguments" title="Permalink to this headline">¶</a></h2>
|
||||
<p>So far we have been playing with positional arguments. Let us
|
||||
have a look at how to add optional ones:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"--verbosity"</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"increase output verbosity"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"verbosity turned on"</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>And the output:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py --verbosity <span class="m">1</span>
|
||||
<span class="go">verbosity turned on</span>
|
||||
<span class="gp">$</span> python3 prog.py
|
||||
<span class="gp">$</span> python3 prog.py --help
|
||||
<span class="go">usage: prog.py [-h] [--verbosity VERBOSITY]</span>
|
||||
|
||||
<span class="go">optional arguments:</span>
|
||||
<span class="go"> -h, --help show this help message and exit</span>
|
||||
<span class="go"> --verbosity VERBOSITY</span>
|
||||
<span class="go"> increase output verbosity</span>
|
||||
<span class="gp">$</span> python3 prog.py --verbosity
|
||||
<span class="go">usage: prog.py [-h] [--verbosity VERBOSITY]</span>
|
||||
<span class="go">prog.py: error: argument --verbosity: expected one argument</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Here is what is happening:</p>
|
||||
<ul class="simple">
|
||||
<li><p>The program is written so as to display something when <code class="docutils literal notranslate"><span class="pre">--verbosity</span></code> is
|
||||
specified and display nothing when not.</p></li>
|
||||
<li><p>To show that the option is actually optional, there is no error when running
|
||||
the program without it. Note that by default, if an optional argument isn’t
|
||||
used, the relevant variable, in this case <code class="xref py py-attr docutils literal notranslate"><span class="pre">args.verbosity</span></code>, is
|
||||
given <code class="docutils literal notranslate"><span class="pre">None</span></code> as a value, which is the reason it fails the truth
|
||||
test of the <a class="reference internal" href="../reference/compound_stmts.html#if"><code class="xref std std-keyword docutils literal notranslate"><span class="pre">if</span></code></a> statement.</p></li>
|
||||
<li><p>The help message is a bit different.</p></li>
|
||||
<li><p>When using the <code class="docutils literal notranslate"><span class="pre">--verbosity</span></code> option, one must also specify some value,
|
||||
any value.</p></li>
|
||||
</ul>
|
||||
<p>The above example accepts arbitrary values for <code class="docutils literal notranslate"><span class="pre">--verbosity</span></code>, but for
|
||||
our simple program, only two values are actually useful, <code class="docutils literal notranslate"><span class="pre">True</span></code> or <code class="docutils literal notranslate"><span class="pre">False</span></code>.
|
||||
Let’s modify the code accordingly:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"--verbose"</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"increase output verbosity"</span><span class="p">,</span>
|
||||
<span class="n">action</span><span class="o">=</span><span class="s2">"store_true"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"verbosity turned on"</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>And the output:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py --verbose
|
||||
<span class="go">verbosity turned on</span>
|
||||
<span class="gp">$</span> python3 prog.py --verbose <span class="m">1</span>
|
||||
<span class="go">usage: prog.py [-h] [--verbose]</span>
|
||||
<span class="go">prog.py: error: unrecognized arguments: 1</span>
|
||||
<span class="gp">$</span> python3 prog.py --help
|
||||
<span class="go">usage: prog.py [-h] [--verbose]</span>
|
||||
|
||||
<span class="go">optional arguments:</span>
|
||||
<span class="go"> -h, --help show this help message and exit</span>
|
||||
<span class="go"> --verbose increase output verbosity</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Here is what is happening:</p>
|
||||
<ul class="simple">
|
||||
<li><p>The option is now more of a flag than something that requires a value.
|
||||
We even changed the name of the option to match that idea.
|
||||
Note that we now specify a new keyword, <code class="docutils literal notranslate"><span class="pre">action</span></code>, and give it the value
|
||||
<code class="docutils literal notranslate"><span class="pre">"store_true"</span></code>. This means that, if the option is specified,
|
||||
assign the value <code class="docutils literal notranslate"><span class="pre">True</span></code> to <code class="xref py py-data docutils literal notranslate"><span class="pre">args.verbose</span></code>.
|
||||
Not specifying it implies <code class="docutils literal notranslate"><span class="pre">False</span></code>.</p></li>
|
||||
<li><p>It complains when you specify a value, in the true spirit of what flags
|
||||
actually are.</p></li>
|
||||
<li><p>Notice the different help text.</p></li>
|
||||
</ul>
|
||||
<div class="section" id="short-options">
|
||||
<h3>Short options<a class="headerlink" href="#short-options" title="Permalink to this headline">¶</a></h3>
|
||||
<p>If you are familiar with command line usage,
|
||||
you will notice that I haven’t yet touched on the topic of short
|
||||
versions of the options. It’s quite simple:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-v"</span><span class="p">,</span> <span class="s2">"--verbose"</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"increase output verbosity"</span><span class="p">,</span>
|
||||
<span class="n">action</span><span class="o">=</span><span class="s2">"store_true"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"verbosity turned on"</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>And here goes:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py -v
|
||||
<span class="go">verbosity turned on</span>
|
||||
<span class="gp">$</span> python3 prog.py --help
|
||||
<span class="go">usage: prog.py [-h] [-v]</span>
|
||||
|
||||
<span class="go">optional arguments:</span>
|
||||
<span class="go"> -h, --help show this help message and exit</span>
|
||||
<span class="go"> -v, --verbose increase output verbosity</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Note that the new ability is also reflected in the help text.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="combining-positional-and-optional-arguments">
|
||||
<h2>Combining Positional and Optional arguments<a class="headerlink" href="#combining-positional-and-optional-arguments" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Our program keeps growing in complexity:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"square"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span>
|
||||
<span class="n">help</span><span class="o">=</span><span class="s2">"display a square of a given number"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-v"</span><span class="p">,</span> <span class="s2">"--verbose"</span><span class="p">,</span> <span class="n">action</span><span class="o">=</span><span class="s2">"store_true"</span><span class="p">,</span>
|
||||
<span class="n">help</span><span class="o">=</span><span class="s2">"increase output verbosity"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="n">answer</span> <span class="o">=</span> <span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="o">**</span><span class="mi">2</span>
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"the square of </span><span class="si">{}</span><span class="s2"> equals </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">answer</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>And now the output:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py
|
||||
<span class="go">usage: prog.py [-h] [-v] square</span>
|
||||
<span class="go">prog.py: error: the following arguments are required: square</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span>
|
||||
<span class="go">16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> --verbose
|
||||
<span class="go">the square of 4 equals 16</span>
|
||||
<span class="gp">$</span> python3 prog.py --verbose <span class="m">4</span>
|
||||
<span class="go">the square of 4 equals 16</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<ul class="simple">
|
||||
<li><p>We’ve brought back a positional argument, hence the complaint.</p></li>
|
||||
<li><p>Note that the order does not matter.</p></li>
|
||||
</ul>
|
||||
<p>How about we give this program of ours back the ability to have
|
||||
multiple verbosity values, and actually get to use them:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"square"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span>
|
||||
<span class="n">help</span><span class="o">=</span><span class="s2">"display a square of a given number"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-v"</span><span class="p">,</span> <span class="s2">"--verbosity"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span>
|
||||
<span class="n">help</span><span class="o">=</span><span class="s2">"increase output verbosity"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="n">answer</span> <span class="o">=</span> <span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="o">**</span><span class="mi">2</span>
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"the square of </span><span class="si">{}</span><span class="s2"> equals </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">elif</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">{}</span><span class="s2">^2 == </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">answer</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>And the output:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py <span class="m">4</span>
|
||||
<span class="go">16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> -v
|
||||
<span class="go">usage: prog.py [-h] [-v VERBOSITY] square</span>
|
||||
<span class="go">prog.py: error: argument -v/--verbosity: expected one argument</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> -v <span class="m">1</span>
|
||||
<span class="go">4^2 == 16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> -v <span class="m">2</span>
|
||||
<span class="go">the square of 4 equals 16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> -v <span class="m">3</span>
|
||||
<span class="go">16</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>These all look good except the last one, which exposes a bug in our program.
|
||||
Let’s fix it by restricting the values the <code class="docutils literal notranslate"><span class="pre">--verbosity</span></code> option can accept:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"square"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span>
|
||||
<span class="n">help</span><span class="o">=</span><span class="s2">"display a square of a given number"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-v"</span><span class="p">,</span> <span class="s2">"--verbosity"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span> <span class="n">choices</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span>
|
||||
<span class="n">help</span><span class="o">=</span><span class="s2">"increase output verbosity"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="n">answer</span> <span class="o">=</span> <span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="o">**</span><span class="mi">2</span>
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"the square of </span><span class="si">{}</span><span class="s2"> equals </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">elif</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">{}</span><span class="s2">^2 == </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">answer</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>And the output:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py <span class="m">4</span> -v <span class="m">3</span>
|
||||
<span class="go">usage: prog.py [-h] [-v {0,1,2}] square</span>
|
||||
<span class="go">prog.py: error: argument -v/--verbosity: invalid choice: 3 (choose from 0, 1, 2)</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> -h
|
||||
<span class="go">usage: prog.py [-h] [-v {0,1,2}] square</span>
|
||||
|
||||
<span class="go">positional arguments:</span>
|
||||
<span class="go"> square display a square of a given number</span>
|
||||
|
||||
<span class="go">optional arguments:</span>
|
||||
<span class="go"> -h, --help show this help message and exit</span>
|
||||
<span class="go"> -v {0,1,2}, --verbosity {0,1,2}</span>
|
||||
<span class="go"> increase output verbosity</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Note that the change is also reflected in both the error message and the
|
||||
help string.</p>
|
||||
<p>Now, let’s use a different approach of playing with verbosity, which is pretty
|
||||
common. It also matches the way the CPython executable handles its own
|
||||
verbosity argument (check the output of <code class="docutils literal notranslate"><span class="pre">python</span> <span class="pre">--help</span></code>):</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"square"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span>
|
||||
                    <span class="n">help</span><span class="o">=</span><span class="s2">"display a square of a given number"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-v"</span><span class="p">,</span> <span class="s2">"--verbosity"</span><span class="p">,</span> <span class="n">action</span><span class="o">=</span><span class="s2">"count"</span><span class="p">,</span>
|
||||
<span class="n">help</span><span class="o">=</span><span class="s2">"increase output verbosity"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="n">answer</span> <span class="o">=</span> <span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="o">**</span><span class="mi">2</span>
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"the square of </span><span class="si">{}</span><span class="s2"> equals </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">elif</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">{}</span><span class="s2">^2 == </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">answer</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>We have introduced another action, “count”,
|
||||
to count the number of occurrences of a specific optional argument:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py <span class="m">4</span>
|
||||
<span class="go">16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> -v
|
||||
<span class="go">4^2 == 16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> -vv
|
||||
<span class="go">the square of 4 equals 16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> --verbosity --verbosity
|
||||
<span class="go">the square of 4 equals 16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> -v <span class="m">1</span>
|
||||
<span class="go">usage: prog.py [-h] [-v] square</span>
|
||||
<span class="go">prog.py: error: unrecognized arguments: 1</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> -h
|
||||
<span class="go">usage: prog.py [-h] [-v] square</span>
|
||||
|
||||
<span class="go">positional arguments:</span>
|
||||
<span class="go"> square display a square of a given number</span>
|
||||
|
||||
<span class="go">optional arguments:</span>
|
||||
<span class="go"> -h, --help show this help message and exit</span>
|
||||
<span class="go"> -v, --verbosity increase output verbosity</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> -vvv
|
||||
<span class="go">16</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<ul class="simple">
|
||||
<li><p>Yes, it’s now more of a flag (similar to <code class="docutils literal notranslate"><span class="pre">action="store_true"</span></code>) in the
|
||||
previous version of our script. That should explain the complaint.</p></li>
|
||||
<li><p>It also behaves similarly to the “store_true” action.</p></li>
|
||||
<li><p>Now here’s a demonstration of what the “count” action gives. You’ve probably
|
||||
seen this sort of usage before.</p></li>
|
||||
<li><p>And if you don’t specify the <code class="docutils literal notranslate"><span class="pre">-v</span></code> flag, that flag is considered to have
|
||||
<code class="docutils literal notranslate"><span class="pre">None</span></code> value.</p></li>
|
||||
<li><p>As should be expected, specifying the long form of the flag, we should get
|
||||
the same output.</p></li>
|
||||
<li><p>Sadly, our help output isn’t very informative on the new ability our script
|
||||
has acquired, but that can always be fixed by improving the documentation for
|
||||
our script (e.g. via the <code class="docutils literal notranslate"><span class="pre">help</span></code> keyword argument).</p></li>
|
||||
<li><p>That last output exposes a bug in our program.</p></li>
|
||||
</ul>
|
||||
<p>Let’s fix it:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"square"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span>
|
||||
<span class="n">help</span><span class="o">=</span><span class="s2">"display a square of a given number"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-v"</span><span class="p">,</span> <span class="s2">"--verbosity"</span><span class="p">,</span> <span class="n">action</span><span class="o">=</span><span class="s2">"count"</span><span class="p">,</span>
|
||||
<span class="n">help</span><span class="o">=</span><span class="s2">"increase output verbosity"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="n">answer</span> <span class="o">=</span> <span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="o">**</span><span class="mi">2</span>
|
||||
|
||||
<span class="c1"># bugfix: replace == with >=</span>
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">>=</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"the square of </span><span class="si">{}</span><span class="s2"> equals </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">elif</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">>=</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">{}</span><span class="s2">^2 == </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">answer</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>And this is what it gives:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py <span class="m">4</span> -vvv
|
||||
<span class="go">the square of 4 equals 16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> -vvvv
|
||||
<span class="go">the square of 4 equals 16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span>
|
||||
<span class="go">Traceback (most recent call last):</span>
|
||||
<span class="go">  File "prog.py", line 11, in &lt;module&gt;</span>
|
||||
<span class="go"> if args.verbosity >= 2:</span>
|
||||
<span class="go">TypeError: '>=' not supported between instances of 'NoneType' and 'int'</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<ul class="simple">
|
||||
<li><p>First output went well, and fixes the bug we had before.
|
||||
That is, we want any value >= 2 to be as verbose as possible.</p></li>
|
||||
<li><p>Third output not so good.</p></li>
|
||||
</ul>
|
||||
<p>Let’s fix that bug:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"square"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span>
|
||||
<span class="n">help</span><span class="o">=</span><span class="s2">"display a square of a given number"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-v"</span><span class="p">,</span> <span class="s2">"--verbosity"</span><span class="p">,</span> <span class="n">action</span><span class="o">=</span><span class="s2">"count"</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
|
||||
<span class="n">help</span><span class="o">=</span><span class="s2">"increase output verbosity"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="n">answer</span> <span class="o">=</span> <span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="o">**</span><span class="mi">2</span>
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">>=</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"the square of </span><span class="si">{}</span><span class="s2"> equals </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">elif</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">>=</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">{}</span><span class="s2">^2 == </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">square</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">answer</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>We’ve just introduced yet another keyword, <code class="docutils literal notranslate"><span class="pre">default</span></code>.
|
||||
We’ve set it to <code class="docutils literal notranslate"><span class="pre">0</span></code> in order to make it comparable to the other int values.
|
||||
Remember that by default,
|
||||
if an optional argument isn’t specified,
|
||||
it gets the <code class="docutils literal notranslate"><span class="pre">None</span></code> value, and that cannot be compared to an int value
|
||||
(hence the <a class="reference internal" href="../library/exceptions.html#TypeError" title="TypeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> exception).</p>
|
||||
<p>And:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py <span class="m">4</span>
|
||||
<span class="go">16</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>You can go quite far just with what we’ve learned so far,
|
||||
and we have only scratched the surface.
|
||||
The <a class="reference internal" href="../library/argparse.html#module-argparse" title="argparse: Command-line option and argument parsing library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">argparse</span></code></a> module is very powerful,
|
||||
and we’ll explore a bit more of it before we end this tutorial.</p>
|
||||
</div>
|
||||
<div class="section" id="getting-a-little-more-advanced">
|
||||
<h2>Getting a little more advanced<a class="headerlink" href="#getting-a-little-more-advanced" title="Permalink to this headline">¶</a></h2>
|
||||
<p>What if we wanted to expand our tiny program to perform other powers,
|
||||
not just squares:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"x"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"the base"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"y"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"the exponent"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-v"</span><span class="p">,</span> <span class="s2">"--verbosity"</span><span class="p">,</span> <span class="n">action</span><span class="o">=</span><span class="s2">"count"</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="n">answer</span> <span class="o">=</span> <span class="n">args</span><span class="o">.</span><span class="n">x</span><span class="o">**</span><span class="n">args</span><span class="o">.</span><span class="n">y</span>
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">>=</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">{}</span><span class="s2"> to the power </span><span class="si">{}</span><span class="s2"> equals </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">x</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">elif</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">>=</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">{}</span><span class="s2">^</span><span class="si">{}</span><span class="s2"> == </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">x</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">answer</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Output:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py
|
||||
<span class="go">usage: prog.py [-h] [-v] x y</span>
|
||||
<span class="go">prog.py: error: the following arguments are required: x, y</span>
|
||||
<span class="gp">$</span> python3 prog.py -h
|
||||
<span class="go">usage: prog.py [-h] [-v] x y</span>
|
||||
|
||||
<span class="go">positional arguments:</span>
|
||||
<span class="go"> x the base</span>
|
||||
<span class="go"> y the exponent</span>
|
||||
|
||||
<span class="go">optional arguments:</span>
|
||||
<span class="go"> -h, --help show this help message and exit</span>
|
||||
<span class="go"> -v, --verbosity</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> <span class="m">2</span> -v
|
||||
<span class="go">4^2 == 16</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Notice that so far we’ve been using verbosity level to <em>change</em> the text
|
||||
that gets displayed. The following example uses verbosity level
|
||||
to display <em>more</em> text instead:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"x"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"the base"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"y"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"the exponent"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-v"</span><span class="p">,</span> <span class="s2">"--verbosity"</span><span class="p">,</span> <span class="n">action</span><span class="o">=</span><span class="s2">"count"</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="n">answer</span> <span class="o">=</span> <span class="n">args</span><span class="o">.</span><span class="n">x</span><span class="o">**</span><span class="n">args</span><span class="o">.</span><span class="n">y</span>
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">>=</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Running '</span><span class="si">{}</span><span class="s2">'"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="vm">__file__</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">>=</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">{}</span><span class="s2">^</span><span class="si">{}</span><span class="s2"> == "</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">x</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">y</span><span class="p">),</span> <span class="n">end</span><span class="o">=</span><span class="s2">""</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">answer</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Output:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py <span class="m">4</span> <span class="m">2</span>
|
||||
<span class="go">16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> <span class="m">2</span> -v
|
||||
<span class="go">4^2 == 16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> <span class="m">2</span> -vv
|
||||
<span class="go">Running 'prog.py'</span>
|
||||
<span class="go">4^2 == 16</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="section" id="conflicting-options">
|
||||
<h3>Conflicting options<a class="headerlink" href="#conflicting-options" title="Permalink to this headline">¶</a></h3>
|
||||
<p>So far, we have been working with two methods of an
|
||||
<a class="reference internal" href="../library/argparse.html#argparse.ArgumentParser" title="argparse.ArgumentParser"><code class="xref py py-class docutils literal notranslate"><span class="pre">argparse.ArgumentParser</span></code></a> instance. Let’s introduce a third one,
|
||||
<code class="xref py py-meth docutils literal notranslate"><span class="pre">add_mutually_exclusive_group()</span></code>. It allows us to specify options that
|
||||
conflict with each other. Let’s also change the rest of the program so that
|
||||
the new functionality makes more sense:
|
||||
we’ll introduce the <code class="docutils literal notranslate"><span class="pre">--quiet</span></code> option,
|
||||
which will be the opposite of the <code class="docutils literal notranslate"><span class="pre">--verbose</span></code> one:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">()</span>
|
||||
<span class="n">group</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">add_mutually_exclusive_group</span><span class="p">()</span>
|
||||
<span class="n">group</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-v"</span><span class="p">,</span> <span class="s2">"--verbose"</span><span class="p">,</span> <span class="n">action</span><span class="o">=</span><span class="s2">"store_true"</span><span class="p">)</span>
|
||||
<span class="n">group</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-q"</span><span class="p">,</span> <span class="s2">"--quiet"</span><span class="p">,</span> <span class="n">action</span><span class="o">=</span><span class="s2">"store_true"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"x"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"the base"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"y"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"the exponent"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="n">answer</span> <span class="o">=</span> <span class="n">args</span><span class="o">.</span><span class="n">x</span><span class="o">**</span><span class="n">args</span><span class="o">.</span><span class="n">y</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">quiet</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">answer</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">args</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">{}</span><span class="s2"> to the power </span><span class="si">{}</span><span class="s2"> equals </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">x</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">{}</span><span class="s2">^</span><span class="si">{}</span><span class="s2"> == </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">x</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Our program is now simpler, and we’ve lost some functionality for the sake of
|
||||
demonstration. Anyway, here’s the output:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py <span class="m">4</span> <span class="m">2</span>
|
||||
<span class="go">4^2 == 16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> <span class="m">2</span> -q
|
||||
<span class="go">16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> <span class="m">2</span> -v
|
||||
<span class="go">4 to the power 2 equals 16</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> <span class="m">2</span> -vq
|
||||
<span class="go">usage: prog.py [-h] [-v | -q] x y</span>
|
||||
<span class="go">prog.py: error: argument -q/--quiet: not allowed with argument -v/--verbose</span>
|
||||
<span class="gp">$</span> python3 prog.py <span class="m">4</span> <span class="m">2</span> -v --quiet
|
||||
<span class="go">usage: prog.py [-h] [-v | -q] x y</span>
|
||||
<span class="go">prog.py: error: argument -q/--quiet: not allowed with argument -v/--verbose</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>That should be easy to follow. I’ve added that last output so you can see the
|
||||
sort of flexibility you get, i.e. mixing long form options with short form
|
||||
ones.</p>
|
||||
<p>Before we conclude, you probably want to tell your users the main purpose of
|
||||
your program, just in case they don’t know:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>
|
||||
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">(</span><span class="n">description</span><span class="o">=</span><span class="s2">"calculate X to the power of Y"</span><span class="p">)</span>
|
||||
<span class="n">group</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">add_mutually_exclusive_group</span><span class="p">()</span>
|
||||
<span class="n">group</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-v"</span><span class="p">,</span> <span class="s2">"--verbose"</span><span class="p">,</span> <span class="n">action</span><span class="o">=</span><span class="s2">"store_true"</span><span class="p">)</span>
|
||||
<span class="n">group</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-q"</span><span class="p">,</span> <span class="s2">"--quiet"</span><span class="p">,</span> <span class="n">action</span><span class="o">=</span><span class="s2">"store_true"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"x"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"the base"</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"y"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">"the exponent"</span><span class="p">)</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
|
||||
<span class="n">answer</span> <span class="o">=</span> <span class="n">args</span><span class="o">.</span><span class="n">x</span><span class="o">**</span><span class="n">args</span><span class="o">.</span><span class="n">y</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">quiet</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">answer</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">args</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">{}</span><span class="s2"> to the power </span><span class="si">{}</span><span class="s2"> equals </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">x</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">{}</span><span class="s2">^</span><span class="si">{}</span><span class="s2"> == </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">x</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> <span class="n">answer</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Note the slight difference in the usage text. Note the <code class="docutils literal notranslate"><span class="pre">[-v</span> <span class="pre">|</span> <span class="pre">-q]</span></code>,
|
||||
which tells us that we can either use <code class="docutils literal notranslate"><span class="pre">-v</span></code> or <code class="docutils literal notranslate"><span class="pre">-q</span></code>,
|
||||
but not both at the same time:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 prog.py --help
|
||||
<span class="go">usage: prog.py [-h] [-v | -q] x y</span>
|
||||
|
||||
<span class="go">calculate X to the power of Y</span>
|
||||
|
||||
<span class="go">positional arguments:</span>
|
||||
<span class="go"> x the base</span>
|
||||
<span class="go"> y the exponent</span>
|
||||
|
||||
<span class="go">optional arguments:</span>
|
||||
<span class="go"> -h, --help show this help message and exit</span>
|
||||
<span class="go"> -v, --verbose</span>
|
||||
<span class="go"> -q, --quiet</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="conclusion">
|
||||
<h2>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The <a class="reference internal" href="../library/argparse.html#module-argparse" title="argparse: Command-line option and argument parsing library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">argparse</span></code></a> module offers a lot more than shown here.
|
||||
Its docs are quite detailed and thorough, and full of examples.
|
||||
Having gone through this tutorial, you should easily digest them
|
||||
without feeling overwhelmed.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li><a class="reference internal" href="#">Argparse Tutorial</a><ul>
|
||||
<li><a class="reference internal" href="#concepts">Concepts</a></li>
|
||||
<li><a class="reference internal" href="#the-basics">The basics</a></li>
|
||||
<li><a class="reference internal" href="#introducing-positional-arguments">Introducing Positional arguments</a></li>
|
||||
<li><a class="reference internal" href="#introducing-optional-arguments">Introducing Optional arguments</a><ul>
|
||||
<li><a class="reference internal" href="#short-options">Short options</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#combining-positional-and-optional-arguments">Combining Positional and Optional arguments</a></li>
|
||||
<li><a class="reference internal" href="#getting-a-little-more-advanced">Getting a little more advanced</a><ul>
|
||||
<li><a class="reference internal" href="#conflicting-options">Conflicting options</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#conclusion">Conclusion</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h4>Previous topic</h4>
|
||||
<p class="topless"><a href="urllib2.html"
|
||||
title="previous chapter">HOWTO Fetch Internet Resources Using The urllib Package</a></p>
|
||||
<h4>Next topic</h4>
|
||||
<p class="topless"><a href="ipaddress.html"
|
||||
title="next chapter">An introduction to the ipaddress module</a></p>
|
||||
<div role="note" aria-label="source link">
|
||||
<h3>This Page</h3>
|
||||
<ul class="this-page-menu">
|
||||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||||
<li>
|
||||
<a href="https://github.com/python/cpython/blob/3.7/Doc/howto/argparse.rst"
|
||||
rel="nofollow">Show Source
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="ipaddress.html" title="An introduction to the ipaddress module"
|
||||
>next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="urllib2.html" title="HOWTO Fetch Internet Resources Using The urllib Package"
|
||||
>previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer">
|
||||
© <a href="../copyright.html">Copyright</a> 2001-2019, Python Software Foundation.
|
||||
<br />
|
||||
The Python Software Foundation is a non-profit corporation.
|
||||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||||
<br />
|
||||
Last updated on Jul 13, 2019.
|
||||
<a href="../bugs.html">Found a bug</a>?
|
||||
<br />
|
||||
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 2.0.1.
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
1806
python-3.7.4-docs-html/howto/clinic.html
Normal file
1806
python-3.7.4-docs-html/howto/clinic.html
Normal file
File diff suppressed because it is too large
Load Diff
564
python-3.7.4-docs-html/howto/cporting.html
Normal file
564
python-3.7.4-docs-html/howto/cporting.html
Normal file
@@ -0,0 +1,564 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Porting Extension Modules to Python 3 — Python 3.7.4 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pydoctheme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/sidebar.js"></script>
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml"
|
||||
title="Search within Python 3.7.4 documentation"
|
||||
href="../_static/opensearch.xml"/>
|
||||
<link rel="author" title="About these documents" href="../about.html" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||||
<link rel="next" title="Curses Programming with Python" href="curses.html" />
|
||||
<link rel="prev" title="Porting Python 2 Code to Python 3" href="pyporting.html" />
|
||||
<link rel="shortcut icon" type="image/png" href="../_static/py.png" />
|
||||
<link rel="canonical" href="https://docs.python.org/3/howto/cporting.html" />
|
||||
|
||||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||||
<script type="text/javascript" src="../_static/switchers.js"></script>
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
@media only screen {
|
||||
table.full-width-table {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
</head><body>
|
||||
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="curses.html" title="Curses Programming with Python"
|
||||
accesskey="N">next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="pyporting.html" title="Porting Python 2 Code to Python 3"
|
||||
accesskey="P">previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<div class="section" id="porting-extension-modules-to-python-3">
|
||||
<span id="cporting-howto"></span><h1>Porting Extension Modules to Python 3<a class="headerlink" href="#porting-extension-modules-to-python-3" title="Permalink to this headline">¶</a></h1>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">author</dt>
|
||||
<dd class="field-odd"><p>Benjamin Peterson</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<div class="topic">
|
||||
<p class="topic-title first">Abstract</p>
|
||||
<p>Although changing the C-API was not one of Python 3’s objectives,
|
||||
the many Python-level changes made leaving Python 2’s API intact
|
||||
impossible. In fact, some changes such as <a class="reference internal" href="../library/functions.html#int" title="int"><code class="xref py py-func docutils literal notranslate"><span class="pre">int()</span></code></a> and
|
||||
<code class="xref py py-func docutils literal notranslate"><span class="pre">long()</span></code> unification are more obvious on the C level. This
|
||||
document endeavors to document incompatibilities and how they can
|
||||
be worked around.</p>
|
||||
</div>
|
||||
<div class="section" id="conditional-compilation">
|
||||
<h2>Conditional compilation<a class="headerlink" href="#conditional-compilation" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The easiest way to compile only some code for Python 3 is to check
|
||||
if <code class="xref c c-macro docutils literal notranslate"><span class="pre">PY_MAJOR_VERSION</span></code> is greater than or equal to 3.</p>
|
||||
<div class="highlight-c notranslate"><div class="highlight"><pre><span></span><span class="cp">#if PY_MAJOR_VERSION >= 3</span>
|
||||
<span class="cp">#define IS_PY3K</span>
|
||||
<span class="cp">#endif</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>API functions that are not present can be aliased to their equivalents within
|
||||
conditional blocks.</p>
|
||||
</div>
|
||||
<div class="section" id="changes-to-object-apis">
|
||||
<h2>Changes to Object APIs<a class="headerlink" href="#changes-to-object-apis" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Python 3 merged together some types with similar functions while cleanly
|
||||
separating others.</p>
|
||||
<div class="section" id="str-unicode-unification">
|
||||
<h3>str/unicode Unification<a class="headerlink" href="#str-unicode-unification" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Python 3’s <a class="reference internal" href="../library/stdtypes.html#str" title="str"><code class="xref py py-func docutils literal notranslate"><span class="pre">str()</span></code></a> type is equivalent to Python 2’s <code class="xref py py-func docutils literal notranslate"><span class="pre">unicode()</span></code>; the C
|
||||
functions are called <code class="docutils literal notranslate"><span class="pre">PyUnicode_*</span></code> for both. The old 8-bit string type has become
|
||||
<a class="reference internal" href="../library/stdtypes.html#bytes" title="bytes"><code class="xref py py-func docutils literal notranslate"><span class="pre">bytes()</span></code></a>, with C functions called <code class="docutils literal notranslate"><span class="pre">PyBytes_*</span></code>. Python 2.6 and later provide a compatibility header,
|
||||
<code class="file docutils literal notranslate"><span class="pre">bytesobject.h</span></code>, mapping <code class="docutils literal notranslate"><span class="pre">PyBytes</span></code> names to <code class="docutils literal notranslate"><span class="pre">PyString</span></code> ones. For best
|
||||
compatibility with Python 3, <code class="xref c c-type docutils literal notranslate"><span class="pre">PyUnicode</span></code> should be used for textual data and
|
||||
<code class="xref c c-type docutils literal notranslate"><span class="pre">PyBytes</span></code> for binary data. It’s also important to remember that
|
||||
<code class="xref c c-type docutils literal notranslate"><span class="pre">PyBytes</span></code> and <code class="xref c c-type docutils literal notranslate"><span class="pre">PyUnicode</span></code> in Python 3 are not interchangeable like
|
||||
<code class="xref c c-type docutils literal notranslate"><span class="pre">PyString</span></code> and <code class="xref c c-type docutils literal notranslate"><span class="pre">PyUnicode</span></code> are in Python 2. The following example
|
||||
shows best practices with regard to <code class="xref c c-type docutils literal notranslate"><span class="pre">PyUnicode</span></code>, <code class="xref c c-type docutils literal notranslate"><span class="pre">PyString</span></code>,
|
||||
and <code class="xref c c-type docutils literal notranslate"><span class="pre">PyBytes</span></code>.</p>
|
||||
<div class="highlight-c notranslate"><div class="highlight"><pre><span></span><span class="cp">#include</span> <span class="cpf">"stdlib.h"</span><span class="cp"></span>
|
||||
<span class="cp">#include</span> <span class="cpf">"Python.h"</span><span class="cp"></span>
|
||||
<span class="cp">#include</span> <span class="cpf">"bytesobject.h"</span><span class="cp"></span>
|
||||
|
||||
<span class="cm">/* text example */</span>
|
||||
<span class="k">static</span> <span class="n">PyObject</span> <span class="o">*</span>
|
||||
<span class="nf">say_hello</span><span class="p">(</span><span class="n">PyObject</span> <span class="o">*</span><span class="n">self</span><span class="p">,</span> <span class="n">PyObject</span> <span class="o">*</span><span class="n">args</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="n">PyObject</span> <span class="o">*</span><span class="n">name</span><span class="p">,</span> <span class="o">*</span><span class="n">result</span><span class="p">;</span>
|
||||
|
||||
<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="n">PyArg_ParseTuple</span><span class="p">(</span><span class="n">args</span><span class="p">,</span> <span class="s">"U:say_hello"</span><span class="p">,</span> <span class="o">&</span><span class="n">name</span><span class="p">))</span>
|
||||
<span class="k">return</span> <span class="nb">NULL</span><span class="p">;</span>
|
||||
|
||||
<span class="n">result</span> <span class="o">=</span> <span class="n">PyUnicode_FromFormat</span><span class="p">(</span><span class="s">"Hello, %S!"</span><span class="p">,</span> <span class="n">name</span><span class="p">);</span>
|
||||
<span class="k">return</span> <span class="n">result</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="cm">/* just a forward */</span>
|
||||
<span class="k">static</span> <span class="kt">char</span> <span class="o">*</span> <span class="nf">do_encode</span><span class="p">(</span><span class="n">PyObject</span> <span class="o">*</span><span class="p">);</span>
|
||||
|
||||
<span class="cm">/* bytes example */</span>
|
||||
<span class="k">static</span> <span class="n">PyObject</span> <span class="o">*</span>
|
||||
<span class="nf">encode_object</span><span class="p">(</span><span class="n">PyObject</span> <span class="o">*</span><span class="n">self</span><span class="p">,</span> <span class="n">PyObject</span> <span class="o">*</span><span class="n">args</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="kt">char</span> <span class="o">*</span><span class="n">encoded</span><span class="p">;</span>
|
||||
<span class="n">PyObject</span> <span class="o">*</span><span class="n">result</span><span class="p">,</span> <span class="o">*</span><span class="n">myobj</span><span class="p">;</span>
|
||||
|
||||
<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="n">PyArg_ParseTuple</span><span class="p">(</span><span class="n">args</span><span class="p">,</span> <span class="s">"O:encode_object"</span><span class="p">,</span> <span class="o">&</span><span class="n">myobj</span><span class="p">))</span>
|
||||
<span class="k">return</span> <span class="nb">NULL</span><span class="p">;</span>
|
||||
|
||||
<span class="n">encoded</span> <span class="o">=</span> <span class="n">do_encode</span><span class="p">(</span><span class="n">myobj</span><span class="p">);</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">encoded</span> <span class="o">==</span> <span class="nb">NULL</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="nb">NULL</span><span class="p">;</span>
|
||||
<span class="n">result</span> <span class="o">=</span> <span class="n">PyBytes_FromString</span><span class="p">(</span><span class="n">encoded</span><span class="p">);</span>
|
||||
<span class="n">free</span><span class="p">(</span><span class="n">encoded</span><span class="p">);</span>
|
||||
<span class="k">return</span> <span class="n">result</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="long-int-unification">
|
||||
<h3>long/int Unification<a class="headerlink" href="#long-int-unification" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Python 3 has only one integer type, <a class="reference internal" href="../library/functions.html#int" title="int"><code class="xref py py-func docutils literal notranslate"><span class="pre">int()</span></code></a>. But it actually
|
||||
corresponds to Python 2’s <code class="xref py py-func docutils literal notranslate"><span class="pre">long()</span></code> type—the <a class="reference internal" href="../library/functions.html#int" title="int"><code class="xref py py-func docutils literal notranslate"><span class="pre">int()</span></code></a> type
|
||||
used in Python 2 was removed. In the C-API, <code class="docutils literal notranslate"><span class="pre">PyInt_*</span></code> functions
|
||||
are replaced by their <code class="docutils literal notranslate"><span class="pre">PyLong_*</span></code> equivalents.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="module-initialization-and-state">
|
||||
<h2>Module initialization and state<a class="headerlink" href="#module-initialization-and-state" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Python 3 has a revamped extension module initialization system. (See
|
||||
<span class="target" id="index-0"></span><a class="pep reference external" href="https://www.python.org/dev/peps/pep-3121"><strong>PEP 3121</strong></a>.) Instead of being stored in globals, module state should
|
||||
be stored in an interpreter-specific structure. Creating modules that
|
||||
act correctly in both Python 2 and Python 3 is tricky. The following
|
||||
simple example demonstrates how.</p>
|
||||
<div class="highlight-c notranslate"><div class="highlight"><pre><span></span><span class="cp">#include</span> <span class="cpf">"Python.h"</span><span class="cp"></span>
|
||||
|
||||
<span class="k">struct</span> <span class="n">module_state</span> <span class="p">{</span>
|
||||
<span class="n">PyObject</span> <span class="o">*</span><span class="n">error</span><span class="p">;</span>
|
||||
<span class="p">};</span>
|
||||
|
||||
<span class="cp">#if PY_MAJOR_VERSION >= 3</span>
|
||||
<span class="cp">#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))</span>
|
||||
<span class="cp">#else</span>
|
||||
<span class="cp">#define GETSTATE(m) (&_state)</span>
|
||||
<span class="k">static</span> <span class="k">struct</span> <span class="n">module_state</span> <span class="n">_state</span><span class="p">;</span>
|
||||
<span class="cp">#endif</span>
|
||||
|
||||
<span class="k">static</span> <span class="n">PyObject</span> <span class="o">*</span>
|
||||
<span class="nf">error_out</span><span class="p">(</span><span class="n">PyObject</span> <span class="o">*</span><span class="n">m</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="k">struct</span> <span class="n">module_state</span> <span class="o">*</span><span class="n">st</span> <span class="o">=</span> <span class="n">GETSTATE</span><span class="p">(</span><span class="n">m</span><span class="p">);</span>
|
||||
<span class="n">PyErr_SetString</span><span class="p">(</span><span class="n">st</span><span class="o">-></span><span class="n">error</span><span class="p">,</span> <span class="s">"something bad happened"</span><span class="p">);</span>
|
||||
<span class="k">return</span> <span class="nb">NULL</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="k">static</span> <span class="n">PyMethodDef</span> <span class="n">myextension_methods</span><span class="p">[]</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="p">{</span><span class="s">"error_out"</span><span class="p">,</span> <span class="p">(</span><span class="n">PyCFunction</span><span class="p">)</span><span class="n">error_out</span><span class="p">,</span> <span class="n">METH_NOARGS</span><span class="p">,</span> <span class="nb">NULL</span><span class="p">},</span>
|
||||
<span class="p">{</span><span class="nb">NULL</span><span class="p">,</span> <span class="nb">NULL</span><span class="p">}</span>
|
||||
<span class="p">};</span>
|
||||
|
||||
<span class="cp">#if PY_MAJOR_VERSION >= 3</span>
|
||||
|
||||
<span class="k">static</span> <span class="kt">int</span> <span class="nf">myextension_traverse</span><span class="p">(</span><span class="n">PyObject</span> <span class="o">*</span><span class="n">m</span><span class="p">,</span> <span class="n">visitproc</span> <span class="n">visit</span><span class="p">,</span> <span class="kt">void</span> <span class="o">*</span><span class="n">arg</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="n">Py_VISIT</span><span class="p">(</span><span class="n">GETSTATE</span><span class="p">(</span><span class="n">m</span><span class="p">)</span><span class="o">-></span><span class="n">error</span><span class="p">);</span>
|
||||
<span class="k">return</span> <span class="mi">0</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="k">static</span> <span class="kt">int</span> <span class="nf">myextension_clear</span><span class="p">(</span><span class="n">PyObject</span> <span class="o">*</span><span class="n">m</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="n">Py_CLEAR</span><span class="p">(</span><span class="n">GETSTATE</span><span class="p">(</span><span class="n">m</span><span class="p">)</span><span class="o">-></span><span class="n">error</span><span class="p">);</span>
|
||||
<span class="k">return</span> <span class="mi">0</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
|
||||
<span class="k">static</span> <span class="k">struct</span> <span class="n">PyModuleDef</span> <span class="n">moduledef</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="n">PyModuleDef_HEAD_INIT</span><span class="p">,</span>
|
||||
<span class="s">"myextension"</span><span class="p">,</span>
|
||||
<span class="nb">NULL</span><span class="p">,</span>
|
||||
<span class="k">sizeof</span><span class="p">(</span><span class="k">struct</span> <span class="n">module_state</span><span class="p">),</span>
|
||||
<span class="n">myextension_methods</span><span class="p">,</span>
|
||||
<span class="nb">NULL</span><span class="p">,</span>
|
||||
<span class="n">myextension_traverse</span><span class="p">,</span>
|
||||
<span class="n">myextension_clear</span><span class="p">,</span>
|
||||
<span class="nb">NULL</span>
|
||||
<span class="p">};</span>
|
||||
|
||||
<span class="cp">#define INITERROR return NULL</span>
|
||||
|
||||
<span class="n">PyMODINIT_FUNC</span>
|
||||
<span class="nf">PyInit_myextension</span><span class="p">(</span><span class="kt">void</span><span class="p">)</span>
|
||||
|
||||
<span class="cp">#else</span>
|
||||
<span class="cp">#define INITERROR return</span>
|
||||
|
||||
<span class="kt">void</span>
|
||||
<span class="n">initmyextension</span><span class="p">(</span><span class="kt">void</span><span class="p">)</span>
|
||||
<span class="cp">#endif</span>
|
||||
<span class="p">{</span>
|
||||
<span class="cp">#if PY_MAJOR_VERSION >= 3</span>
|
||||
<span class="n">PyObject</span> <span class="o">*</span><span class="n">module</span> <span class="o">=</span> <span class="n">PyModule_Create</span><span class="p">(</span><span class="o">&</span><span class="n">moduledef</span><span class="p">);</span>
|
||||
<span class="cp">#else</span>
|
||||
<span class="n">PyObject</span> <span class="o">*</span><span class="n">module</span> <span class="o">=</span> <span class="n">Py_InitModule</span><span class="p">(</span><span class="s">"myextension"</span><span class="p">,</span> <span class="n">myextension_methods</span><span class="p">);</span>
|
||||
<span class="cp">#endif</span>
|
||||
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">module</span> <span class="o">==</span> <span class="nb">NULL</span><span class="p">)</span>
|
||||
<span class="n">INITERROR</span><span class="p">;</span>
|
||||
<span class="k">struct</span> <span class="n">module_state</span> <span class="o">*</span><span class="n">st</span> <span class="o">=</span> <span class="n">GETSTATE</span><span class="p">(</span><span class="n">module</span><span class="p">);</span>
|
||||
|
||||
<span class="n">st</span><span class="o">-></span><span class="n">error</span> <span class="o">=</span> <span class="n">PyErr_NewException</span><span class="p">(</span><span class="s">"myextension.Error"</span><span class="p">,</span> <span class="nb">NULL</span><span class="p">,</span> <span class="nb">NULL</span><span class="p">);</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">st</span><span class="o">-></span><span class="n">error</span> <span class="o">==</span> <span class="nb">NULL</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="n">Py_DECREF</span><span class="p">(</span><span class="n">module</span><span class="p">);</span>
|
||||
<span class="n">INITERROR</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="cp">#if PY_MAJOR_VERSION >= 3</span>
|
||||
<span class="k">return</span> <span class="n">module</span><span class="p">;</span>
|
||||
<span class="cp">#endif</span>
|
||||
<span class="p">}</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="cobject-replaced-with-capsule">
|
||||
<h2>CObject replaced with Capsule<a class="headerlink" href="#cobject-replaced-with-capsule" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The <code class="xref c c-type docutils literal notranslate"><span class="pre">Capsule</span></code> object was introduced in Python 3.1 and 2.7 to replace
|
||||
<code class="xref c c-type docutils literal notranslate"><span class="pre">CObject</span></code>. CObjects were useful,
|
||||
but the <code class="xref c c-type docutils literal notranslate"><span class="pre">CObject</span></code> API was problematic: it didn’t permit distinguishing
|
||||
between valid CObjects, which allowed mismatched CObjects to crash the
|
||||
interpreter, and some of its APIs relied on undefined behavior in C.
|
||||
(For further reading on the rationale behind Capsules, please see <a class="reference external" href="https://bugs.python.org/issue5630">bpo-5630</a>.)</p>
|
||||
<p>If you’re currently using CObjects, and you want to migrate to 3.1 or newer,
|
||||
you’ll need to switch to Capsules.
|
||||
<code class="xref c c-type docutils literal notranslate"><span class="pre">CObject</span></code> was deprecated in 3.1 and 2.7 and completely removed in
|
||||
Python 3.2. If you only support 2.7, or 3.1 and above, you
|
||||
can simply switch to <code class="xref c c-type docutils literal notranslate"><span class="pre">Capsule</span></code>. If you need to support Python 3.0,
|
||||
or versions of Python earlier than 2.7,
|
||||
you’ll have to support both CObjects and Capsules.
|
||||
(Note that Python 3.0 is no longer supported, and it is not recommended
|
||||
for production use.)</p>
|
||||
<p>The following example header file <code class="file docutils literal notranslate"><span class="pre">capsulethunk.h</span></code> may
|
||||
solve the problem for you. Simply write your code against the
|
||||
<code class="xref c c-type docutils literal notranslate"><span class="pre">Capsule</span></code> API and include this header file after
|
||||
<code class="file docutils literal notranslate"><span class="pre">Python.h</span></code>. Your code will automatically use Capsules
|
||||
in versions of Python with Capsules, and switch to CObjects
|
||||
when Capsules are unavailable.</p>
|
||||
<p><code class="file docutils literal notranslate"><span class="pre">capsulethunk.h</span></code> simulates Capsules using CObjects. However,
|
||||
<code class="xref c c-type docutils literal notranslate"><span class="pre">CObject</span></code> provides no place to store the capsule’s “name”. As a
|
||||
result the simulated <code class="xref c c-type docutils literal notranslate"><span class="pre">Capsule</span></code> objects created by <code class="file docutils literal notranslate"><span class="pre">capsulethunk.h</span></code>
|
||||
behave slightly differently from real Capsules. Specifically:</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><p>The name parameter passed in to <a class="reference internal" href="../c-api/capsule.html#c.PyCapsule_New" title="PyCapsule_New"><code class="xref c c-func docutils literal notranslate"><span class="pre">PyCapsule_New()</span></code></a> is ignored.</p></li>
|
||||
<li><p>The name parameter passed in to <a class="reference internal" href="../c-api/capsule.html#c.PyCapsule_IsValid" title="PyCapsule_IsValid"><code class="xref c c-func docutils literal notranslate"><span class="pre">PyCapsule_IsValid()</span></code></a> and
|
||||
<a class="reference internal" href="../c-api/capsule.html#c.PyCapsule_GetPointer" title="PyCapsule_GetPointer"><code class="xref c c-func docutils literal notranslate"><span class="pre">PyCapsule_GetPointer()</span></code></a> is ignored, and no error checking
|
||||
of the name is performed.</p></li>
|
||||
<li><p><a class="reference internal" href="../c-api/capsule.html#c.PyCapsule_GetName" title="PyCapsule_GetName"><code class="xref c c-func docutils literal notranslate"><span class="pre">PyCapsule_GetName()</span></code></a> always returns NULL.</p></li>
|
||||
<li><p><a class="reference internal" href="../c-api/capsule.html#c.PyCapsule_SetName" title="PyCapsule_SetName"><code class="xref c c-func docutils literal notranslate"><span class="pre">PyCapsule_SetName()</span></code></a> always raises an exception and
|
||||
returns failure. (Since there’s no way to store a name
|
||||
in a CObject, noisy failure of <a class="reference internal" href="../c-api/capsule.html#c.PyCapsule_SetName" title="PyCapsule_SetName"><code class="xref c c-func docutils literal notranslate"><span class="pre">PyCapsule_SetName()</span></code></a>
|
||||
was deemed preferable to silent failure here. If this is
|
||||
inconvenient, feel free to modify your local
|
||||
copy as you see fit.)</p></li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<p>You can find <code class="file docutils literal notranslate"><span class="pre">capsulethunk.h</span></code> in the Python source distribution
|
||||
as <a class="reference external" href="https://github.com/python/cpython/tree/3.7/Doc/includes/capsulethunk.h">Doc/includes/capsulethunk.h</a>. We also include it here for
|
||||
your convenience:</p>
|
||||
<div class="highlight-c notranslate"><div class="highlight"><pre><span></span><span class="cp">#ifndef __CAPSULETHUNK_H</span>
|
||||
<span class="cp">#define __CAPSULETHUNK_H</span>
|
||||
|
||||
<span class="cp">#if ( (PY_VERSION_HEX < 0x02070000) \</span>
|
||||
<span class="cp"> || ((PY_VERSION_HEX >= 0x03000000) \</span>
|
||||
<span class="cp"> && (PY_VERSION_HEX < 0x03010000)) )</span>
|
||||
|
||||
<span class="cp">#define __PyCapsule_GetField(capsule, field, default_value) \</span>
|
||||
<span class="cp"> ( PyCapsule_CheckExact(capsule) \</span>
|
||||
<span class="cp"> ? (((PyCObject *)capsule)->field) \</span>
|
||||
<span class="cp"> : (default_value) \</span>
|
||||
<span class="cp"> ) \</span>
|
||||
|
||||
<span class="cp">#define __PyCapsule_SetField(capsule, field, value) \</span>
|
||||
<span class="cp"> ( PyCapsule_CheckExact(capsule) \</span>
|
||||
<span class="cp"> ? (((PyCObject *)capsule)->field = value), 1 \</span>
|
||||
<span class="cp"> : 0 \</span>
|
||||
<span class="cp"> ) \</span>
|
||||
|
||||
|
||||
<span class="cp">#define PyCapsule_Type PyCObject_Type</span>
|
||||
|
||||
<span class="cp">#define PyCapsule_CheckExact(capsule) (PyCObject_Check(capsule))</span>
|
||||
<span class="cp">#define PyCapsule_IsValid(capsule, name) (PyCObject_Check(capsule))</span>
|
||||
|
||||
|
||||
<span class="cp">#define PyCapsule_New(pointer, name, destructor) \</span>
|
||||
<span class="cp"> (PyCObject_FromVoidPtr(pointer, destructor))</span>
|
||||
|
||||
|
||||
<span class="cp">#define PyCapsule_GetPointer(capsule, name) \</span>
|
||||
<span class="cp"> (PyCObject_AsVoidPtr(capsule))</span>
|
||||
|
||||
<span class="cm">/* Don't call PyCObject_SetPointer here, it fails if there's a destructor */</span>
|
||||
<span class="cp">#define PyCapsule_SetPointer(capsule, pointer) \</span>
|
||||
<span class="cp"> __PyCapsule_SetField(capsule, cobject, pointer)</span>
|
||||
|
||||
|
||||
<span class="cp">#define PyCapsule_GetDestructor(capsule) \</span>
|
||||
<span class="cp"> __PyCapsule_GetField(capsule, destructor, NULL)</span>
|
||||
|
||||
<span class="cp">#define PyCapsule_SetDestructor(capsule, dtor) \</span>
|
||||
<span class="cp"> __PyCapsule_SetField(capsule, destructor, dtor)</span>
|
||||
|
||||
|
||||
<span class="cm">/*</span>
|
||||
<span class="cm"> * Sorry, there's simply no place</span>
|
||||
<span class="cm"> * to store a Capsule "name" in a CObject.</span>
|
||||
<span class="cm"> */</span>
|
||||
<span class="cp">#define PyCapsule_GetName(capsule) NULL</span>
|
||||
|
||||
<span class="k">static</span> <span class="kt">int</span>
|
||||
<span class="nf">PyCapsule_SetName</span><span class="p">(</span><span class="n">PyObject</span> <span class="o">*</span><span class="n">capsule</span><span class="p">,</span> <span class="k">const</span> <span class="kt">char</span> <span class="o">*</span><span class="n">unused</span><span class="p">)</span>
|
||||
<span class="p">{</span>
|
||||
<span class="n">unused</span> <span class="o">=</span> <span class="n">unused</span><span class="p">;</span>
|
||||
<span class="n">PyErr_SetString</span><span class="p">(</span><span class="n">PyExc_NotImplementedError</span><span class="p">,</span>
|
||||
<span class="s">"can't use PyCapsule_SetName with CObjects"</span><span class="p">);</span>
|
||||
<span class="k">return</span> <span class="mi">1</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
|
||||
|
||||
<span class="cp">#define PyCapsule_GetContext(capsule) \</span>
|
||||
<span class="cp"> __PyCapsule_GetField(capsule, descr, NULL)</span>
|
||||
|
||||
<span class="cp">#define PyCapsule_SetContext(capsule, context) \</span>
|
||||
<span class="cp"> __PyCapsule_SetField(capsule, descr, context)</span>
|
||||
|
||||
|
||||
<span class="k">static</span> <span class="kt">void</span> <span class="o">*</span>
|
||||
<span class="nf">PyCapsule_Import</span><span class="p">(</span><span class="k">const</span> <span class="kt">char</span> <span class="o">*</span><span class="n">name</span><span class="p">,</span> <span class="kt">int</span> <span class="n">no_block</span><span class="p">)</span>
|
||||
<span class="p">{</span>
|
||||
<span class="n">PyObject</span> <span class="o">*</span><span class="n">object</span> <span class="o">=</span> <span class="nb">NULL</span><span class="p">;</span>
|
||||
<span class="kt">void</span> <span class="o">*</span><span class="n">return_value</span> <span class="o">=</span> <span class="nb">NULL</span><span class="p">;</span>
|
||||
<span class="kt">char</span> <span class="o">*</span><span class="n">trace</span><span class="p">;</span>
|
||||
<span class="kt">size_t</span> <span class="n">name_length</span> <span class="o">=</span> <span class="p">(</span><span class="n">strlen</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="k">sizeof</span><span class="p">(</span><span class="kt">char</span><span class="p">);</span>
|
||||
<span class="kt">char</span> <span class="o">*</span><span class="n">name_dup</span> <span class="o">=</span> <span class="p">(</span><span class="kt">char</span> <span class="o">*</span><span class="p">)</span><span class="n">PyMem_MALLOC</span><span class="p">(</span><span class="n">name_length</span><span class="p">);</span>
|
||||
|
||||
<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="n">name_dup</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="k">return</span> <span class="nb">NULL</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">memcpy</span><span class="p">(</span><span class="n">name_dup</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">name_length</span><span class="p">);</span>
|
||||
|
||||
<span class="n">trace</span> <span class="o">=</span> <span class="n">name_dup</span><span class="p">;</span>
|
||||
<span class="k">while</span> <span class="p">(</span><span class="n">trace</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="kt">char</span> <span class="o">*</span><span class="n">dot</span> <span class="o">=</span> <span class="n">strchr</span><span class="p">(</span><span class="n">trace</span><span class="p">,</span> <span class="sc">'.'</span><span class="p">);</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">dot</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="o">*</span><span class="n">dot</span><span class="o">++</span> <span class="o">=</span> <span class="sc">'\0'</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">object</span> <span class="o">==</span> <span class="nb">NULL</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">no_block</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="n">object</span> <span class="o">=</span> <span class="n">PyImport_ImportModuleNoBlock</span><span class="p">(</span><span class="n">trace</span><span class="p">);</span>
|
||||
<span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
|
||||
<span class="n">object</span> <span class="o">=</span> <span class="n">PyImport_ImportModule</span><span class="p">(</span><span class="n">trace</span><span class="p">);</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="n">object</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="n">PyErr_Format</span><span class="p">(</span><span class="n">PyExc_ImportError</span><span class="p">,</span>
|
||||
<span class="s">"PyCapsule_Import could not "</span>
|
||||
<span class="s">"import module </span><span class="se">\"</span><span class="s">%s</span><span class="se">\"</span><span class="s">"</span><span class="p">,</span> <span class="n">trace</span><span class="p">);</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
|
||||
<span class="n">PyObject</span> <span class="o">*</span><span class="n">object2</span> <span class="o">=</span> <span class="n">PyObject_GetAttrString</span><span class="p">(</span><span class="n">object</span><span class="p">,</span> <span class="n">trace</span><span class="p">);</span>
|
||||
<span class="n">Py_DECREF</span><span class="p">(</span><span class="n">object</span><span class="p">);</span>
|
||||
<span class="n">object</span> <span class="o">=</span> <span class="n">object2</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="n">object</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="k">goto</span> <span class="n">EXIT</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">trace</span> <span class="o">=</span> <span class="n">dot</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">PyCObject_Check</span><span class="p">(</span><span class="n">object</span><span class="p">))</span> <span class="p">{</span>
|
||||
<span class="n">PyCObject</span> <span class="o">*</span><span class="n">cobject</span> <span class="o">=</span> <span class="p">(</span><span class="n">PyCObject</span> <span class="o">*</span><span class="p">)</span><span class="n">object</span><span class="p">;</span>
|
||||
<span class="n">return_value</span> <span class="o">=</span> <span class="n">cobject</span><span class="o">-></span><span class="n">cobject</span><span class="p">;</span>
|
||||
<span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
|
||||
<span class="n">PyErr_Format</span><span class="p">(</span><span class="n">PyExc_AttributeError</span><span class="p">,</span>
|
||||
<span class="s">"PyCapsule_Import </span><span class="se">\"</span><span class="s">%s</span><span class="se">\"</span><span class="s"> is not valid"</span><span class="p">,</span>
|
||||
<span class="n">name</span><span class="p">);</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="nl">EXIT</span><span class="p">:</span>
|
||||
<span class="n">Py_XDECREF</span><span class="p">(</span><span class="n">object</span><span class="p">);</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">name_dup</span><span class="p">)</span> <span class="p">{</span>
|
||||
<span class="n">PyMem_FREE</span><span class="p">(</span><span class="n">name_dup</span><span class="p">);</span>
|
||||
<span class="p">}</span>
|
||||
<span class="k">return</span> <span class="n">return_value</span><span class="p">;</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="cp">#endif </span><span class="cm">/* #if PY_VERSION_HEX < 0x02070000 */</span><span class="cp"></span>
|
||||
|
||||
<span class="cp">#endif </span><span class="cm">/* __CAPSULETHUNK_H */</span><span class="cp"></span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="other-options">
|
||||
<h2>Other options<a class="headerlink" href="#other-options" title="Permalink to this headline">¶</a></h2>
|
||||
<p>If you are writing a new extension module, you might consider <a class="reference external" href="https://cython.org/">Cython</a>. It translates a Python-like language to C. The
|
||||
extension modules it creates are compatible with Python 3 and Python 2.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li><a class="reference internal" href="#">Porting Extension Modules to Python 3</a><ul>
|
||||
<li><a class="reference internal" href="#conditional-compilation">Conditional compilation</a></li>
|
||||
<li><a class="reference internal" href="#changes-to-object-apis">Changes to Object APIs</a><ul>
|
||||
<li><a class="reference internal" href="#str-unicode-unification">str/unicode Unification</a></li>
|
||||
<li><a class="reference internal" href="#long-int-unification">long/int Unification</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#module-initialization-and-state">Module initialization and state</a></li>
|
||||
<li><a class="reference internal" href="#cobject-replaced-with-capsule">CObject replaced with Capsule</a></li>
|
||||
<li><a class="reference internal" href="#other-options">Other options</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h4>Previous topic</h4>
|
||||
<p class="topless"><a href="pyporting.html"
|
||||
title="previous chapter">Porting Python 2 Code to Python 3</a></p>
|
||||
<h4>Next topic</h4>
|
||||
<p class="topless"><a href="curses.html"
|
||||
title="next chapter">Curses Programming with Python</a></p>
|
||||
<div role="note" aria-label="source link">
|
||||
<h3>This Page</h3>
|
||||
<ul class="this-page-menu">
|
||||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||||
<li>
|
||||
<a href="https://github.com/python/cpython/blob/3.7/Doc/howto/cporting.rst"
|
||||
rel="nofollow">Show Source
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="curses.html" title="Curses Programming with Python"
|
||||
>next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="pyporting.html" title="Porting Python 2 Code to Python 3"
|
||||
>previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer">
|
||||
© <a href="../copyright.html">Copyright</a> 2001-2019, Python Software Foundation.
|
||||
<br />
|
||||
The Python Software Foundation is a non-profit corporation.
|
||||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||||
<br />
|
||||
Last updated on Jul 13, 2019.
|
||||
<a href="../bugs.html">Found a bug</a>?
|
||||
<br />
|
||||
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 2.0.1.
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
729
python-3.7.4-docs-html/howto/curses.html
Normal file
729
python-3.7.4-docs-html/howto/curses.html
Normal file
@@ -0,0 +1,729 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Curses Programming with Python — Python 3.7.4 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pydoctheme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/sidebar.js"></script>
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml"
|
||||
title="Search within Python 3.7.4 documentation"
|
||||
href="../_static/opensearch.xml"/>
|
||||
<link rel="author" title="About these documents" href="../about.html" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||||
<link rel="next" title="Descriptor HowTo Guide" href="descriptor.html" />
|
||||
<link rel="prev" title="Porting Extension Modules to Python 3" href="cporting.html" />
|
||||
<link rel="shortcut icon" type="image/png" href="../_static/py.png" />
|
||||
<link rel="canonical" href="https://docs.python.org/3/howto/curses.html" />
|
||||
|
||||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||||
<script type="text/javascript" src="../_static/switchers.js"></script>
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
@media only screen {
|
||||
table.full-width-table {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
</head><body>
|
||||
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="descriptor.html" title="Descriptor HowTo Guide"
|
||||
accesskey="N">next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="cporting.html" title="Porting Extension Modules to Python 3"
|
||||
accesskey="P">previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<div class="section" id="curses-programming-with-python">
|
||||
<span id="curses-howto"></span><h1>Curses Programming with Python<a class="headerlink" href="#curses-programming-with-python" title="Permalink to this headline">¶</a></h1>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Author</dt>
|
||||
<dd class="field-odd"><p>A.M. Kuchling, Eric S. Raymond</p>
|
||||
</dd>
|
||||
<dt class="field-even">Release</dt>
|
||||
<dd class="field-even"><p>2.04</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<div class="topic">
|
||||
<p class="topic-title first">Abstract</p>
|
||||
<p>This document describes how to use the <a class="reference internal" href="../library/curses.html#module-curses" title="curses: An interface to the curses library, providing portable terminal handling. (Unix)"><code class="xref py py-mod docutils literal notranslate"><span class="pre">curses</span></code></a> extension
|
||||
module to control text-mode displays.</p>
|
||||
</div>
|
||||
<div class="section" id="what-is-curses">
|
||||
<h2>What is curses?<a class="headerlink" href="#what-is-curses" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The curses library supplies a terminal-independent screen-painting and
|
||||
keyboard-handling facility for text-based terminals; such terminals
|
||||
include VT100s, the Linux console, and the simulated terminal provided
|
||||
by various programs. Display terminals support various control codes
|
||||
to perform common operations such as moving the cursor, scrolling the
|
||||
screen, and erasing areas. Different terminals use widely differing
|
||||
codes, and often have their own minor quirks.</p>
|
||||
<p>In a world of graphical displays, one might ask “why bother”? It’s
|
||||
true that character-cell display terminals are an obsolete technology,
|
||||
but there are niches in which being able to do fancy things with them
|
||||
is still valuable. One niche is on small-footprint or embedded
|
||||
Unixes that don’t run an X server. Another is tools such as OS
|
||||
installers and kernel configurators that may have to run before any
|
||||
graphical support is available.</p>
|
||||
<p>The curses library provides fairly basic functionality, providing the
|
||||
programmer with an abstraction of a display containing multiple
|
||||
non-overlapping windows of text. The contents of a window can be
|
||||
changed in various ways—adding text, erasing it, changing its
|
||||
appearance—and the curses library will figure out what control codes
|
||||
need to be sent to the terminal to produce the right output. curses
|
||||
doesn’t provide many user-interface concepts such as buttons, checkboxes,
|
||||
or dialogs; if you need such features, consider a user interface library such as
|
||||
<a class="reference external" href="https://pypi.org/project/urwid/">Urwid</a>.</p>
|
||||
<p>The curses library was originally written for BSD Unix; the later System V
|
||||
versions of Unix from AT&amp;T added many enhancements and new functions. BSD curses
|
||||
is no longer maintained, having been replaced by ncurses, which is an
|
||||
open-source implementation of the AT&amp;T interface. If you’re using an
|
||||
open-source Unix such as Linux or FreeBSD, your system almost certainly uses
|
||||
ncurses. Since most current commercial Unix versions are based on System V
|
||||
code, all the functions described here will probably be available. The older
|
||||
versions of curses carried by some proprietary Unixes may not support
|
||||
everything, though.</p>
|
||||
<p>The Windows version of Python doesn’t include the <a class="reference internal" href="../library/curses.html#module-curses" title="curses: An interface to the curses library, providing portable terminal handling. (Unix)"><code class="xref py py-mod docutils literal notranslate"><span class="pre">curses</span></code></a>
|
||||
module. A ported version called <a class="reference external" href="https://pypi.org/project/UniCurses">UniCurses</a> is available. You could
|
||||
also try <a class="reference external" href="http://effbot.org/zone/console-index.htm">the Console module</a>
|
||||
written by Fredrik Lundh, which doesn’t
|
||||
use the same API as curses but provides cursor-addressable text output
|
||||
and full support for mouse and keyboard input.</p>
|
||||
<div class="section" id="the-python-curses-module">
|
||||
<h3>The Python curses module<a class="headerlink" href="#the-python-curses-module" title="Permalink to this headline">¶</a></h3>
|
||||
<p>The Python module is a fairly simple wrapper over the C functions provided by
|
||||
curses; if you’re already familiar with curses programming in C, it’s really
|
||||
easy to transfer that knowledge to Python. The biggest difference is that the
|
||||
Python interface makes things simpler by merging different C functions such as
|
||||
<code class="xref c c-func docutils literal notranslate"><span class="pre">addstr()</span></code>, <code class="xref c c-func docutils literal notranslate"><span class="pre">mvaddstr()</span></code>, and <code class="xref c c-func docutils literal notranslate"><span class="pre">mvwaddstr()</span></code> into a single
|
||||
<a class="reference internal" href="../library/curses.html#curses.window.addstr" title="curses.window.addstr"><code class="xref py py-meth docutils literal notranslate"><span class="pre">addstr()</span></code></a> method. You’ll see this covered in more
|
||||
detail later.</p>
|
||||
<p>This HOWTO is an introduction to writing text-mode programs with curses
|
||||
and Python. It doesn’t attempt to be a complete guide to the curses API; for
|
||||
that, see the Python library guide’s section on ncurses, and the C manual pages
|
||||
for ncurses. It will, however, give you the basic ideas.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="starting-and-ending-a-curses-application">
|
||||
<h2>Starting and ending a curses application<a class="headerlink" href="#starting-and-ending-a-curses-application" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Before doing anything, curses must be initialized. This is done by
|
||||
calling the <a class="reference internal" href="../library/curses.html#curses.initscr" title="curses.initscr"><code class="xref py py-func docutils literal notranslate"><span class="pre">initscr()</span></code></a> function, which will determine the
|
||||
terminal type, send any required setup codes to the terminal, and
|
||||
create various internal data structures. If successful,
|
||||
<code class="xref py py-func docutils literal notranslate"><span class="pre">initscr()</span></code> returns a window object representing the entire
|
||||
screen; this is usually called <code class="docutils literal notranslate"><span class="pre">stdscr</span></code> after the name of the
|
||||
corresponding C variable.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">curses</span>
|
||||
<span class="n">stdscr</span> <span class="o">=</span> <span class="n">curses</span><span class="o">.</span><span class="n">initscr</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Usually curses applications turn off automatic echoing of keys to the
|
||||
screen, in order to be able to read keys and only display them under
|
||||
certain circumstances. This requires calling the
|
||||
<a class="reference internal" href="../library/curses.html#curses.noecho" title="curses.noecho"><code class="xref py py-func docutils literal notranslate"><span class="pre">noecho()</span></code></a> function.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">curses</span><span class="o">.</span><span class="n">noecho</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Applications will also commonly need to react to keys instantly,
|
||||
without requiring the Enter key to be pressed; this is called cbreak
|
||||
mode, as opposed to the usual buffered input mode.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">curses</span><span class="o">.</span><span class="n">cbreak</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Terminals usually return special keys, such as the cursor keys or navigation
|
||||
keys such as Page Up and Home, as a multibyte escape sequence. While you could
|
||||
write your application to expect such sequences and process them accordingly,
|
||||
curses can do it for you, returning a special value such as
|
||||
<code class="xref py py-const docutils literal notranslate"><span class="pre">curses.KEY_LEFT</span></code>. To get curses to do the job, you’ll have to enable
|
||||
keypad mode.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">stdscr</span><span class="o">.</span><span class="n">keypad</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Terminating a curses application is much easier than starting one. You’ll need
|
||||
to call:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">curses</span><span class="o">.</span><span class="n">nocbreak</span><span class="p">()</span>
|
||||
<span class="n">stdscr</span><span class="o">.</span><span class="n">keypad</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="n">curses</span><span class="o">.</span><span class="n">echo</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>to reverse the curses-friendly terminal settings. Then call the
|
||||
<a class="reference internal" href="../library/curses.html#curses.endwin" title="curses.endwin"><code class="xref py py-func docutils literal notranslate"><span class="pre">endwin()</span></code></a> function to restore the terminal to its original
|
||||
operating mode.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">curses</span><span class="o">.</span><span class="n">endwin</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>A common problem when debugging a curses application is to get your terminal
|
||||
messed up when the application dies without restoring the terminal to its
|
||||
previous state. In Python this commonly happens when your code is buggy and
|
||||
raises an uncaught exception. Keys are no longer echoed to the screen when
|
||||
you type them, for example, which makes using the shell difficult.</p>
|
||||
<p>In Python you can avoid these complications and make debugging much easier by
|
||||
importing the <a class="reference internal" href="../library/curses.html#curses.wrapper" title="curses.wrapper"><code class="xref py py-func docutils literal notranslate"><span class="pre">curses.wrapper()</span></code></a> function and using it like this:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">curses</span> <span class="k">import</span> <span class="n">wrapper</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">stdscr</span><span class="p">):</span>
|
||||
<span class="c1"># Clear screen</span>
|
||||
<span class="n">stdscr</span><span class="o">.</span><span class="n">clear</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># This raises ZeroDivisionError when i == 10.</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">11</span><span class="p">):</span>
|
||||
<span class="n">v</span> <span class="o">=</span> <span class="n">i</span><span class="o">-</span><span class="mi">10</span>
|
||||
<span class="n">stdscr</span><span class="o">.</span><span class="n">addstr</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="s1">'10 divided by </span><span class="si">{}</span><span class="s1"> is </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="mi">10</span><span class="o">/</span><span class="n">v</span><span class="p">))</span>
|
||||
|
||||
<span class="n">stdscr</span><span class="o">.</span><span class="n">refresh</span><span class="p">()</span>
|
||||
<span class="n">stdscr</span><span class="o">.</span><span class="n">getkey</span><span class="p">()</span>
|
||||
|
||||
<span class="n">wrapper</span><span class="p">(</span><span class="n">main</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The <a class="reference internal" href="../library/curses.html#curses.wrapper" title="curses.wrapper"><code class="xref py py-func docutils literal notranslate"><span class="pre">wrapper()</span></code></a> function takes a callable object and does the
|
||||
initializations described above, also initializing colors if color
|
||||
support is present. <code class="xref py py-func docutils literal notranslate"><span class="pre">wrapper()</span></code> then runs your provided callable.
|
||||
Once the callable returns, <code class="xref py py-func docutils literal notranslate"><span class="pre">wrapper()</span></code> will restore the original
|
||||
state of the terminal. The callable is called inside a
|
||||
<a class="reference internal" href="../reference/compound_stmts.html#try"><code class="xref std std-keyword docutils literal notranslate"><span class="pre">try</span></code></a>…<a class="reference internal" href="../reference/compound_stmts.html#except"><code class="xref std std-keyword docutils literal notranslate"><span class="pre">except</span></code></a> that catches exceptions, restores
|
||||
the state of the terminal, and then re-raises the exception. Therefore
|
||||
your terminal won’t be left in a funny state on exception and you’ll be
|
||||
able to read the exception’s message and traceback.</p>
|
||||
</div>
|
||||
<div class="section" id="windows-and-pads">
|
||||
<h2>Windows and Pads<a class="headerlink" href="#windows-and-pads" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Windows are the basic abstraction in curses. A window object represents a
|
||||
rectangular area of the screen, and supports methods to display text,
|
||||
erase it, allow the user to input strings, and so forth.</p>
|
||||
<p>The <code class="docutils literal notranslate"><span class="pre">stdscr</span></code> object returned by the <a class="reference internal" href="../library/curses.html#curses.initscr" title="curses.initscr"><code class="xref py py-func docutils literal notranslate"><span class="pre">initscr()</span></code></a> function is a
|
||||
window object that covers the entire screen. Many programs may need
|
||||
only this single window, but you might wish to divide the screen into
|
||||
smaller windows, in order to redraw or clear them separately. The
|
||||
<a class="reference internal" href="../library/curses.html#curses.newwin" title="curses.newwin"><code class="xref py py-func docutils literal notranslate"><span class="pre">newwin()</span></code></a> function creates a new window of a given size,
|
||||
returning the new window object.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">begin_x</span> <span class="o">=</span> <span class="mi">20</span><span class="p">;</span> <span class="n">begin_y</span> <span class="o">=</span> <span class="mi">7</span>
|
||||
<span class="n">height</span> <span class="o">=</span> <span class="mi">5</span><span class="p">;</span> <span class="n">width</span> <span class="o">=</span> <span class="mi">40</span>
|
||||
<span class="n">win</span> <span class="o">=</span> <span class="n">curses</span><span class="o">.</span><span class="n">newwin</span><span class="p">(</span><span class="n">height</span><span class="p">,</span> <span class="n">width</span><span class="p">,</span> <span class="n">begin_y</span><span class="p">,</span> <span class="n">begin_x</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Note that the coordinate system used in curses is unusual.
|
||||
Coordinates are always passed in the order <em>y,x</em>, and the top-left
|
||||
corner of a window is coordinate (0,0). This breaks the normal
|
||||
convention for handling coordinates where the <em>x</em> coordinate comes
|
||||
first. This is an unfortunate difference from most other computer
|
||||
applications, but it’s been part of curses since it was first written,
|
||||
and it’s too late to change things now.</p>
|
||||
<p>Your application can determine the size of the screen by using the
|
||||
<code class="xref py py-data docutils literal notranslate"><span class="pre">curses.LINES</span></code> and <code class="xref py py-data docutils literal notranslate"><span class="pre">curses.COLS</span></code> variables to obtain the <em>y</em> and
|
||||
<em>x</em> sizes. Legal coordinates will then extend from <code class="docutils literal notranslate"><span class="pre">(0,0)</span></code> to
|
||||
<code class="docutils literal notranslate"><span class="pre">(curses.LINES</span> <span class="pre">-</span> <span class="pre">1,</span> <span class="pre">curses.COLS</span> <span class="pre">-</span> <span class="pre">1)</span></code>.</p>
|
||||
<p>When you call a method to display or erase text, the effect doesn’t
|
||||
immediately show up on the display. Instead you must call the
|
||||
<a class="reference internal" href="../library/curses.html#curses.window.refresh" title="curses.window.refresh"><code class="xref py py-meth docutils literal notranslate"><span class="pre">refresh()</span></code></a> method of window objects to update the
|
||||
screen.</p>
|
||||
<p>This is because curses was originally written with slow 300-baud
|
||||
terminal connections in mind; with these terminals, minimizing the
|
||||
time required to redraw the screen was very important. Instead curses
|
||||
accumulates changes to the screen and displays them in the most
|
||||
efficient manner when you call <code class="xref py py-meth docutils literal notranslate"><span class="pre">refresh()</span></code>. For example, if your
|
||||
program displays some text in a window and then clears the window,
|
||||
there’s no need to send the original text because it’s never
|
||||
visible.</p>
|
||||
<p>In practice, explicitly telling curses to redraw a window doesn’t
|
||||
really complicate programming with curses much. Most programs go into a flurry
|
||||
of activity, and then pause waiting for a keypress or some other action on the
|
||||
part of the user. All you have to do is to be sure that the screen has been
|
||||
redrawn before pausing to wait for user input, by first calling
|
||||
<code class="docutils literal notranslate"><span class="pre">stdscr.refresh()</span></code> or the <code class="xref py py-meth docutils literal notranslate"><span class="pre">refresh()</span></code> method of some other relevant
|
||||
window.</p>
|
||||
<p>A pad is a special case of a window; it can be larger than the actual display
|
||||
screen, and only a portion of the pad is displayed at a time. Creating a pad
|
||||
requires the pad’s height and width, while refreshing a pad requires giving the
|
||||
coordinates of the on-screen area where a subsection of the pad will be
|
||||
displayed.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">pad</span> <span class="o">=</span> <span class="n">curses</span><span class="o">.</span><span class="n">newpad</span><span class="p">(</span><span class="mi">100</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span>
|
||||
<span class="c1"># These loops fill the pad with letters; addch() is</span>
|
||||
<span class="c1"># explained in the next section</span>
|
||||
<span class="k">for</span> <span class="n">y</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">99</span><span class="p">):</span>
|
||||
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">99</span><span class="p">):</span>
|
||||
<span class="n">pad</span><span class="o">.</span><span class="n">addch</span><span class="p">(</span><span class="n">y</span><span class="p">,</span><span class="n">x</span><span class="p">,</span> <span class="nb">ord</span><span class="p">(</span><span class="s1">'a'</span><span class="p">)</span> <span class="o">+</span> <span class="p">(</span><span class="n">x</span><span class="o">*</span><span class="n">x</span><span class="o">+</span><span class="n">y</span><span class="o">*</span><span class="n">y</span><span class="p">)</span> <span class="o">%</span> <span class="mi">26</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Displays a section of the pad in the middle of the screen.</span>
|
||||
<span class="c1"># (0,0) : coordinate of upper-left corner of pad area to display.</span>
|
||||
<span class="c1"># (5,5) : coordinate of upper-left corner of window area to be filled</span>
|
||||
<span class="c1"># with pad content.</span>
|
||||
<span class="c1"># (20, 75) : coordinate of lower-right corner of window area to be</span>
|
||||
<span class="c1"># : filled with pad content.</span>
|
||||
<span class="n">pad</span><span class="o">.</span><span class="n">refresh</span><span class="p">(</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span> <span class="mi">20</span><span class="p">,</span><span class="mi">75</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The <code class="xref py py-meth docutils literal notranslate"><span class="pre">refresh()</span></code> call displays a section of the pad in the rectangle
|
||||
extending from coordinate (5,5) to coordinate (20,75) on the screen; the upper
|
||||
left corner of the displayed section is coordinate (0,0) on the pad. Beyond
|
||||
that difference, pads are exactly like ordinary windows and support the same
|
||||
methods.</p>
|
||||
<p>If you have multiple windows and pads on screen there is a more
|
||||
efficient way to update the screen and prevent annoying screen flicker
|
||||
as each part of the screen gets updated. <code class="xref py py-meth docutils literal notranslate"><span class="pre">refresh()</span></code> actually
|
||||
does two things:</p>
|
||||
<ol class="arabic simple">
|
||||
<li><p>Calls the <a class="reference internal" href="../library/curses.html#curses.window.noutrefresh" title="curses.window.noutrefresh"><code class="xref py py-meth docutils literal notranslate"><span class="pre">noutrefresh()</span></code></a> method of each window
|
||||
to update an underlying data structure representing the desired
|
||||
state of the screen.</p></li>
|
||||
<li><p>Calls the <a class="reference internal" href="../library/curses.html#curses.doupdate" title="curses.doupdate"><code class="xref py py-func docutils literal notranslate"><span class="pre">doupdate()</span></code></a> function to change the
|
||||
physical screen to match the desired state recorded in the data structure.</p></li>
|
||||
</ol>
|
||||
<p>Instead you can call <code class="xref py py-meth docutils literal notranslate"><span class="pre">noutrefresh()</span></code> on a number of windows to
|
||||
update the data structure, and then call <code class="xref py py-func docutils literal notranslate"><span class="pre">doupdate()</span></code> to update
|
||||
the screen.</p>
|
||||
</div>
|
||||
<div class="section" id="displaying-text">
|
||||
<h2>Displaying Text<a class="headerlink" href="#displaying-text" title="Permalink to this headline">¶</a></h2>
|
||||
<p>From a C programmer’s point of view, curses may sometimes look like a
|
||||
twisty maze of functions, all subtly different. For example,
|
||||
<code class="xref c c-func docutils literal notranslate"><span class="pre">addstr()</span></code> displays a string at the current cursor location in
|
||||
the <code class="docutils literal notranslate"><span class="pre">stdscr</span></code> window, while <code class="xref c c-func docutils literal notranslate"><span class="pre">mvaddstr()</span></code> moves to a given y,x
|
||||
coordinate first before displaying the string. <code class="xref c c-func docutils literal notranslate"><span class="pre">waddstr()</span></code> is just
|
||||
like <code class="xref c c-func docutils literal notranslate"><span class="pre">addstr()</span></code>, but allows specifying a window to use instead of
|
||||
using <code class="docutils literal notranslate"><span class="pre">stdscr</span></code> by default. <code class="xref c c-func docutils literal notranslate"><span class="pre">mvwaddstr()</span></code> allows specifying both
|
||||
a window and a coordinate.</p>
|
||||
<p>Fortunately the Python interface hides all these details. <code class="docutils literal notranslate"><span class="pre">stdscr</span></code>
|
||||
is a window object like any other, and methods such as
|
||||
<a class="reference internal" href="../library/curses.html#curses.window.addstr" title="curses.window.addstr"><code class="xref py py-meth docutils literal notranslate"><span class="pre">addstr()</span></code></a> accept multiple argument forms. Usually there
|
||||
are four different forms.</p>
|
||||
<table class="docutils align-center">
|
||||
<colgroup>
|
||||
<col style="width: 41%" />
|
||||
<col style="width: 59%" />
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Form</p></th>
|
||||
<th class="head"><p>Description</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p><em>str</em> or <em>ch</em></p></td>
|
||||
<td><p>Display the string <em>str</em> or character <em>ch</em> at
|
||||
the current position</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p><em>str</em> or <em>ch</em>, <em>attr</em></p></td>
|
||||
<td><p>Display the string <em>str</em> or character <em>ch</em>,
|
||||
using attribute <em>attr</em> at the current
|
||||
position</p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p><em>y</em>, <em>x</em>, <em>str</em> or <em>ch</em></p></td>
|
||||
<td><p>Move to position <em>y,x</em> within the window, and
|
||||
display <em>str</em> or <em>ch</em></p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p><em>y</em>, <em>x</em>, <em>str</em> or <em>ch</em>, <em>attr</em></p></td>
|
||||
<td><p>Move to position <em>y,x</em> within the window, and
|
||||
display <em>str</em> or <em>ch</em>, using attribute <em>attr</em></p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>Attributes allow displaying text in highlighted forms such as boldface,
|
||||
underline, reverse video, or in color. They’ll be explained in more detail in
|
||||
the next subsection.</p>
|
||||
<p>The <a class="reference internal" href="../library/curses.html#curses.window.addstr" title="curses.window.addstr"><code class="xref py py-meth docutils literal notranslate"><span class="pre">addstr()</span></code></a> method takes a Python string or
|
||||
bytestring as the value to be displayed. The contents of bytestrings
|
||||
are sent to the terminal as-is. Strings are encoded to bytes using
|
||||
the value of the window’s <code class="xref py py-attr docutils literal notranslate"><span class="pre">encoding</span></code> attribute; this defaults to
|
||||
the default system encoding as returned by
|
||||
<a class="reference internal" href="../library/locale.html#locale.getpreferredencoding" title="locale.getpreferredencoding"><code class="xref py py-func docutils literal notranslate"><span class="pre">locale.getpreferredencoding()</span></code></a>.</p>
|
||||
<p>The <a class="reference internal" href="../library/curses.html#curses.window.addch" title="curses.window.addch"><code class="xref py py-meth docutils literal notranslate"><span class="pre">addch()</span></code></a> method takes a character, which can be
|
||||
either a string of length 1, a bytestring of length 1, or an integer.</p>
|
||||
<p>Constants are provided for extension characters; these constants are
|
||||
integers greater than 255. For example, <code class="xref py py-const docutils literal notranslate"><span class="pre">ACS_PLMINUS</span></code> is a +/-
|
||||
symbol, and <code class="xref py py-const docutils literal notranslate"><span class="pre">ACS_ULCORNER</span></code> is the upper left corner of a box
|
||||
(handy for drawing borders). You can also use the appropriate Unicode
|
||||
character.</p>
|
||||
<p>Windows remember where the cursor was left after the last operation, so if you
|
||||
leave out the <em>y,x</em> coordinates, the string or character will be displayed
|
||||
wherever the last operation left off. You can also move the cursor with the
|
||||
<code class="docutils literal notranslate"><span class="pre">move(y,x)</span></code> method. Because some terminals always display a flashing cursor,
|
||||
you may want to ensure that the cursor is positioned in some location where it
|
||||
won’t be distracting; it can be confusing to have the cursor blinking at some
|
||||
apparently random location.</p>
|
||||
<p>If your application doesn’t need a blinking cursor at all, you can
|
||||
call <code class="docutils literal notranslate"><span class="pre">curs_set(False)</span></code> to make it invisible. For compatibility
|
||||
with older curses versions, there’s a <code class="docutils literal notranslate"><span class="pre">leaveok(bool)</span></code> function
|
||||
that’s a synonym for <a class="reference internal" href="../library/curses.html#curses.curs_set" title="curses.curs_set"><code class="xref py py-func docutils literal notranslate"><span class="pre">curs_set()</span></code></a>. When <em>bool</em> is true, the
|
||||
curses library will attempt to suppress the flashing cursor, and you
|
||||
won’t need to worry about leaving it in odd locations.</p>
|
||||
<div class="section" id="attributes-and-color">
|
||||
<h3>Attributes and Color<a class="headerlink" href="#attributes-and-color" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Characters can be displayed in different ways. Status lines in a text-based
|
||||
application are commonly shown in reverse video, or a text viewer may need to
|
||||
highlight certain words. curses supports this by allowing you to specify an
|
||||
attribute for each cell on the screen.</p>
|
||||
<p>An attribute is an integer, each bit representing a different
|
||||
attribute. You can try to display text with multiple attribute bits
|
||||
set, but curses doesn’t guarantee that all the possible combinations
|
||||
are available, or that they’re all visually distinct. That depends on
|
||||
the ability of the terminal being used, so it’s safest to stick to the
|
||||
most commonly available attributes, listed here.</p>
|
||||
<table class="docutils align-center">
|
||||
<colgroup>
|
||||
<col style="width: 37%" />
|
||||
<col style="width: 63%" />
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Attribute</p></th>
|
||||
<th class="head"><p>Description</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p><code class="xref py py-const docutils literal notranslate"><span class="pre">A_BLINK</span></code></p></td>
|
||||
<td><p>Blinking text</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p><code class="xref py py-const docutils literal notranslate"><span class="pre">A_BOLD</span></code></p></td>
|
||||
<td><p>Extra bright or bold text</p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p><code class="xref py py-const docutils literal notranslate"><span class="pre">A_DIM</span></code></p></td>
|
||||
<td><p>Half bright text</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p><code class="xref py py-const docutils literal notranslate"><span class="pre">A_REVERSE</span></code></p></td>
|
||||
<td><p>Reverse-video text</p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p><code class="xref py py-const docutils literal notranslate"><span class="pre">A_STANDOUT</span></code></p></td>
|
||||
<td><p>The best highlighting mode available</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p><code class="xref py py-const docutils literal notranslate"><span class="pre">A_UNDERLINE</span></code></p></td>
|
||||
<td><p>Underlined text</p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>So, to display a reverse-video status line on the top line of the screen, you
|
||||
could code:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">stdscr</span><span class="o">.</span><span class="n">addstr</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">"Current mode: Typing mode"</span><span class="p">,</span>
|
||||
<span class="n">curses</span><span class="o">.</span><span class="n">A_REVERSE</span><span class="p">)</span>
|
||||
<span class="n">stdscr</span><span class="o">.</span><span class="n">refresh</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The curses library also supports color on those terminals that provide it. The
|
||||
most common such terminal is probably the Linux console, followed by color
|
||||
xterms.</p>
|
||||
<p>To use color, you must call the <a class="reference internal" href="../library/curses.html#curses.start_color" title="curses.start_color"><code class="xref py py-func docutils literal notranslate"><span class="pre">start_color()</span></code></a> function soon
|
||||
after calling <a class="reference internal" href="../library/curses.html#curses.initscr" title="curses.initscr"><code class="xref py py-func docutils literal notranslate"><span class="pre">initscr()</span></code></a>, to initialize the default color set
|
||||
(the <a class="reference internal" href="../library/curses.html#curses.wrapper" title="curses.wrapper"><code class="xref py py-func docutils literal notranslate"><span class="pre">curses.wrapper()</span></code></a> function does this automatically). Once that’s
|
||||
done, the <a class="reference internal" href="../library/curses.html#curses.has_colors" title="curses.has_colors"><code class="xref py py-func docutils literal notranslate"><span class="pre">has_colors()</span></code></a> function returns <code class="docutils literal notranslate"><span class="pre">True</span></code> if the terminal
|
||||
in use can
|
||||
actually display color. (Note: curses uses the American spelling ‘color’,
|
||||
instead of the Canadian/British spelling ‘colour’. If you’re used to the
|
||||
British spelling, you’ll have to resign yourself to misspelling it for the sake
|
||||
of these functions.)</p>
|
||||
<p>The curses library maintains a finite number of color pairs, containing a
|
||||
foreground (or text) color and a background color. You can get the attribute
|
||||
value corresponding to a color pair with the <a class="reference internal" href="../library/curses.html#curses.color_pair" title="curses.color_pair"><code class="xref py py-func docutils literal notranslate"><span class="pre">color_pair()</span></code></a>
|
||||
function; this can be bitwise-OR’ed with other attributes such as
|
||||
<code class="xref py py-const docutils literal notranslate"><span class="pre">A_REVERSE</span></code>, but again, such combinations are not guaranteed to work
|
||||
on all terminals.</p>
|
||||
<p>An example, which displays a line of text using color pair 1:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">stdscr</span><span class="o">.</span><span class="n">addstr</span><span class="p">(</span><span class="s2">"Pretty text"</span><span class="p">,</span> <span class="n">curses</span><span class="o">.</span><span class="n">color_pair</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
|
||||
<span class="n">stdscr</span><span class="o">.</span><span class="n">refresh</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>As I said before, a color pair consists of a foreground and background color.
|
||||
The <code class="docutils literal notranslate"><span class="pre">init_pair(n,</span> <span class="pre">f,</span> <span class="pre">b)</span></code> function changes the definition of color pair <em>n</em>, to
|
||||
foreground color <em>f</em> and background color <em>b</em>. Color pair 0 is hard-wired to white
|
||||
on black, and cannot be changed.</p>
|
||||
<p>Colors are numbered, and <code class="xref py py-func docutils literal notranslate"><span class="pre">start_color()</span></code> initializes 8 basic
|
||||
colors when it activates color mode. They are: 0:black, 1:red,
|
||||
2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and 7:white. The <a class="reference internal" href="../library/curses.html#module-curses" title="curses: An interface to the curses library, providing portable terminal handling. (Unix)"><code class="xref py py-mod docutils literal notranslate"><span class="pre">curses</span></code></a>
|
||||
module defines named constants for each of these colors:
|
||||
<code class="xref py py-const docutils literal notranslate"><span class="pre">curses.COLOR_BLACK</span></code>, <code class="xref py py-const docutils literal notranslate"><span class="pre">curses.COLOR_RED</span></code>, and so forth.</p>
|
||||
<p>Let’s put all this together. To change color pair 1 to red text on a white
|
||||
background, you would call:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">curses</span><span class="o">.</span><span class="n">init_pair</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">curses</span><span class="o">.</span><span class="n">COLOR_RED</span><span class="p">,</span> <span class="n">curses</span><span class="o">.</span><span class="n">COLOR_WHITE</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>When you change a color pair, any text already displayed using that color pair
|
||||
will change to the new colors. You can also display new text in this color
|
||||
with:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">stdscr</span><span class="o">.</span><span class="n">addstr</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span> <span class="s2">"RED ALERT!"</span><span class="p">,</span> <span class="n">curses</span><span class="o">.</span><span class="n">color_pair</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Very fancy terminals can change the definitions of the actual colors to a given
|
||||
RGB value. This lets you change color 1, which is usually red, to purple or
|
||||
blue or any other color you like. Unfortunately, the Linux console doesn’t
|
||||
support this, so I’m unable to try it out, and can’t provide any examples. You
|
||||
can check if your terminal can do this by calling
|
||||
<a class="reference internal" href="../library/curses.html#curses.can_change_color" title="curses.can_change_color"><code class="xref py py-func docutils literal notranslate"><span class="pre">can_change_color()</span></code></a>, which returns <code class="docutils literal notranslate"><span class="pre">True</span></code> if the capability is
|
||||
there. If you’re lucky enough to have such a talented terminal, consult your
|
||||
system’s man pages for more information.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="user-input">
|
||||
<h2>User Input<a class="headerlink" href="#user-input" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The C curses library offers only very simple input mechanisms. Python’s
|
||||
<a class="reference internal" href="../library/curses.html#module-curses" title="curses: An interface to the curses library, providing portable terminal handling. (Unix)"><code class="xref py py-mod docutils literal notranslate"><span class="pre">curses</span></code></a> module adds a basic text-input widget. (Other libraries
|
||||
such as <a class="reference external" href="https://pypi.org/project/urwid/">Urwid</a> have more extensive
|
||||
collections of widgets.)</p>
|
||||
<p>There are two methods for getting input from a window:</p>
|
||||
<ul class="simple">
|
||||
<li><p><a class="reference internal" href="../library/curses.html#curses.window.getch" title="curses.window.getch"><code class="xref py py-meth docutils literal notranslate"><span class="pre">getch()</span></code></a> refreshes the screen and then waits for
|
||||
the user to hit a key, displaying the key if <a class="reference internal" href="../library/curses.html#curses.echo" title="curses.echo"><code class="xref py py-func docutils literal notranslate"><span class="pre">echo()</span></code></a> has been
|
||||
called earlier. You can optionally specify a coordinate to which
|
||||
the cursor should be moved before pausing.</p></li>
|
||||
<li><p><a class="reference internal" href="../library/curses.html#curses.window.getkey" title="curses.window.getkey"><code class="xref py py-meth docutils literal notranslate"><span class="pre">getkey()</span></code></a> does the same thing but converts the
|
||||
integer to a string. Individual characters are returned as
|
||||
1-character strings, and special keys such as function keys return
|
||||
longer strings containing a key name such as <code class="docutils literal notranslate"><span class="pre">KEY_UP</span></code> or <code class="docutils literal notranslate"><span class="pre">^G</span></code>.</p></li>
|
||||
</ul>
|
||||
<p>It’s possible to not wait for the user using the
|
||||
<a class="reference internal" href="../library/curses.html#curses.window.nodelay" title="curses.window.nodelay"><code class="xref py py-meth docutils literal notranslate"><span class="pre">nodelay()</span></code></a> window method. After <code class="docutils literal notranslate"><span class="pre">nodelay(True)</span></code>,
|
||||
<code class="xref py py-meth docutils literal notranslate"><span class="pre">getch()</span></code> and <code class="xref py py-meth docutils literal notranslate"><span class="pre">getkey()</span></code> for the window become
|
||||
non-blocking. To signal that no input is ready, <code class="xref py py-meth docutils literal notranslate"><span class="pre">getch()</span></code> returns
|
||||
<code class="docutils literal notranslate"><span class="pre">curses.ERR</span></code> (a value of -1) and <code class="xref py py-meth docutils literal notranslate"><span class="pre">getkey()</span></code> raises an exception.
|
||||
There’s also a <a class="reference internal" href="../library/curses.html#curses.halfdelay" title="curses.halfdelay"><code class="xref py py-func docutils literal notranslate"><span class="pre">halfdelay()</span></code></a> function, which can be used to (in
|
||||
effect) set a timer on each <code class="xref py py-meth docutils literal notranslate"><span class="pre">getch()</span></code>; if no input becomes
|
||||
available within a specified delay (measured in tenths of a second),
|
||||
curses raises an exception.</p>
|
||||
<p>The <code class="xref py py-meth docutils literal notranslate"><span class="pre">getch()</span></code> method returns an integer; if it’s between 0 and 255, it
|
||||
represents the ASCII code of the key pressed. Values greater than 255 are
|
||||
special keys such as Page Up, Home, or the cursor keys. You can compare the
|
||||
value returned to constants such as <code class="xref py py-const docutils literal notranslate"><span class="pre">curses.KEY_PPAGE</span></code>,
|
||||
<code class="xref py py-const docutils literal notranslate"><span class="pre">curses.KEY_HOME</span></code>, or <code class="xref py py-const docutils literal notranslate"><span class="pre">curses.KEY_LEFT</span></code>. The main loop of
|
||||
your program may look something like this:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
|
||||
<span class="n">c</span> <span class="o">=</span> <span class="n">stdscr</span><span class="o">.</span><span class="n">getch</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">c</span> <span class="o">==</span> <span class="nb">ord</span><span class="p">(</span><span class="s1">'p'</span><span class="p">):</span>
|
||||
<span class="n">PrintDocument</span><span class="p">()</span>
|
||||
<span class="k">elif</span> <span class="n">c</span> <span class="o">==</span> <span class="nb">ord</span><span class="p">(</span><span class="s1">'q'</span><span class="p">):</span>
|
||||
<span class="k">break</span> <span class="c1"># Exit the while loop</span>
|
||||
<span class="k">elif</span> <span class="n">c</span> <span class="o">==</span> <span class="n">curses</span><span class="o">.</span><span class="n">KEY_HOME</span><span class="p">:</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">y</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The <a class="reference internal" href="../library/curses.ascii.html#module-curses.ascii" title="curses.ascii: Constants and set-membership functions for ASCII characters."><code class="xref py py-mod docutils literal notranslate"><span class="pre">curses.ascii</span></code></a> module supplies ASCII class membership functions that
|
||||
take either integer or 1-character string arguments; these may be useful in
|
||||
writing more readable tests for such loops. It also supplies
|
||||
conversion functions that take either integer or 1-character-string arguments
|
||||
and return the same type. For example, <a class="reference internal" href="../library/curses.ascii.html#curses.ascii.ctrl" title="curses.ascii.ctrl"><code class="xref py py-func docutils literal notranslate"><span class="pre">curses.ascii.ctrl()</span></code></a> returns the
|
||||
control character corresponding to its argument.</p>
|
||||
<p>There’s also a method to retrieve an entire string,
|
||||
<a class="reference internal" href="../library/curses.html#curses.window.getstr" title="curses.window.getstr"><code class="xref py py-meth docutils literal notranslate"><span class="pre">getstr()</span></code></a>. It isn’t used very often, because its
|
||||
functionality is quite limited; the only editing keys available are
|
||||
the backspace key and the Enter key, which terminates the string. It
|
||||
can optionally be limited to a fixed number of characters.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">curses</span><span class="o">.</span><span class="n">echo</span><span class="p">()</span> <span class="c1"># Enable echoing of characters</span>
|
||||
|
||||
<span class="c1"># Get a 15-character string, with the cursor on the top line</span>
|
||||
<span class="n">s</span> <span class="o">=</span> <span class="n">stdscr</span><span class="o">.</span><span class="n">getstr</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span> <span class="mi">15</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The <a class="reference internal" href="../library/curses.html#module-curses.textpad" title="curses.textpad: Emacs-like input editing in a curses window."><code class="xref py py-mod docutils literal notranslate"><span class="pre">curses.textpad</span></code></a> module supplies a text box that supports an
|
||||
Emacs-like set of keybindings. Various methods of the
|
||||
<a class="reference internal" href="../library/curses.html#curses.textpad.Textbox" title="curses.textpad.Textbox"><code class="xref py py-class docutils literal notranslate"><span class="pre">Textbox</span></code></a> class support editing with input
|
||||
validation and gathering the edit results either with or without
|
||||
trailing spaces. Here’s an example:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">curses</span>
|
||||
<span class="kn">from</span> <span class="nn">curses.textpad</span> <span class="k">import</span> <span class="n">Textbox</span><span class="p">,</span> <span class="n">rectangle</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">stdscr</span><span class="p">):</span>
|
||||
<span class="n">stdscr</span><span class="o">.</span><span class="n">addstr</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">"Enter IM message: (hit Ctrl-G to send)"</span><span class="p">)</span>
|
||||
|
||||
<span class="n">editwin</span> <span class="o">=</span> <span class="n">curses</span><span class="o">.</span><span class="n">newwin</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span><span class="mi">30</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">rectangle</span><span class="p">(</span><span class="n">stdscr</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="o">+</span><span class="mi">5</span><span class="o">+</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="o">+</span><span class="mi">30</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">stdscr</span><span class="o">.</span><span class="n">refresh</span><span class="p">()</span>
|
||||
|
||||
<span class="n">box</span> <span class="o">=</span> <span class="n">Textbox</span><span class="p">(</span><span class="n">editwin</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Let the user edit until Ctrl-G is struck.</span>
|
||||
<span class="n">box</span><span class="o">.</span><span class="n">edit</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># Get resulting contents</span>
|
||||
<span class="n">message</span> <span class="o">=</span> <span class="n">box</span><span class="o">.</span><span class="n">gather</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>See the library documentation on <a class="reference internal" href="../library/curses.html#module-curses.textpad" title="curses.textpad: Emacs-like input editing in a curses window."><code class="xref py py-mod docutils literal notranslate"><span class="pre">curses.textpad</span></code></a> for more details.</p>
|
||||
</div>
|
||||
<div class="section" id="for-more-information">
|
||||
<h2>For More Information<a class="headerlink" href="#for-more-information" title="Permalink to this headline">¶</a></h2>
|
||||
<p>This HOWTO doesn’t cover some advanced topics, such as reading the
|
||||
contents of the screen or capturing mouse events from an xterm
|
||||
instance, but the Python library page for the <a class="reference internal" href="../library/curses.html#module-curses" title="curses: An interface to the curses library, providing portable terminal handling. (Unix)"><code class="xref py py-mod docutils literal notranslate"><span class="pre">curses</span></code></a> module is now
|
||||
reasonably complete. You should browse it next.</p>
|
||||
<p>If you’re in doubt about the detailed behavior of the curses
|
||||
functions, consult the manual pages for your curses implementation,
|
||||
whether it’s ncurses or a proprietary Unix vendor’s. The manual pages
|
||||
will document any quirks, and provide complete lists of all the
|
||||
functions, attributes, and <code class="xref py py-const docutils literal notranslate"><span class="pre">ACS_*</span></code> characters available to
|
||||
you.</p>
|
||||
<p>Because the curses API is so large, some functions aren’t supported in
|
||||
the Python interface. Often this isn’t because they’re difficult to
|
||||
implement, but because no one has needed them yet. Also, Python
|
||||
doesn’t yet support the menu library associated with ncurses.
|
||||
Patches adding support for these would be welcome; see
|
||||
<a class="reference external" href="https://devguide.python.org/">the Python Developer’s Guide</a> to
|
||||
learn more about submitting patches to Python.</p>
|
||||
<ul class="simple">
|
||||
<li><p><a class="reference external" href="http://invisible-island.net/ncurses/ncurses-intro.html">Writing Programs with NCURSES</a>:
|
||||
a lengthy tutorial for C programmers.</p></li>
|
||||
<li><p><a class="reference external" href="https://linux.die.net/man/3/ncurses">The ncurses man page</a></p></li>
|
||||
<li><p><a class="reference external" href="http://invisible-island.net/ncurses/ncurses.faq.html">The ncurses FAQ</a></p></li>
|
||||
<li><p><a class="reference external" href="https://www.youtube.com/watch?v=eN1eZtjLEnU">“Use curses… don’t swear”</a>:
|
||||
video of a PyCon 2013 talk on controlling terminals using curses or Urwid.</p></li>
|
||||
<li><p><a class="reference external" href="http://www.pyvideo.org/video/1568/console-applications-with-urwid">“Console Applications with Urwid”</a>:
|
||||
video of a PyCon CA 2012 talk demonstrating some applications written using
|
||||
Urwid.</p></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li><a class="reference internal" href="#">Curses Programming with Python</a><ul>
|
||||
<li><a class="reference internal" href="#what-is-curses">What is curses?</a><ul>
|
||||
<li><a class="reference internal" href="#the-python-curses-module">The Python curses module</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#starting-and-ending-a-curses-application">Starting and ending a curses application</a></li>
|
||||
<li><a class="reference internal" href="#windows-and-pads">Windows and Pads</a></li>
|
||||
<li><a class="reference internal" href="#displaying-text">Displaying Text</a><ul>
|
||||
<li><a class="reference internal" href="#attributes-and-color">Attributes and Color</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#user-input">User Input</a></li>
|
||||
<li><a class="reference internal" href="#for-more-information">For More Information</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h4>Previous topic</h4>
|
||||
<p class="topless"><a href="cporting.html"
|
||||
title="previous chapter">Porting Extension Modules to Python 3</a></p>
|
||||
<h4>Next topic</h4>
|
||||
<p class="topless"><a href="descriptor.html"
|
||||
title="next chapter">Descriptor HowTo Guide</a></p>
|
||||
<div role="note" aria-label="source link">
|
||||
<h3>This Page</h3>
|
||||
<ul class="this-page-menu">
|
||||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||||
<li>
|
||||
<a href="https://github.com/python/cpython/blob/3.7/Doc/howto/curses.rst"
|
||||
rel="nofollow">Show Source
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="descriptor.html" title="Descriptor HowTo Guide"
|
||||
>next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="cporting.html" title="Porting Extension Modules to Python 3"
|
||||
>previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer">
|
||||
© <a href="../copyright.html">Copyright</a> 2001-2019, Python Software Foundation.
|
||||
<br />
|
||||
The Python Software Foundation is a non-profit corporation.
|
||||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||||
<br />
|
||||
Last updated on Jul 13, 2019.
|
||||
<a href="../bugs.html">Found a bug</a>?
|
||||
<br />
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 2.0.1.
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
636
python-3.7.4-docs-html/howto/descriptor.html
Normal file
636
python-3.7.4-docs-html/howto/descriptor.html
Normal file
@@ -0,0 +1,636 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Descriptor HowTo Guide — Python 3.7.4 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pydoctheme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/sidebar.js"></script>
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml"
|
||||
title="Search within Python 3.7.4 documentation"
|
||||
href="../_static/opensearch.xml"/>
|
||||
<link rel="author" title="About these documents" href="../about.html" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||||
<link rel="next" title="Functional Programming HOWTO" href="functional.html" />
|
||||
<link rel="prev" title="Curses Programming with Python" href="curses.html" />
|
||||
<link rel="shortcut icon" type="image/png" href="../_static/py.png" />
|
||||
<link rel="canonical" href="https://docs.python.org/3/howto/descriptor.html" />
|
||||
|
||||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||||
<script type="text/javascript" src="../_static/switchers.js"></script>
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
@media only screen {
|
||||
table.full-width-table {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
</head><body>
|
||||
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="functional.html" title="Functional Programming HOWTO"
|
||||
accesskey="N">next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="curses.html" title="Curses Programming with Python"
|
||||
accesskey="P">previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<div class="section" id="descriptor-howto-guide">
|
||||
<h1><a class="toc-backref" href="#id1">Descriptor HowTo Guide</a><a class="headerlink" href="#descriptor-howto-guide" title="Permalink to this headline">¶</a></h1>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Author</dt>
|
||||
<dd class="field-odd"><p>Raymond Hettinger</p>
|
||||
</dd>
|
||||
<dt class="field-even">Contact</dt>
|
||||
<dd class="field-even"><p>&lt;python at rcn dot com&gt;</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<div class="contents topic" id="contents">
|
||||
<p class="topic-title first">Contents</p>
|
||||
<ul class="simple">
|
||||
<li><p><a class="reference internal" href="#descriptor-howto-guide" id="id1">Descriptor HowTo Guide</a></p>
|
||||
<ul>
|
||||
<li><p><a class="reference internal" href="#abstract" id="id2">Abstract</a></p></li>
|
||||
<li><p><a class="reference internal" href="#definition-and-introduction" id="id3">Definition and Introduction</a></p></li>
|
||||
<li><p><a class="reference internal" href="#descriptor-protocol" id="id4">Descriptor Protocol</a></p></li>
|
||||
<li><p><a class="reference internal" href="#invoking-descriptors" id="id5">Invoking Descriptors</a></p></li>
|
||||
<li><p><a class="reference internal" href="#descriptor-example" id="id6">Descriptor Example</a></p></li>
|
||||
<li><p><a class="reference internal" href="#properties" id="id7">Properties</a></p></li>
|
||||
<li><p><a class="reference internal" href="#functions-and-methods" id="id8">Functions and Methods</a></p></li>
|
||||
<li><p><a class="reference internal" href="#static-methods-and-class-methods" id="id9">Static Methods and Class Methods</a></p></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="section" id="abstract">
|
||||
<h2><a class="toc-backref" href="#id2">Abstract</a><a class="headerlink" href="#abstract" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Defines descriptors, summarizes the protocol, and shows how descriptors are
|
||||
called. Examines a custom descriptor and several built-in Python descriptors
|
||||
including functions, properties, static methods, and class methods. Shows how
|
||||
each works by giving a pure Python equivalent and a sample application.</p>
|
||||
<p>Learning about descriptors not only provides access to a larger toolset, it
|
||||
creates a deeper understanding of how Python works and an appreciation for the
|
||||
elegance of its design.</p>
|
||||
</div>
|
||||
<div class="section" id="definition-and-introduction">
|
||||
<h2><a class="toc-backref" href="#id3">Definition and Introduction</a><a class="headerlink" href="#definition-and-introduction" title="Permalink to this headline">¶</a></h2>
|
||||
<p>In general, a descriptor is an object attribute with “binding behavior”, one
|
||||
whose attribute access has been overridden by methods in the descriptor
|
||||
protocol. Those methods are <a class="reference internal" href="../reference/datamodel.html#object.__get__" title="object.__get__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__get__()</span></code></a>, <a class="reference internal" href="../reference/datamodel.html#object.__set__" title="object.__set__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__set__()</span></code></a>, and
|
||||
<a class="reference internal" href="../reference/datamodel.html#object.__delete__" title="object.__delete__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__delete__()</span></code></a>. If any of those methods are defined for an object, it is
|
||||
said to be a descriptor.</p>
|
||||
<p>The default behavior for attribute access is to get, set, or delete the
|
||||
attribute from an object’s dictionary. For instance, <code class="docutils literal notranslate"><span class="pre">a.x</span></code> has a lookup chain
|
||||
starting with <code class="docutils literal notranslate"><span class="pre">a.__dict__['x']</span></code>, then <code class="docutils literal notranslate"><span class="pre">type(a).__dict__['x']</span></code>, and
|
||||
continuing through the base classes of <code class="docutils literal notranslate"><span class="pre">type(a)</span></code> excluding metaclasses. If the
|
||||
looked-up value is an object defining one of the descriptor methods, then Python
|
||||
may override the default behavior and invoke the descriptor method instead.
|
||||
Where this occurs in the precedence chain depends on which descriptor methods
|
||||
were defined.</p>
|
||||
<p>Descriptors are a powerful, general-purpose protocol. They are the mechanism
|
||||
behind properties, methods, static methods, class methods, and <a class="reference internal" href="../library/functions.html#super" title="super"><code class="xref py py-func docutils literal notranslate"><span class="pre">super()</span></code></a>.
|
||||
They are used throughout Python itself to implement the new-style classes
|
||||
introduced in version 2.2. Descriptors simplify the underlying C-code and offer
|
||||
a flexible set of new tools for everyday Python programs.</p>
|
||||
</div>
|
||||
<div class="section" id="descriptor-protocol">
|
||||
<h2><a class="toc-backref" href="#id4">Descriptor Protocol</a><a class="headerlink" href="#descriptor-protocol" title="Permalink to this headline">¶</a></h2>
|
||||
<p><code class="docutils literal notranslate"><span class="pre">descr.__get__(self,</span> <span class="pre">obj,</span> <span class="pre">type=None)</span> <span class="pre">-></span> <span class="pre">value</span></code></p>
|
||||
<p><code class="docutils literal notranslate"><span class="pre">descr.__set__(self,</span> <span class="pre">obj,</span> <span class="pre">value)</span> <span class="pre">-></span> <span class="pre">None</span></code></p>
|
||||
<p><code class="docutils literal notranslate"><span class="pre">descr.__delete__(self,</span> <span class="pre">obj)</span> <span class="pre">-></span> <span class="pre">None</span></code></p>
|
||||
<p>That is all there is to it. Define any of these methods and an object is
|
||||
considered a descriptor and can override default behavior upon being looked up
|
||||
as an attribute.</p>
|
||||
<p>If an object defines both <a class="reference internal" href="../reference/datamodel.html#object.__get__" title="object.__get__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__get__()</span></code></a> and <a class="reference internal" href="../reference/datamodel.html#object.__set__" title="object.__set__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__set__()</span></code></a>, it is considered
|
||||
a data descriptor. Descriptors that only define <a class="reference internal" href="../reference/datamodel.html#object.__get__" title="object.__get__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__get__()</span></code></a> are called
|
||||
non-data descriptors (they are typically used for methods but other uses are
|
||||
possible).</p>
|
||||
<p>Data and non-data descriptors differ in how overrides are calculated with
|
||||
respect to entries in an instance’s dictionary. If an instance’s dictionary
|
||||
has an entry with the same name as a data descriptor, the data descriptor
|
||||
takes precedence. If an instance’s dictionary has an entry with the same
|
||||
name as a non-data descriptor, the dictionary entry takes precedence.</p>
|
||||
<p>To make a read-only data descriptor, define both <a class="reference internal" href="../reference/datamodel.html#object.__get__" title="object.__get__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__get__()</span></code></a> and
|
||||
<a class="reference internal" href="../reference/datamodel.html#object.__set__" title="object.__set__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__set__()</span></code></a> with the <a class="reference internal" href="../reference/datamodel.html#object.__set__" title="object.__set__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__set__()</span></code></a> raising an <a class="reference internal" href="../library/exceptions.html#AttributeError" title="AttributeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">AttributeError</span></code></a> when
|
||||
called. Defining the <a class="reference internal" href="../reference/datamodel.html#object.__set__" title="object.__set__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__set__()</span></code></a> method with an exception raising
|
||||
placeholder is enough to make it a data descriptor.</p>
|
||||
</div>
|
||||
<div class="section" id="invoking-descriptors">
|
||||
<h2><a class="toc-backref" href="#id5">Invoking Descriptors</a><a class="headerlink" href="#invoking-descriptors" title="Permalink to this headline">¶</a></h2>
|
||||
<p>A descriptor can be called directly by its method name. For example,
|
||||
<code class="docutils literal notranslate"><span class="pre">d.__get__(obj)</span></code>.</p>
|
||||
<p>Alternatively, it is more common for a descriptor to be invoked automatically
|
||||
upon attribute access. For example, <code class="docutils literal notranslate"><span class="pre">obj.d</span></code> looks up <code class="docutils literal notranslate"><span class="pre">d</span></code> in the dictionary
|
||||
of <code class="docutils literal notranslate"><span class="pre">obj</span></code>. If <code class="docutils literal notranslate"><span class="pre">d</span></code> defines the method <a class="reference internal" href="../reference/datamodel.html#object.__get__" title="object.__get__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__get__()</span></code></a>, then <code class="docutils literal notranslate"><span class="pre">d.__get__(obj)</span></code>
|
||||
is invoked according to the precedence rules listed below.</p>
|
||||
<p>The details of invocation depend on whether <code class="docutils literal notranslate"><span class="pre">obj</span></code> is an object or a class.</p>
|
||||
<p>For objects, the machinery is in <a class="reference internal" href="../reference/datamodel.html#object.__getattribute__" title="object.__getattribute__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">object.__getattribute__()</span></code></a> which
|
||||
transforms <code class="docutils literal notranslate"><span class="pre">b.x</span></code> into <code class="docutils literal notranslate"><span class="pre">type(b).__dict__['x'].__get__(b,</span> <span class="pre">type(b))</span></code>. The
|
||||
implementation works through a precedence chain that gives data descriptors
|
||||
priority over instance variables, instance variables priority over non-data
|
||||
descriptors, and assigns lowest priority to <a class="reference internal" href="../reference/datamodel.html#object.__getattr__" title="object.__getattr__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__getattr__()</span></code></a> if provided.
|
||||
The full C implementation can be found in <a class="reference internal" href="../c-api/object.html#c.PyObject_GenericGetAttr" title="PyObject_GenericGetAttr"><code class="xref c c-func docutils literal notranslate"><span class="pre">PyObject_GenericGetAttr()</span></code></a> in
|
||||
<a class="reference external" href="https://github.com/python/cpython/tree/3.7/Objects/object.c">Objects/object.c</a>.</p>
|
||||
<p>For classes, the machinery is in <code class="xref py py-meth docutils literal notranslate"><span class="pre">type.__getattribute__()</span></code> which transforms
|
||||
<code class="docutils literal notranslate"><span class="pre">B.x</span></code> into <code class="docutils literal notranslate"><span class="pre">B.__dict__['x'].__get__(None,</span> <span class="pre">B)</span></code>. In pure Python, it looks
|
||||
like:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">__getattribute__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">):</span>
|
||||
<span class="s2">"Emulate type_getattro() in Objects/typeobject.c"</span>
|
||||
<span class="n">v</span> <span class="o">=</span> <span class="nb">object</span><span class="o">.</span><span class="fm">__getattribute__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="s1">'__get__'</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">v</span><span class="o">.</span><span class="fm">__get__</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">v</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The important points to remember are:</p>
|
||||
<ul class="simple">
|
||||
<li><p>descriptors are invoked by the <a class="reference internal" href="../reference/datamodel.html#object.__getattribute__" title="object.__getattribute__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__getattribute__()</span></code></a> method</p></li>
|
||||
<li><p>overriding <a class="reference internal" href="../reference/datamodel.html#object.__getattribute__" title="object.__getattribute__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__getattribute__()</span></code></a> prevents automatic descriptor calls</p></li>
|
||||
<li><p><a class="reference internal" href="../reference/datamodel.html#object.__getattribute__" title="object.__getattribute__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">object.__getattribute__()</span></code></a> and <code class="xref py py-meth docutils literal notranslate"><span class="pre">type.__getattribute__()</span></code> make
|
||||
different calls to <a class="reference internal" href="../reference/datamodel.html#object.__get__" title="object.__get__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__get__()</span></code></a>.</p></li>
|
||||
<li><p>data descriptors always override instance dictionaries.</p></li>
|
||||
<li><p>non-data descriptors may be overridden by instance dictionaries.</p></li>
|
||||
</ul>
|
||||
<p>The object returned by <code class="docutils literal notranslate"><span class="pre">super()</span></code> also has a custom <a class="reference internal" href="../reference/datamodel.html#object.__getattribute__" title="object.__getattribute__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__getattribute__()</span></code></a>
|
||||
method for invoking descriptors. The call <code class="docutils literal notranslate"><span class="pre">super(B,</span> <span class="pre">obj).m()</span></code> searches
|
||||
<code class="docutils literal notranslate"><span class="pre">obj.__class__.__mro__</span></code> for the base class <code class="docutils literal notranslate"><span class="pre">A</span></code> immediately following <code class="docutils literal notranslate"><span class="pre">B</span></code>
|
||||
and then returns <code class="docutils literal notranslate"><span class="pre">A.__dict__['m'].__get__(obj,</span> <span class="pre">B)</span></code>. If not a descriptor,
|
||||
<code class="docutils literal notranslate"><span class="pre">m</span></code> is returned unchanged. If not in the dictionary, <code class="docutils literal notranslate"><span class="pre">m</span></code> reverts to a
|
||||
search using <a class="reference internal" href="../reference/datamodel.html#object.__getattribute__" title="object.__getattribute__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">object.__getattribute__()</span></code></a>.</p>
|
||||
<p>The implementation details are in <code class="xref c c-func docutils literal notranslate"><span class="pre">super_getattro()</span></code> in
|
||||
<a class="reference external" href="https://github.com/python/cpython/tree/3.7/Objects/typeobject.c">Objects/typeobject.c</a>, and a pure Python equivalent can be found in
|
||||
<a class="reference external" href="https://www.python.org/download/releases/2.2.3/descrintro/#cooperation">Guido’s Tutorial</a>.</p>
|
||||
<p>The details above show that the mechanism for descriptors is embedded in the
|
||||
<a class="reference internal" href="../reference/datamodel.html#object.__getattribute__" title="object.__getattribute__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__getattribute__()</span></code></a> methods for <a class="reference internal" href="../library/functions.html#object" title="object"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a>, <a class="reference internal" href="../library/functions.html#type" title="type"><code class="xref py py-class docutils literal notranslate"><span class="pre">type</span></code></a>, and
|
||||
<a class="reference internal" href="../library/functions.html#super" title="super"><code class="xref py py-func docutils literal notranslate"><span class="pre">super()</span></code></a>. Classes inherit this machinery when they derive from
|
||||
<a class="reference internal" href="../library/functions.html#object" title="object"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a> or if they have a meta-class providing similar functionality.
|
||||
Likewise, classes can turn off descriptor invocation by overriding
|
||||
<a class="reference internal" href="../reference/datamodel.html#object.__getattribute__" title="object.__getattribute__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__getattribute__()</span></code></a>.</p>
|
||||
</div>
|
||||
<div class="section" id="descriptor-example">
|
||||
<h2><a class="toc-backref" href="#id6">Descriptor Example</a><a class="headerlink" href="#descriptor-example" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The following code creates a class whose objects are data descriptors which
|
||||
print a message for each get or set. Overriding <a class="reference internal" href="../reference/datamodel.html#object.__getattribute__" title="object.__getattribute__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__getattribute__()</span></code></a> is
|
||||
an alternate approach that could do this for every attribute. However, this
|
||||
descriptor is useful for monitoring just a few chosen attributes:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">RevealAccess</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="sd">"""A data descriptor that sets and returns values</span>
|
||||
<span class="sd"> normally and prints a message logging their access.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">initval</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">'var'</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">val</span> <span class="o">=</span> <span class="n">initval</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__get__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">objtype</span><span class="p">):</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Retrieving'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">val</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__set__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">val</span><span class="p">):</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Updating'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">val</span> <span class="o">=</span> <span class="n">val</span>
|
||||
|
||||
<span class="o">>>></span> <span class="k">class</span> <span class="nc">MyClass</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="o">...</span> <span class="n">x</span> <span class="o">=</span> <span class="n">RevealAccess</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="s1">'var "x"'</span><span class="p">)</span>
|
||||
<span class="o">...</span> <span class="n">y</span> <span class="o">=</span> <span class="mi">5</span>
|
||||
<span class="o">...</span>
|
||||
<span class="o">>>></span> <span class="n">m</span> <span class="o">=</span> <span class="n">MyClass</span><span class="p">()</span>
|
||||
<span class="o">>>></span> <span class="n">m</span><span class="o">.</span><span class="n">x</span>
|
||||
<span class="n">Retrieving</span> <span class="n">var</span> <span class="s2">"x"</span>
|
||||
<span class="mi">10</span>
|
||||
<span class="o">>>></span> <span class="n">m</span><span class="o">.</span><span class="n">x</span> <span class="o">=</span> <span class="mi">20</span>
|
||||
<span class="n">Updating</span> <span class="n">var</span> <span class="s2">"x"</span>
|
||||
<span class="o">>>></span> <span class="n">m</span><span class="o">.</span><span class="n">x</span>
|
||||
<span class="n">Retrieving</span> <span class="n">var</span> <span class="s2">"x"</span>
|
||||
<span class="mi">20</span>
|
||||
<span class="o">>>></span> <span class="n">m</span><span class="o">.</span><span class="n">y</span>
|
||||
<span class="mi">5</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The protocol is simple and offers exciting possibilities. Several use cases are
|
||||
so common that they have been packaged into individual function calls.
|
||||
Properties, bound methods, static methods, and class methods are all
|
||||
based on the descriptor protocol.</p>
|
||||
</div>
|
||||
<div class="section" id="properties">
|
||||
<h2><a class="toc-backref" href="#id7">Properties</a><a class="headerlink" href="#properties" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Calling <a class="reference internal" href="../library/functions.html#property" title="property"><code class="xref py py-func docutils literal notranslate"><span class="pre">property()</span></code></a> is a succinct way of building a data descriptor that
|
||||
triggers function calls upon access to an attribute. Its signature is:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="nb">property</span><span class="p">(</span><span class="n">fget</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">fset</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">fdel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">doc</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">property</span> <span class="n">attribute</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The documentation shows a typical use to define a managed attribute <code class="docutils literal notranslate"><span class="pre">x</span></code>:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">C</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="nf">getx</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">__x</span>
|
||||
<span class="k">def</span> <span class="nf">setx</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span> <span class="bp">self</span><span class="o">.</span><span class="n">__x</span> <span class="o">=</span> <span class="n">value</span>
|
||||
<span class="k">def</span> <span class="nf">delx</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">__x</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">getx</span><span class="p">,</span> <span class="n">setx</span><span class="p">,</span> <span class="n">delx</span><span class="p">,</span> <span class="s2">"I'm the 'x' property."</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>To see how <a class="reference internal" href="../library/functions.html#property" title="property"><code class="xref py py-func docutils literal notranslate"><span class="pre">property()</span></code></a> is implemented in terms of the descriptor protocol,
|
||||
here is a pure Python equivalent:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Property</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="s2">"Emulate PyProperty_Type() in Objects/descrobject.c"</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fget</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">fset</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">fdel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">doc</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fget</span> <span class="o">=</span> <span class="n">fget</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fset</span> <span class="o">=</span> <span class="n">fset</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fdel</span> <span class="o">=</span> <span class="n">fdel</span>
|
||||
<span class="k">if</span> <span class="n">doc</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">fget</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">doc</span> <span class="o">=</span> <span class="n">fget</span><span class="o">.</span><span class="vm">__doc__</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">doc</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__get__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">objtype</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="bp">self</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">fget</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="s2">"unreadable attribute"</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__set__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">fset</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="s2">"can't set attribute"</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fset</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__delete__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">fdel</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="s2">"can't delete attribute"</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fdel</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">getter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fget</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)(</span><span class="n">fget</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fset</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fdel</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__doc__</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">setter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fset</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)(</span><span class="bp">self</span><span class="o">.</span><span class="n">fget</span><span class="p">,</span> <span class="n">fset</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fdel</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__doc__</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">deleter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fdel</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)(</span><span class="bp">self</span><span class="o">.</span><span class="n">fget</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fset</span><span class="p">,</span> <span class="n">fdel</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__doc__</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The <a class="reference internal" href="../library/functions.html#property" title="property"><code class="xref py py-func docutils literal notranslate"><span class="pre">property()</span></code></a> builtin helps whenever a user interface has granted
|
||||
attribute access and then subsequent changes require the intervention of a
|
||||
method.</p>
|
||||
<p>For instance, a spreadsheet class may grant access to a cell value through
|
||||
<code class="docutils literal notranslate"><span class="pre">Cell('b10').value</span></code>. Subsequent improvements to the program require the cell
|
||||
to be recalculated on every access; however, the programmer does not want to
|
||||
affect existing client code accessing the attribute directly. The solution is
|
||||
to wrap access to the value attribute in a property data descriptor:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Cell</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="o">.</span> <span class="o">.</span> <span class="o">.</span>
|
||||
<span class="k">def</span> <span class="nf">getvalue</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="s2">"Recalculate the cell before returning value"</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">recalc</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_value</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">getvalue</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="functions-and-methods">
|
||||
<h2><a class="toc-backref" href="#id8">Functions and Methods</a><a class="headerlink" href="#functions-and-methods" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Python’s object-oriented features are built upon a function-based environment.
|
||||
Using non-data descriptors, the two are merged seamlessly.</p>
|
||||
<p>Class dictionaries store methods as functions. In a class definition, methods
|
||||
are written using <a class="reference internal" href="../reference/compound_stmts.html#def"><code class="xref std std-keyword docutils literal notranslate"><span class="pre">def</span></code></a> or <a class="reference internal" href="../reference/expressions.html#lambda"><code class="xref std std-keyword docutils literal notranslate"><span class="pre">lambda</span></code></a>, the usual tools for
|
||||
creating functions. Methods only differ from regular functions in that the
|
||||
first argument is reserved for the object instance. By Python convention, the
|
||||
instance reference is called <em>self</em> but may be called <em>this</em> or any other
|
||||
variable name.</p>
|
||||
<p>To support method calls, functions include the <a class="reference internal" href="../reference/datamodel.html#object.__get__" title="object.__get__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__get__()</span></code></a> method for
|
||||
binding methods during attribute access. This means that all functions are
|
||||
non-data descriptors which return bound methods when they are invoked from an
|
||||
object. In pure Python, it works like this:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Function</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="o">.</span> <span class="o">.</span> <span class="o">.</span>
|
||||
<span class="k">def</span> <span class="nf">__get__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">objtype</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="s2">"Simulate func_descr_get() in Objects/funcobject.c"</span>
|
||||
<span class="k">if</span> <span class="n">obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="bp">self</span>
|
||||
<span class="k">return</span> <span class="n">types</span><span class="o">.</span><span class="n">MethodType</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Running the interpreter shows how the function descriptor works in practice:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="k">class</span> <span class="nc">D</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="gp">... </span> <span class="k">def</span> <span class="nf">f</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||||
<span class="gp">... </span> <span class="k">return</span> <span class="n">x</span>
|
||||
<span class="gp">...</span>
|
||||
<span class="gp">>>> </span><span class="n">d</span> <span class="o">=</span> <span class="n">D</span><span class="p">()</span>
|
||||
|
||||
<span class="go"># Access through the class dictionary does not invoke __get__.</span>
|
||||
<span class="go"># It just returns the underlying function object.</span>
|
||||
<span class="gp">>>> </span><span class="n">D</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">[</span><span class="s1">'f'</span><span class="p">]</span>
|
||||
<span class="go"><function D.f at 0x00C45070></span>
|
||||
|
||||
<span class="go"># Dotted access from a class calls __get__() which just returns</span>
|
||||
<span class="go"># the underlying function unchanged.</span>
|
||||
<span class="gp">>>> </span><span class="n">D</span><span class="o">.</span><span class="n">f</span>
|
||||
<span class="go"><function D.f at 0x00C45070></span>
|
||||
|
||||
<span class="go"># The function has a __qualname__ attribute to support introspection</span>
|
||||
<span class="gp">>>> </span><span class="n">D</span><span class="o">.</span><span class="n">f</span><span class="o">.</span><span class="vm">__qualname__</span>
|
||||
<span class="go">'D.f'</span>
|
||||
|
||||
<span class="go"># Dotted access from an instance calls __get__() which returns the</span>
|
||||
<span class="go"># function wrapped in a bound method object</span>
|
||||
<span class="gp">>>> </span><span class="n">d</span><span class="o">.</span><span class="n">f</span>
|
||||
<span class="go"><bound method D.f of <__main__.D object at 0x00B18C90>></span>
|
||||
|
||||
<span class="go"># Internally, the bound method stores the underlying function and</span>
|
||||
<span class="go"># the bound instance; its __class__ is the built-in method type.</span>
|
||||
<span class="gp">>>> </span><span class="n">d</span><span class="o">.</span><span class="n">f</span><span class="o">.</span><span class="vm">__func__</span>
|
||||
<span class="go"><function D.f at 0x1012e5ae8></span>
|
||||
<span class="gp">>>> </span><span class="n">d</span><span class="o">.</span><span class="n">f</span><span class="o">.</span><span class="vm">__self__</span>
|
||||
<span class="go"><__main__.D object at 0x1012e1f98></span>
|
||||
<span class="gp">>>> </span><span class="n">d</span><span class="o">.</span><span class="n">f</span><span class="o">.</span><span class="vm">__class__</span>
|
||||
<span class="go"><class 'method'></span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="static-methods-and-class-methods">
|
||||
<h2><a class="toc-backref" href="#id9">Static Methods and Class Methods</a><a class="headerlink" href="#static-methods-and-class-methods" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Non-data descriptors provide a simple mechanism for variations on the usual
|
||||
patterns of binding functions into methods.</p>
|
||||
<p>To recap, functions have a <a class="reference internal" href="../reference/datamodel.html#object.__get__" title="object.__get__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__get__()</span></code></a> method so that they can be converted
|
||||
to a method when accessed as attributes. The non-data descriptor transforms an
|
||||
<code class="docutils literal notranslate"><span class="pre">obj.f(*args)</span></code> call into <code class="docutils literal notranslate"><span class="pre">f(obj,</span> <span class="pre">*args)</span></code>. Calling <code class="docutils literal notranslate"><span class="pre">klass.f(*args)</span></code>
|
||||
becomes <code class="docutils literal notranslate"><span class="pre">f(*args)</span></code>.</p>
|
||||
<p>This chart summarizes the binding and its two most useful variants:</p>
|
||||
<blockquote>
|
||||
<div><table class="docutils align-center">
|
||||
<colgroup>
|
||||
<col style="width: 30%" />
|
||||
<col style="width: 39%" />
|
||||
<col style="width: 32%" />
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="row-odd"><th class="head"><p>Transformation</p></th>
|
||||
<th class="head"><p>Called from an
|
||||
Object</p></th>
|
||||
<th class="head"><p>Called from a
|
||||
Class</p></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="row-even"><td><p>function</p></td>
|
||||
<td><p>f(obj, *args)</p></td>
|
||||
<td><p>f(*args)</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>staticmethod</p></td>
|
||||
<td><p>f(*args)</p></td>
|
||||
<td><p>f(*args)</p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p>classmethod</p></td>
|
||||
<td><p>f(type(obj), *args)</p></td>
|
||||
<td><p>f(klass, *args)</p></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div></blockquote>
|
||||
<p>Static methods return the underlying function without changes. Calling either
|
||||
<code class="docutils literal notranslate"><span class="pre">c.f</span></code> or <code class="docutils literal notranslate"><span class="pre">C.f</span></code> is the equivalent of a direct lookup into
|
||||
<code class="docutils literal notranslate"><span class="pre">object.__getattribute__(c,</span> <span class="pre">"f")</span></code> or <code class="docutils literal notranslate"><span class="pre">object.__getattribute__(C,</span> <span class="pre">"f")</span></code>. As a
|
||||
result, the function becomes identically accessible from either an object or a
|
||||
class.</p>
|
||||
<p>Good candidates for static methods are methods that do not reference the
|
||||
<code class="docutils literal notranslate"><span class="pre">self</span></code> variable.</p>
|
||||
<p>For instance, a statistics package may include a container class for
|
||||
experimental data. The class provides normal methods for computing the average,
|
||||
mean, median, and other descriptive statistics that depend on the data. However,
|
||||
there may be useful functions which are conceptually related but do not depend
|
||||
on the data. For instance, <code class="docutils literal notranslate"><span class="pre">erf(x)</span></code> is a handy conversion routine that comes up
|
||||
in statistical work but does not directly depend on a particular dataset.
|
||||
It can be called either from an object or the class: <code class="docutils literal notranslate"><span class="pre">s.erf(1.5)</span> <span class="pre">--></span> <span class="pre">.9661</span></code> or
|
||||
<code class="docutils literal notranslate"><span class="pre">Sample.erf(1.5)</span> <span class="pre">--></span> <span class="pre">.9661</span></code>.</p>
|
||||
<p>Since staticmethods return the underlying function with no changes, the example
|
||||
calls are unexciting:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="k">class</span> <span class="nc">E</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="gp">... </span> <span class="k">def</span> <span class="nf">f</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
|
||||
<span class="gp">... </span> <span class="nb">print</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
||||
<span class="gp">... </span> <span class="n">f</span> <span class="o">=</span> <span class="nb">staticmethod</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
|
||||
<span class="gp">...</span>
|
||||
<span class="gp">>>> </span><span class="n">E</span><span class="o">.</span><span class="n">f</span><span class="p">(</span><span class="mi">3</span><span class="p">)</span>
|
||||
<span class="go">3</span>
|
||||
<span class="gp">>>> </span><span class="n">E</span><span class="p">()</span><span class="o">.</span><span class="n">f</span><span class="p">(</span><span class="mi">3</span><span class="p">)</span>
|
||||
<span class="go">3</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Using the non-data descriptor protocol, a pure Python version of
|
||||
<a class="reference internal" href="../library/functions.html#staticmethod" title="staticmethod"><code class="xref py py-func docutils literal notranslate"><span class="pre">staticmethod()</span></code></a> would look like this:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">StaticMethod</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="s2">"Emulate PyStaticMethod_Type() in Objects/funcobject.c"</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">f</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">f</span> <span class="o">=</span> <span class="n">f</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__get__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">objtype</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">f</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Unlike static methods, class methods prepend the class reference to the
|
||||
argument list before calling the function. This format is the same
|
||||
whether the caller is an object or a class:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="k">class</span> <span class="nc">E</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="gp">... </span> <span class="k">def</span> <span class="nf">f</span><span class="p">(</span><span class="n">klass</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||||
<span class="gp">... </span> <span class="k">return</span> <span class="n">klass</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span> <span class="n">x</span>
|
||||
<span class="gp">... </span> <span class="n">f</span> <span class="o">=</span> <span class="nb">classmethod</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
|
||||
<span class="gp">...</span>
|
||||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">E</span><span class="o">.</span><span class="n">f</span><span class="p">(</span><span class="mi">3</span><span class="p">))</span>
|
||||
<span class="go">('E', 3)</span>
|
||||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">E</span><span class="p">()</span><span class="o">.</span><span class="n">f</span><span class="p">(</span><span class="mi">3</span><span class="p">))</span>
|
||||
<span class="go">('E', 3)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>This behavior is useful whenever the function only needs to have a class
|
||||
reference and does not care about any underlying data. One use for classmethods
|
||||
is to create alternate class constructors. For example, the classmethod
|
||||
<a class="reference internal" href="../library/stdtypes.html#dict.fromkeys" title="dict.fromkeys"><code class="xref py py-func docutils literal notranslate"><span class="pre">dict.fromkeys()</span></code></a> creates a new dictionary from a list of keys. The pure
|
||||
Python equivalent is:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Dict</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="o">.</span> <span class="o">.</span> <span class="o">.</span>
|
||||
<span class="k">def</span> <span class="nf">fromkeys</span><span class="p">(</span><span class="n">klass</span><span class="p">,</span> <span class="n">iterable</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="s2">"Emulate dict_fromkeys() in Objects/dictobject.c"</span>
|
||||
<span class="n">d</span> <span class="o">=</span> <span class="n">klass</span><span class="p">()</span>
|
||||
<span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">iterable</span><span class="p">:</span>
|
||||
<span class="n">d</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">value</span>
|
||||
<span class="k">return</span> <span class="n">d</span>
|
||||
<span class="n">fromkeys</span> <span class="o">=</span> <span class="nb">classmethod</span><span class="p">(</span><span class="n">fromkeys</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Now a new dictionary of unique keys can be constructed like this:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">Dict</span><span class="o">.</span><span class="n">fromkeys</span><span class="p">(</span><span class="s1">'abracadabra'</span><span class="p">)</span>
|
||||
<span class="go">{'a': None, 'b': None, 'r': None, 'c': None, 'd': None}</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Using the non-data descriptor protocol, a pure Python version of
|
||||
<a class="reference internal" href="../library/functions.html#classmethod" title="classmethod"><code class="xref py py-func docutils literal notranslate"><span class="pre">classmethod()</span></code></a> would look like this:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">ClassMethod</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="s2">"Emulate PyClassMethod_Type() in Objects/funcobject.c"</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">f</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">f</span> <span class="o">=</span> <span class="n">f</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__get__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">klass</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">klass</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">klass</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span>
|
||||
<span class="k">def</span> <span class="nf">newfunc</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">f</span><span class="p">(</span><span class="n">klass</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">newfunc</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li><a class="reference internal" href="#">Descriptor HowTo Guide</a><ul>
|
||||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||||
<li><a class="reference internal" href="#definition-and-introduction">Definition and Introduction</a></li>
|
||||
<li><a class="reference internal" href="#descriptor-protocol">Descriptor Protocol</a></li>
|
||||
<li><a class="reference internal" href="#invoking-descriptors">Invoking Descriptors</a></li>
|
||||
<li><a class="reference internal" href="#descriptor-example">Descriptor Example</a></li>
|
||||
<li><a class="reference internal" href="#properties">Properties</a></li>
|
||||
<li><a class="reference internal" href="#functions-and-methods">Functions and Methods</a></li>
|
||||
<li><a class="reference internal" href="#static-methods-and-class-methods">Static Methods and Class Methods</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h4>Previous topic</h4>
|
||||
<p class="topless"><a href="curses.html"
|
||||
title="previous chapter">Curses Programming with Python</a></p>
|
||||
<h4>Next topic</h4>
|
||||
<p class="topless"><a href="functional.html"
|
||||
title="next chapter">Functional Programming HOWTO</a></p>
|
||||
<div role="note" aria-label="source link">
|
||||
<h3>This Page</h3>
|
||||
<ul class="this-page-menu">
|
||||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||||
<li>
|
||||
<a href="https://github.com/python/cpython/blob/3.7/Doc/howto/descriptor.rst"
|
||||
rel="nofollow">Show Source
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="functional.html" title="Functional Programming HOWTO"
|
||||
>next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="curses.html" title="Curses Programming with Python"
|
||||
>previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer">
|
||||
© <a href="../copyright.html">Copyright</a> 2001-2019, Python Software Foundation.
|
||||
<br />
|
||||
The Python Software Foundation is a non-profit corporation.
|
||||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||||
<br />
|
||||
Last updated on Jul 13, 2019.
|
||||
<a href="../bugs.html">Found a bug</a>?
|
||||
<br />
|
||||
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 2.0.1.
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
1350
python-3.7.4-docs-html/howto/functional.html
Normal file
1350
python-3.7.4-docs-html/howto/functional.html
Normal file
File diff suppressed because it is too large
Load Diff
205
python-3.7.4-docs-html/howto/index.html
Normal file
205
python-3.7.4-docs-html/howto/index.html
Normal file
@@ -0,0 +1,205 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Python HOWTOs — Python 3.7.4 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pydoctheme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/sidebar.js"></script>
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml"
|
||||
title="Search within Python 3.7.4 documentation"
|
||||
href="../_static/opensearch.xml"/>
|
||||
<link rel="author" title="About these documents" href="../about.html" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||||
<link rel="next" title="Porting Python 2 Code to Python 3" href="pyporting.html" />
|
||||
<link rel="prev" title="Installing Python Modules" href="../installing/index.html" />
|
||||
<link rel="shortcut icon" type="image/png" href="../_static/py.png" />
|
||||
<link rel="canonical" href="https://docs.python.org/3/howto/index.html" />
|
||||
|
||||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||||
<script type="text/javascript" src="../_static/switchers.js"></script>
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
@media only screen {
|
||||
table.full-width-table {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
</head><body>
|
||||
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="pyporting.html" title="Porting Python 2 Code to Python 3"
|
||||
accesskey="N">next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="../installing/index.html" title="Installing Python Modules"
|
||||
accesskey="P">previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<div class="section" id="python-howtos">
|
||||
<h1>Python HOWTOs<a class="headerlink" href="#python-howtos" title="Permalink to this headline">¶</a></h1>
|
||||
<p>Python HOWTOs are documents that cover a single, specific topic,
|
||||
and attempt to cover it fairly completely. Modelled on the Linux
|
||||
Documentation Project’s HOWTO collection, this collection is an
|
||||
effort to foster documentation that’s more detailed than the
|
||||
Python Library Reference.</p>
|
||||
<p>Currently, the HOWTOs are:</p>
|
||||
<div class="toctree-wrapper compound">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="pyporting.html">Porting Python 2 Code to Python 3</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="cporting.html">Porting Extension Modules to Python 3</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="curses.html">Curses Programming with Python</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="descriptor.html">Descriptor HowTo Guide</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="functional.html">Functional Programming HOWTO</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="logging.html">Logging HOWTO</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="logging-cookbook.html">Logging Cookbook</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="regex.html">Regular Expression HOWTO</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="sockets.html">Socket Programming HOWTO</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="sorting.html">Sorting HOW TO</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="unicode.html">Unicode HOWTO</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="urllib2.html">HOWTO Fetch Internet Resources Using The urllib Package</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="argparse.html">Argparse Tutorial</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="ipaddress.html">An introduction to the ipaddress module</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="clinic.html">Argument Clinic How-To</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="instrumentation.html">Instrumenting CPython with DTrace and SystemTap</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<h4>Previous topic</h4>
|
||||
<p class="topless"><a href="../installing/index.html"
|
||||
title="previous chapter">Installing Python Modules</a></p>
|
||||
<h4>Next topic</h4>
|
||||
<p class="topless"><a href="pyporting.html"
|
||||
title="next chapter">Porting Python 2 Code to Python 3</a></p>
|
||||
<div role="note" aria-label="source link">
|
||||
<h3>This Page</h3>
|
||||
<ul class="this-page-menu">
|
||||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||||
<li>
|
||||
<a href="https://github.com/python/cpython/blob/3.7/Doc/howto/index.rst"
|
||||
rel="nofollow">Show Source
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="pyporting.html" title="Porting Python 2 Code to Python 3"
|
||||
>next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="../installing/index.html" title="Installing Python Modules"
|
||||
>previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer">
|
||||
© <a href="../copyright.html">Copyright</a> 2001-2019, Python Software Foundation.
|
||||
<br />
|
||||
The Python Software Foundation is a non-profit corporation.
|
||||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||||
<br />
|
||||
Last updated on Jul 13, 2019.
|
||||
<a href="../bugs.html">Found a bug</a>?
|
||||
<br />
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 2.0.1.
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
598
python-3.7.4-docs-html/howto/instrumentation.html
Normal file
598
python-3.7.4-docs-html/howto/instrumentation.html
Normal file
@@ -0,0 +1,598 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Instrumenting CPython with DTrace and SystemTap — Python 3.7.4 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pydoctheme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/sidebar.js"></script>
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml"
|
||||
title="Search within Python 3.7.4 documentation"
|
||||
href="../_static/opensearch.xml"/>
|
||||
<link rel="author" title="About these documents" href="../about.html" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||||
<link rel="next" title="Python Frequently Asked Questions" href="../faq/index.html" />
|
||||
<link rel="prev" title="Argument Clinic How-To" href="clinic.html" />
|
||||
<link rel="shortcut icon" type="image/png" href="../_static/py.png" />
|
||||
<link rel="canonical" href="https://docs.python.org/3/howto/instrumentation.html" />
|
||||
|
||||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||||
<script type="text/javascript" src="../_static/switchers.js"></script>
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
@media only screen {
|
||||
table.full-width-table {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
</head><body>
|
||||
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="../faq/index.html" title="Python Frequently Asked Questions"
|
||||
accesskey="N">next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="clinic.html" title="Argument Clinic How-To"
|
||||
accesskey="P">previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<div class="section" id="instrumenting-cpython-with-dtrace-and-systemtap">
|
||||
<span id="instrumentation"></span><h1>Instrumenting CPython with DTrace and SystemTap<a class="headerlink" href="#instrumenting-cpython-with-dtrace-and-systemtap" title="Permalink to this headline">¶</a></h1>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">author</dt>
|
||||
<dd class="field-odd"><p>David Malcolm</p>
|
||||
</dd>
|
||||
<dt class="field-even">author</dt>
|
||||
<dd class="field-even"><p>Łukasz Langa</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<p>DTrace and SystemTap are monitoring tools, each providing a way to inspect
|
||||
what the processes on a computer system are doing. They both use
|
||||
domain-specific languages allowing a user to write scripts which:</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><p>filter which processes are to be observed</p></li>
|
||||
<li><p>gather data from the processes of interest</p></li>
|
||||
<li><p>generate reports on the data</p></li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<p>As of Python 3.6, CPython can be built with embedded “markers”, also
|
||||
known as “probes”, that can be observed by a DTrace or SystemTap script,
|
||||
making it easier to monitor what the CPython processes on a system are
|
||||
doing.</p>
|
||||
<div class="impl-detail compound">
|
||||
<p><strong>CPython implementation detail:</strong> DTrace markers are implementation details of the CPython interpreter.
|
||||
No guarantees are made about probe compatibility between versions of
|
||||
CPython. DTrace scripts can stop working or work incorrectly without
|
||||
warning when changing CPython versions.</p>
|
||||
</div>
|
||||
<div class="section" id="enabling-the-static-markers">
|
||||
<h2>Enabling the static markers<a class="headerlink" href="#enabling-the-static-markers" title="Permalink to this headline">¶</a></h2>
|
||||
<p>macOS comes with built-in support for DTrace. On Linux, in order to
|
||||
build CPython with the embedded markers for SystemTap, the SystemTap
|
||||
development tools must be installed.</p>
|
||||
<p>On a Linux machine, this can be done via:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> yum install systemtap-sdt-devel
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>or:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> sudo apt-get install systemtap-sdt-dev
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>CPython must then be configured <code class="docutils literal notranslate"><span class="pre">--with-dtrace</span></code>:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>checking for --with-dtrace... yes
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>On macOS, you can list available DTrace probes by running a Python
|
||||
process in the background and listing all probes made available by the
|
||||
Python provider:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3.6 -q <span class="p">&</span>
|
||||
<span class="gp">$</span> sudo dtrace -l -P python<span class="nv">$!</span> <span class="c1"># or: dtrace -l -m python3.6</span>
|
||||
|
||||
<span class="go"> ID PROVIDER MODULE FUNCTION NAME</span>
|
||||
<span class="go">29564 python18035 python3.6 _PyEval_EvalFrameDefault function-entry</span>
|
||||
<span class="go">29565 python18035 python3.6 dtrace_function_entry function-entry</span>
|
||||
<span class="go">29566 python18035 python3.6 _PyEval_EvalFrameDefault function-return</span>
|
||||
<span class="go">29567 python18035 python3.6 dtrace_function_return function-return</span>
|
||||
<span class="go">29568 python18035 python3.6 collect gc-done</span>
|
||||
<span class="go">29569 python18035 python3.6 collect gc-start</span>
|
||||
<span class="go">29570 python18035 python3.6 _PyEval_EvalFrameDefault line</span>
|
||||
<span class="go">29571 python18035 python3.6 maybe_dtrace_line line</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>On Linux, you can verify if the SystemTap static markers are present in
|
||||
the built binary by seeing if it contains a “.note.stapsdt” section.</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> readelf -S ./python <span class="p">|</span> grep .note.stapsdt
|
||||
<span class="go">[30] .note.stapsdt NOTE 0000000000000000 00308d78</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>If you’ve built Python as a shared library (with <code class="docutils literal notranslate"><span class="pre">--enable-shared</span></code>), you
|
||||
need to look instead within the shared library. For example:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> readelf -S libpython3.3dm.so.1.0 <span class="p">|</span> grep .note.stapsdt
|
||||
<span class="go">[29] .note.stapsdt NOTE 0000000000000000 00365b68</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Sufficiently modern readelf can print the metadata:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> readelf -n ./python
|
||||
|
||||
<span class="go">Displaying notes found at file offset 0x00000254 with length 0x00000020:</span>
|
||||
<span class="go"> Owner Data size Description</span>
|
||||
<span class="go"> GNU 0x00000010 NT_GNU_ABI_TAG (ABI version tag)</span>
|
||||
<span class="go"> OS: Linux, ABI: 2.6.32</span>
|
||||
|
||||
<span class="go">Displaying notes found at file offset 0x00000274 with length 0x00000024:</span>
|
||||
<span class="go"> Owner Data size Description</span>
|
||||
<span class="go"> GNU 0x00000014 NT_GNU_BUILD_ID (unique build ID bitstring)</span>
|
||||
<span class="go"> Build ID: df924a2b08a7e89f6e11251d4602022977af2670</span>
|
||||
|
||||
<span class="go">Displaying notes found at file offset 0x002d6c30 with length 0x00000144:</span>
|
||||
<span class="go"> Owner Data size Description</span>
|
||||
<span class="go"> stapsdt 0x00000031 NT_STAPSDT (SystemTap probe descriptors)</span>
|
||||
<span class="go"> Provider: python</span>
|
||||
<span class="go"> Name: gc__start</span>
|
||||
<span class="go"> Location: 0x00000000004371c3, Base: 0x0000000000630ce2, Semaphore: 0x00000000008d6bf6</span>
|
||||
<span class="go"> Arguments: -4@%ebx</span>
|
||||
<span class="go"> stapsdt 0x00000030 NT_STAPSDT (SystemTap probe descriptors)</span>
|
||||
<span class="go"> Provider: python</span>
|
||||
<span class="go"> Name: gc__done</span>
|
||||
<span class="go"> Location: 0x00000000004374e1, Base: 0x0000000000630ce2, Semaphore: 0x00000000008d6bf8</span>
|
||||
<span class="go"> Arguments: -8@%rax</span>
|
||||
<span class="go"> stapsdt 0x00000045 NT_STAPSDT (SystemTap probe descriptors)</span>
|
||||
<span class="go"> Provider: python</span>
|
||||
<span class="go"> Name: function__entry</span>
|
||||
<span class="go"> Location: 0x000000000053db6c, Base: 0x0000000000630ce2, Semaphore: 0x00000000008d6be8</span>
|
||||
<span class="go"> Arguments: 8@%rbp 8@%r12 -4@%eax</span>
|
||||
<span class="go"> stapsdt 0x00000046 NT_STAPSDT (SystemTap probe descriptors)</span>
|
||||
<span class="go"> Provider: python</span>
|
||||
<span class="go"> Name: function__return</span>
|
||||
<span class="go"> Location: 0x000000000053dba8, Base: 0x0000000000630ce2, Semaphore: 0x00000000008d6bea</span>
|
||||
<span class="go"> Arguments: 8@%rbp 8@%r12 -4@%eax</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The above metadata contains information for SystemTap describing how it
|
||||
can patch strategically-placed machine code instructions to enable the
|
||||
tracing hooks used by a SystemTap script.</p>
|
||||
</div>
|
||||
<div class="section" id="static-dtrace-probes">
|
||||
<h2>Static DTrace probes<a class="headerlink" href="#static-dtrace-probes" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The following example DTrace script can be used to show the call/return
|
||||
hierarchy of a Python script, only tracing within the invocation of
|
||||
a function called “start”. In other words, import-time function
|
||||
invocations are not going to be listed:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>self int indent;
|
||||
|
||||
python$target:::function-entry
|
||||
/copyinstr(arg1) == "start"/
|
||||
{
|
||||
self->trace = 1;
|
||||
}
|
||||
|
||||
python$target:::function-entry
|
||||
/self->trace/
|
||||
{
|
||||
printf("%d\t%*s:", timestamp, 15, probename);
|
||||
printf("%*s", self->indent, "");
|
||||
printf("%s:%s:%d\n", basename(copyinstr(arg0)), copyinstr(arg1), arg2);
|
||||
self->indent++;
|
||||
}
|
||||
|
||||
python$target:::function-return
|
||||
/self->trace/
|
||||
{
|
||||
self->indent--;
|
||||
printf("%d\t%*s:", timestamp, 15, probename);
|
||||
printf("%*s", self->indent, "");
|
||||
printf("%s:%s:%d\n", basename(copyinstr(arg0)), copyinstr(arg1), arg2);
|
||||
}
|
||||
|
||||
python$target:::function-return
|
||||
/copyinstr(arg1) == "start"/
|
||||
{
|
||||
self->trace = 0;
|
||||
}
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>It can be invoked like this:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> sudo dtrace -q -s call_stack.d -c <span class="s2">"python3.6 script.py"</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The output looks like this:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>156641360502280 function-entry:call_stack.py:start:23
|
||||
156641360518804 function-entry: call_stack.py:function_1:1
|
||||
156641360532797 function-entry: call_stack.py:function_3:9
|
||||
156641360546807 function-return: call_stack.py:function_3:10
|
||||
156641360563367 function-return: call_stack.py:function_1:2
|
||||
156641360578365 function-entry: call_stack.py:function_2:5
|
||||
156641360591757 function-entry: call_stack.py:function_1:1
|
||||
156641360605556 function-entry: call_stack.py:function_3:9
|
||||
156641360617482 function-return: call_stack.py:function_3:10
|
||||
156641360629814 function-return: call_stack.py:function_1:2
|
||||
156641360642285 function-return: call_stack.py:function_2:6
|
||||
156641360656770 function-entry: call_stack.py:function_3:9
|
||||
156641360669707 function-return: call_stack.py:function_3:10
|
||||
156641360687853 function-entry: call_stack.py:function_4:13
|
||||
156641360700719 function-return: call_stack.py:function_4:14
|
||||
156641360719640 function-entry: call_stack.py:function_5:18
|
||||
156641360732567 function-return: call_stack.py:function_5:21
|
||||
156641360747370 function-return:call_stack.py:start:28
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="static-systemtap-markers">
|
||||
<h2>Static SystemTap markers<a class="headerlink" href="#static-systemtap-markers" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The low-level way to use the SystemTap integration is to use the static
|
||||
markers directly. This requires you to explicitly state the binary file
|
||||
containing them.</p>
|
||||
<p>For example, this SystemTap script can be used to show the call/return
|
||||
hierarchy of a Python script:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>probe process("python").mark("function__entry") {
|
||||
filename = user_string($arg1);
|
||||
funcname = user_string($arg2);
|
||||
lineno = $arg3;
|
||||
|
||||
printf("%s => %s in %s:%d\n",
|
||||
thread_indent(1), funcname, filename, lineno);
|
||||
}
|
||||
|
||||
probe process("python").mark("function__return") {
|
||||
filename = user_string($arg1);
|
||||
funcname = user_string($arg2);
|
||||
lineno = $arg3;
|
||||
|
||||
printf("%s <= %s in %s:%d\n",
|
||||
thread_indent(-1), funcname, filename, lineno);
|
||||
}
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>It can be invoked like this:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> stap <span class="se">\</span>
|
||||
show-call-hierarchy.stp <span class="se">\</span>
|
||||
-c <span class="s2">"./python test.py"</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The output looks like this:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>11408 python(8274): => __contains__ in Lib/_abcoll.py:362
|
||||
11414 python(8274): => __getitem__ in Lib/os.py:425
|
||||
11418 python(8274): => encode in Lib/os.py:490
|
||||
11424 python(8274): <= encode in Lib/os.py:493
|
||||
11428 python(8274): <= __getitem__ in Lib/os.py:426
|
||||
11433 python(8274): <= __contains__ in Lib/_abcoll.py:366
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>where the columns are:</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><p>time in microseconds since start of script</p></li>
|
||||
<li><p>name of executable</p></li>
|
||||
<li><p>PID of process</p></li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<p>and the remainder indicates the call/return hierarchy as the script executes.</p>
|
||||
<p>For a <code class="docutils literal notranslate"><span class="pre">--enable-shared</span></code> build of CPython, the markers are contained within the
|
||||
libpython shared library, and the probe’s dotted path needs to reflect this. For
|
||||
example, this line from the above example:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>probe process("python").mark("function__entry") {
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>should instead read:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>probe process("python").library("libpython3.6dm.so.1.0").mark("function__entry") {
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>(assuming a debug build of CPython 3.6)</p>
|
||||
</div>
|
||||
<div class="section" id="available-static-markers">
|
||||
<h2>Available static markers<a class="headerlink" href="#available-static-markers" title="Permalink to this headline">¶</a></h2>
|
||||
<dl class="function">
|
||||
<dt id="c.function__entry">
|
||||
<code class="descname">function__entry</code><span class="sig-paren">(</span>str<em> filename</em>, str<em> funcname</em>, int<em> lineno</em><span class="sig-paren">)</span><a class="headerlink" href="#c.function__entry" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>This marker indicates that execution of a Python function has begun.
|
||||
It is only triggered for pure-Python (bytecode) functions.</p>
|
||||
<p>The filename, function name, and line number are provided back to the
|
||||
tracing script as positional arguments, which must be accessed using
|
||||
<code class="docutils literal notranslate"><span class="pre">$arg1</span></code>, <code class="docutils literal notranslate"><span class="pre">$arg2</span></code>, <code class="docutils literal notranslate"><span class="pre">$arg3</span></code>:</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">$arg1</span></code> : <code class="docutils literal notranslate"><span class="pre">(const</span> <span class="pre">char</span> <span class="pre">*)</span></code> filename, accessible using <code class="docutils literal notranslate"><span class="pre">user_string($arg1)</span></code></p></li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">$arg2</span></code> : <code class="docutils literal notranslate"><span class="pre">(const</span> <span class="pre">char</span> <span class="pre">*)</span></code> function name, accessible using
|
||||
<code class="docutils literal notranslate"><span class="pre">user_string($arg2)</span></code></p></li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">$arg3</span></code> : <code class="docutils literal notranslate"><span class="pre">int</span></code> line number</p></li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
</dd></dl>
|
||||
|
||||
<dl class="function">
|
||||
<dt id="c.function__return">
|
||||
<code class="descname">function__return</code><span class="sig-paren">(</span>str<em> filename</em>, str<em> funcname</em>, int<em> lineno</em><span class="sig-paren">)</span><a class="headerlink" href="#c.function__return" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>This marker is the converse of <a class="reference internal" href="#c.function__entry" title="function__entry"><code class="xref c c-func docutils literal notranslate"><span class="pre">function__entry()</span></code></a>, and indicates that
|
||||
execution of a Python function has ended (either via <code class="docutils literal notranslate"><span class="pre">return</span></code>, or via an
|
||||
exception). It is only triggered for pure-Python (bytecode) functions.</p>
|
||||
<p>The arguments are the same as for <a class="reference internal" href="#c.function__entry" title="function__entry"><code class="xref c c-func docutils literal notranslate"><span class="pre">function__entry()</span></code></a>.</p>
|
||||
</dd></dl>
|
||||
|
||||
<dl class="function">
|
||||
<dt id="c.line">
|
||||
<code class="descname">line</code><span class="sig-paren">(</span>str<em> filename</em>, str<em> funcname</em>, int<em> lineno</em><span class="sig-paren">)</span><a class="headerlink" href="#c.line" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>This marker indicates a Python line is about to be executed. It is
|
||||
the equivalent of line-by-line tracing with a Python profiler. It is
|
||||
not triggered within C functions.</p>
|
||||
<p>The arguments are the same as for <a class="reference internal" href="#c.function__entry" title="function__entry"><code class="xref c c-func docutils literal notranslate"><span class="pre">function__entry()</span></code></a>.</p>
|
||||
</dd></dl>
|
||||
|
||||
<dl class="function">
|
||||
<dt id="c.gc__start">
|
||||
<code class="descname">gc__start</code><span class="sig-paren">(</span>int<em> generation</em><span class="sig-paren">)</span><a class="headerlink" href="#c.gc__start" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Fires when the Python interpreter starts a garbage collection cycle.
|
||||
<code class="docutils literal notranslate"><span class="pre">arg0</span></code> is the generation to scan, like <a class="reference internal" href="../library/gc.html#gc.collect" title="gc.collect"><code class="xref py py-func docutils literal notranslate"><span class="pre">gc.collect()</span></code></a>.</p>
|
||||
</dd></dl>
|
||||
|
||||
<dl class="function">
|
||||
<dt id="c.gc__done">
|
||||
<code class="descname">gc__done</code><span class="sig-paren">(</span>long<em> collected</em><span class="sig-paren">)</span><a class="headerlink" href="#c.gc__done" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Fires when the Python interpreter finishes a garbage collection
|
||||
cycle. <code class="docutils literal notranslate"><span class="pre">arg0</span></code> is the number of collected objects.</p>
|
||||
</dd></dl>
|
||||
|
||||
<dl class="function">
|
||||
<dt id="c.import__find__load__start">
|
||||
<code class="descname">import__find__load__start</code><span class="sig-paren">(</span>str<em> modulename</em><span class="sig-paren">)</span><a class="headerlink" href="#c.import__find__load__start" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Fires before <a class="reference internal" href="../library/importlib.html#module-importlib" title="importlib: The implementation of the import machinery."><code class="xref py py-mod docutils literal notranslate"><span class="pre">importlib</span></code></a> attempts to find and load the module.
|
||||
<code class="docutils literal notranslate"><span class="pre">arg0</span></code> is the module name.</p>
|
||||
<div class="versionadded">
|
||||
<p><span class="versionmodified added">New in version 3.7.</span></p>
|
||||
</div>
|
||||
</dd></dl>
|
||||
|
||||
<dl class="function">
|
||||
<dt id="c.import__find__load__done">
|
||||
<code class="descname">import__find__load__done</code><span class="sig-paren">(</span>str<em> modulename</em>, int<em> found</em><span class="sig-paren">)</span><a class="headerlink" href="#c.import__find__load__done" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Fires after <a class="reference internal" href="../library/importlib.html#module-importlib" title="importlib: The implementation of the import machinery."><code class="xref py py-mod docutils literal notranslate"><span class="pre">importlib</span></code></a>’s find_and_load function is called.
|
||||
<code class="docutils literal notranslate"><span class="pre">arg0</span></code> is the module name, and <code class="docutils literal notranslate"><span class="pre">arg1</span></code> indicates whether the module was
|
||||
successfully loaded.</p>
|
||||
<div class="versionadded">
|
||||
<p><span class="versionmodified added">New in version 3.7.</span></p>
|
||||
</div>
|
||||
</dd></dl>
|
||||
|
||||
</div>
|
||||
<div class="section" id="systemtap-tapsets">
|
||||
<h2>SystemTap Tapsets<a class="headerlink" href="#systemtap-tapsets" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The higher-level way to use the SystemTap integration is to use a “tapset”:
|
||||
SystemTap’s equivalent of a library, which hides some of the lower-level
|
||||
details of the static markers.</p>
|
||||
<p>Here is a tapset file, based on a non-shared build of CPython:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>/*
|
||||
Provide a higher-level wrapping around the function__entry and
|
||||
function__return markers:
|
||||
*/
|
||||
probe python.function.entry = process("python").mark("function__entry")
|
||||
{
|
||||
filename = user_string($arg1);
|
||||
funcname = user_string($arg2);
|
||||
lineno = $arg3;
|
||||
frameptr = $arg4
|
||||
}
|
||||
probe python.function.return = process("python").mark("function__return")
|
||||
{
|
||||
filename = user_string($arg1);
|
||||
funcname = user_string($arg2);
|
||||
lineno = $arg3;
|
||||
frameptr = $arg4
|
||||
}
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>If this file is installed in SystemTap’s tapset directory (e.g.
|
||||
<code class="docutils literal notranslate"><span class="pre">/usr/share/systemtap/tapset</span></code>), then these additional probepoints become
|
||||
available:</p>
|
||||
<dl class="function">
|
||||
<dt id="c.python.function.entry">
|
||||
<code class="descname">python.function.entry</code><span class="sig-paren">(</span>str<em> filename</em>, str<em> funcname</em>, int<em> lineno</em>, frameptr<span class="sig-paren">)</span><a class="headerlink" href="#c.python.function.entry" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>This probe point indicates that execution of a Python function has begun.
|
||||
It is only triggered for pure-Python (bytecode) functions.</p>
|
||||
</dd></dl>
|
||||
|
||||
<dl class="function">
|
||||
<dt id="c.python.function.return">
|
||||
<code class="descname">python.function.return</code><span class="sig-paren">(</span>str<em> filename</em>, str<em> funcname</em>, int<em> lineno</em>, frameptr<span class="sig-paren">)</span><a class="headerlink" href="#c.python.function.return" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>This probe point is the converse of <a class="reference internal" href="#c.python.function.entry" title="python.function.entry"><code class="xref c c-func docutils literal notranslate"><span class="pre">python.function.entry()</span></code></a>, and
|
||||
indicates that execution of a Python function has ended (either via
|
||||
<code class="docutils literal notranslate"><span class="pre">return</span></code>, or via an exception). It is only triggered for pure-Python
|
||||
(bytecode) functions.</p>
|
||||
</dd></dl>
|
||||
|
||||
</div>
|
||||
<div class="section" id="examples">
|
||||
<h2>Examples<a class="headerlink" href="#examples" title="Permalink to this headline">¶</a></h2>
|
||||
<p>This SystemTap script uses the tapset above to more cleanly implement the
|
||||
example given above of tracing the Python function-call hierarchy, without
|
||||
needing to directly name the static markers:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>probe python.function.entry
|
||||
{
|
||||
printf("%s => %s in %s:%d\n",
|
||||
thread_indent(1), funcname, filename, lineno);
|
||||
}
|
||||
|
||||
probe python.function.return
|
||||
{
|
||||
printf("%s &lt;= %s in %s:%d\n",
|
||||
thread_indent(-1), funcname, filename, lineno);
|
||||
}
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The following script uses the tapset above to provide a top-like view of all
|
||||
running CPython code, showing the top 20 most frequently-entered bytecode
|
||||
frames, each second, across the whole system:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>global fn_calls;
|
||||
|
||||
probe python.function.entry
|
||||
{
|
||||
fn_calls[pid(), filename, funcname, lineno] += 1;
|
||||
}
|
||||
|
||||
probe timer.ms(1000) {
|
||||
printf("\033[2J\033[1;1H") /* clear screen */
|
||||
printf("%6s %80s %6s %30s %6s\n",
|
||||
"PID", "FILENAME", "LINE", "FUNCTION", "CALLS")
|
||||
foreach ([pid, filename, funcname, lineno] in fn_calls- limit 20) {
|
||||
printf("%6d %80s %6d %30s %6d\n",
|
||||
pid, filename, lineno, funcname,
|
||||
fn_calls[pid, filename, funcname, lineno]);
|
||||
}
|
||||
delete fn_calls;
|
||||
}
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li><a class="reference internal" href="#">Instrumenting CPython with DTrace and SystemTap</a><ul>
|
||||
<li><a class="reference internal" href="#enabling-the-static-markers">Enabling the static markers</a></li>
|
||||
<li><a class="reference internal" href="#static-dtrace-probes">Static DTrace probes</a></li>
|
||||
<li><a class="reference internal" href="#static-systemtap-markers">Static SystemTap markers</a></li>
|
||||
<li><a class="reference internal" href="#available-static-markers">Available static markers</a></li>
|
||||
<li><a class="reference internal" href="#systemtap-tapsets">SystemTap Tapsets</a></li>
|
||||
<li><a class="reference internal" href="#examples">Examples</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h4>Previous topic</h4>
|
||||
<p class="topless"><a href="clinic.html"
|
||||
title="previous chapter">Argument Clinic How-To</a></p>
|
||||
<h4>Next topic</h4>
|
||||
<p class="topless"><a href="../faq/index.html"
|
||||
title="next chapter">Python Frequently Asked Questions</a></p>
|
||||
<div role="note" aria-label="source link">
|
||||
<h3>This Page</h3>
|
||||
<ul class="this-page-menu">
|
||||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||||
<li>
|
||||
<a href="https://github.com/python/cpython/blob/3.7/Doc/howto/instrumentation.rst"
|
||||
rel="nofollow">Show Source
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="../faq/index.html" title="Python Frequently Asked Questions"
|
||||
>next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="clinic.html" title="Argument Clinic How-To"
|
||||
>previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer">
|
||||
© <a href="../copyright.html">Copyright</a> 2001-2019, Python Software Foundation.
|
||||
<br />
|
||||
The Python Software Foundation is a non-profit corporation.
|
||||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||||
<br />
|
||||
Last updated on Jul 13, 2019.
|
||||
<a href="../bugs.html">Found a bug</a>?
|
||||
<br />
|
||||
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 2.0.1.
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
510
python-3.7.4-docs-html/howto/ipaddress.html
Normal file
510
python-3.7.4-docs-html/howto/ipaddress.html
Normal file
@@ -0,0 +1,510 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>An introduction to the ipaddress module — Python 3.7.4 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pydoctheme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/sidebar.js"></script>
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml"
|
||||
title="Search within Python 3.7.4 documentation"
|
||||
href="../_static/opensearch.xml"/>
|
||||
<link rel="author" title="About these documents" href="../about.html" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||||
<link rel="next" title="Argument Clinic How-To" href="clinic.html" />
|
||||
<link rel="prev" title="Argparse Tutorial" href="argparse.html" />
|
||||
<link rel="shortcut icon" type="image/png" href="../_static/py.png" />
|
||||
<link rel="canonical" href="https://docs.python.org/3/howto/ipaddress.html" />
|
||||
|
||||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||||
<script type="text/javascript" src="../_static/switchers.js"></script>
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
@media only screen {
|
||||
table.full-width-table {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
</head><body>
|
||||
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="clinic.html" title="Argument Clinic How-To"
|
||||
accesskey="N">next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="argparse.html" title="Argparse Tutorial"
|
||||
accesskey="P">previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<div class="section" id="an-introduction-to-the-ipaddress-module">
|
||||
<span id="ipaddress-howto"></span><h1>An introduction to the ipaddress module<a class="headerlink" href="#an-introduction-to-the-ipaddress-module" title="Permalink to this headline">¶</a></h1>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">author</dt>
|
||||
<dd class="field-odd"><p>Peter Moody</p>
|
||||
</dd>
|
||||
<dt class="field-even">author</dt>
|
||||
<dd class="field-even"><p>Nick Coghlan</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<div class="topic">
|
||||
<p class="topic-title first">Overview</p>
|
||||
<p>This document aims to provide a gentle introduction to the
|
||||
<a class="reference internal" href="../library/ipaddress.html#module-ipaddress" title="ipaddress: IPv4/IPv6 manipulation library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">ipaddress</span></code></a> module. It is aimed primarily at users that aren’t
|
||||
already familiar with IP networking terminology, but may also be useful
|
||||
to network engineers wanting an overview of how <a class="reference internal" href="../library/ipaddress.html#module-ipaddress" title="ipaddress: IPv4/IPv6 manipulation library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">ipaddress</span></code></a>
|
||||
represents IP network addressing concepts.</p>
|
||||
</div>
|
||||
<div class="section" id="creating-address-network-interface-objects">
|
||||
<h2>Creating Address/Network/Interface objects<a class="headerlink" href="#creating-address-network-interface-objects" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Since <a class="reference internal" href="../library/ipaddress.html#module-ipaddress" title="ipaddress: IPv4/IPv6 manipulation library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">ipaddress</span></code></a> is a module for inspecting and manipulating IP addresses,
|
||||
the first thing you’ll want to do is create some objects. You can use
|
||||
<a class="reference internal" href="../library/ipaddress.html#module-ipaddress" title="ipaddress: IPv4/IPv6 manipulation library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">ipaddress</span></code></a> to create objects from strings and integers.</p>
|
||||
<div class="section" id="a-note-on-ip-versions">
|
||||
<h3>A Note on IP Versions<a class="headerlink" href="#a-note-on-ip-versions" title="Permalink to this headline">¶</a></h3>
|
||||
<p>For readers that aren’t particularly familiar with IP addressing, it’s
|
||||
important to know that the Internet Protocol is currently in the process
|
||||
of moving from version 4 of the protocol to version 6. This transition is
|
||||
occurring largely because version 4 of the protocol doesn’t provide enough
|
||||
addresses to handle the needs of the whole world, especially given the
|
||||
increasing number of devices with direct connections to the internet.</p>
|
||||
<p>Explaining the details of the differences between the two versions of the
|
||||
protocol is beyond the scope of this introduction, but readers need to at
|
||||
least be aware that these two versions exist, and it will sometimes be
|
||||
necessary to force the use of one version or the other.</p>
|
||||
</div>
|
||||
<div class="section" id="ip-host-addresses">
|
||||
<h3>IP Host Addresses<a class="headerlink" href="#ip-host-addresses" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Addresses, often referred to as “host addresses”, are the most basic unit
|
||||
when working with IP addressing. The simplest way to create addresses is
|
||||
to use the <a class="reference internal" href="../library/ipaddress.html#ipaddress.ip_address" title="ipaddress.ip_address"><code class="xref py py-func docutils literal notranslate"><span class="pre">ipaddress.ip_address()</span></code></a> factory function, which automatically
|
||||
determines whether to create an IPv4 or IPv6 address based on the passed in
|
||||
value:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_address</span><span class="p">(</span><span class="s1">'192.0.2.1'</span><span class="p">)</span>
|
||||
<span class="go">IPv4Address('192.0.2.1')</span>
|
||||
<span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_address</span><span class="p">(</span><span class="s1">'2001:DB8::1'</span><span class="p">)</span>
|
||||
<span class="go">IPv6Address('2001:db8::1')</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Addresses can also be created directly from integers. Values that will
|
||||
fit within 32 bits are assumed to be IPv4 addresses:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_address</span><span class="p">(</span><span class="mi">3221225985</span><span class="p">)</span>
|
||||
<span class="go">IPv4Address('192.0.2.1')</span>
|
||||
<span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_address</span><span class="p">(</span><span class="mi">42540766411282592856903984951653826561</span><span class="p">)</span>
|
||||
<span class="go">IPv6Address('2001:db8::1')</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>To force the use of IPv4 or IPv6 addresses, the relevant classes can be
|
||||
invoked directly. This is particularly useful to force creation of IPv6
|
||||
addresses for small integers:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_address</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="go">IPv4Address('0.0.0.1')</span>
|
||||
<span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">IPv4Address</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="go">IPv4Address('0.0.0.1')</span>
|
||||
<span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">IPv6Address</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="go">IPv6Address('::1')</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="defining-networks">
|
||||
<h3>Defining Networks<a class="headerlink" href="#defining-networks" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Host addresses are usually grouped together into IP networks, so
|
||||
<a class="reference internal" href="../library/ipaddress.html#module-ipaddress" title="ipaddress: IPv4/IPv6 manipulation library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">ipaddress</span></code></a> provides a way to create, inspect and manipulate network
|
||||
definitions. IP network objects are constructed from strings that define the
|
||||
range of host addresses that are part of that network. The simplest form
|
||||
for that information is a “network address/network prefix” pair, where the
|
||||
prefix defines the number of leading bits that are compared to determine
|
||||
whether or not an address is part of the network and the network address
|
||||
defines the expected value of those bits.</p>
|
||||
<p>As for addresses, a factory function is provided that determines the correct
|
||||
IP version automatically:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="s1">'192.0.2.0/24'</span><span class="p">)</span>
|
||||
<span class="go">IPv4Network('192.0.2.0/24')</span>
|
||||
<span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="s1">'2001:db8::0/96'</span><span class="p">)</span>
|
||||
<span class="go">IPv6Network('2001:db8::/96')</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Network objects cannot have any host bits set. The practical effect of this
|
||||
is that <code class="docutils literal notranslate"><span class="pre">192.0.2.1/24</span></code> does not describe a network. Such definitions are
|
||||
referred to as interface objects since the ip-on-a-network notation is
|
||||
commonly used to describe network interfaces of a computer on a given network
|
||||
and are described further in the next section.</p>
|
||||
<p>By default, attempting to create a network object with host bits set will
|
||||
result in <a class="reference internal" href="../library/exceptions.html#ValueError" title="ValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> being raised. To request that the
|
||||
additional bits instead be coerced to zero, the flag <code class="docutils literal notranslate"><span class="pre">strict=False</span></code> can
|
||||
be passed to the constructor:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="s1">'192.0.2.1/24'</span><span class="p">)</span>
|
||||
<span class="gt">Traceback (most recent call last):</span>
|
||||
<span class="c">...</span>
|
||||
<span class="gr">ValueError</span>: <span class="n">192.0.2.1/24 has host bits set</span>
|
||||
<span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="s1">'192.0.2.1/24'</span><span class="p">,</span> <span class="n">strict</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="go">IPv4Network('192.0.2.0/24')</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>While the string form offers significantly more flexibility, networks can
|
||||
also be defined with integers, just like host addresses. In this case, the
|
||||
network is considered to contain only the single address identified by the
|
||||
integer, so the network prefix includes the entire network address:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="mi">3221225984</span><span class="p">)</span>
|
||||
<span class="go">IPv4Network('192.0.2.0/32')</span>
|
||||
<span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="mi">42540766411282592856903984951653826560</span><span class="p">)</span>
|
||||
<span class="go">IPv6Network('2001:db8::/128')</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>As with addresses, creation of a particular kind of network can be forced
|
||||
by calling the class constructor directly instead of using the factory
|
||||
function.</p>
|
||||
</div>
|
||||
<div class="section" id="host-interfaces">
|
||||
<h3>Host Interfaces<a class="headerlink" href="#host-interfaces" title="Permalink to this headline">¶</a></h3>
|
||||
<p>As mentioned just above, if you need to describe an address on a particular
|
||||
network, neither the address nor the network classes are sufficient.
|
||||
Notation like <code class="docutils literal notranslate"><span class="pre">192.0.2.1/24</span></code> is commonly used by network engineers and the
|
||||
people who write tools for firewalls and routers as shorthand for “the host
|
||||
<code class="docutils literal notranslate"><span class="pre">192.0.2.1</span></code> on the network <code class="docutils literal notranslate"><span class="pre">192.0.2.0/24</span></code>”. Accordingly, <a class="reference internal" href="../library/ipaddress.html#module-ipaddress" title="ipaddress: IPv4/IPv6 manipulation library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">ipaddress</span></code></a>
|
||||
provides a set of hybrid classes that associate an address with a particular
|
||||
network. The interface for creation is identical to that for defining network
|
||||
objects, except that the address portion isn’t constrained to being a network
|
||||
address.</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_interface</span><span class="p">(</span><span class="s1">'192.0.2.1/24'</span><span class="p">)</span>
|
||||
<span class="go">IPv4Interface('192.0.2.1/24')</span>
|
||||
<span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_interface</span><span class="p">(</span><span class="s1">'2001:db8::1/96'</span><span class="p">)</span>
|
||||
<span class="go">IPv6Interface('2001:db8::1/96')</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Integer inputs are accepted (as with networks), and use of a particular IP
|
||||
version can be forced by calling the relevant constructor directly.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="inspecting-address-network-interface-objects">
|
||||
<h2>Inspecting Address/Network/Interface Objects<a class="headerlink" href="#inspecting-address-network-interface-objects" title="Permalink to this headline">¶</a></h2>
|
||||
<p>You’ve gone to the trouble of creating an IPv(4|6)(Address|Network|Interface)
|
||||
object, so you probably want to get information about it. <a class="reference internal" href="../library/ipaddress.html#module-ipaddress" title="ipaddress: IPv4/IPv6 manipulation library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">ipaddress</span></code></a>
|
||||
tries to make doing this easy and intuitive.</p>
|
||||
<p>Extracting the IP version:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">addr4</span> <span class="o">=</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_address</span><span class="p">(</span><span class="s1">'192.0.2.1'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">addr6</span> <span class="o">=</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_address</span><span class="p">(</span><span class="s1">'2001:db8::1'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">addr6</span><span class="o">.</span><span class="n">version</span>
|
||||
<span class="go">6</span>
|
||||
<span class="gp">>>> </span><span class="n">addr4</span><span class="o">.</span><span class="n">version</span>
|
||||
<span class="go">4</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Obtaining the network from an interface:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">host4</span> <span class="o">=</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_interface</span><span class="p">(</span><span class="s1">'192.0.2.1/24'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">host4</span><span class="o">.</span><span class="n">network</span>
|
||||
<span class="go">IPv4Network('192.0.2.0/24')</span>
|
||||
<span class="gp">>>> </span><span class="n">host6</span> <span class="o">=</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_interface</span><span class="p">(</span><span class="s1">'2001:db8::1/96'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">host6</span><span class="o">.</span><span class="n">network</span>
|
||||
<span class="go">IPv6Network('2001:db8::/96')</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Finding out how many individual addresses are in a network:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">net4</span> <span class="o">=</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="s1">'192.0.2.0/24'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">net4</span><span class="o">.</span><span class="n">num_addresses</span>
|
||||
<span class="go">256</span>
|
||||
<span class="gp">>>> </span><span class="n">net6</span> <span class="o">=</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="s1">'2001:db8::0/96'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">net6</span><span class="o">.</span><span class="n">num_addresses</span>
|
||||
<span class="go">4294967296</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Iterating through the “usable” addresses on a network:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">net4</span> <span class="o">=</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="s1">'192.0.2.0/24'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">net4</span><span class="o">.</span><span class="n">hosts</span><span class="p">():</span>
|
||||
<span class="gp">... </span> <span class="nb">print</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
||||
<span class="go">192.0.2.1</span>
|
||||
<span class="go">192.0.2.2</span>
|
||||
<span class="go">192.0.2.3</span>
|
||||
<span class="go">192.0.2.4</span>
|
||||
<span class="gp">...</span>
|
||||
<span class="go">192.0.2.252</span>
|
||||
<span class="go">192.0.2.253</span>
|
||||
<span class="go">192.0.2.254</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Obtaining the netmask (i.e. set bits corresponding to the network prefix) or
|
||||
the hostmask (any bits that are not part of the netmask):</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">net4</span> <span class="o">=</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="s1">'192.0.2.0/24'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">net4</span><span class="o">.</span><span class="n">netmask</span>
|
||||
<span class="go">IPv4Address('255.255.255.0')</span>
|
||||
<span class="gp">>>> </span><span class="n">net4</span><span class="o">.</span><span class="n">hostmask</span>
|
||||
<span class="go">IPv4Address('0.0.0.255')</span>
|
||||
<span class="gp">>>> </span><span class="n">net6</span> <span class="o">=</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="s1">'2001:db8::0/96'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">net6</span><span class="o">.</span><span class="n">netmask</span>
|
||||
<span class="go">IPv6Address('ffff:ffff:ffff:ffff:ffff:ffff::')</span>
|
||||
<span class="gp">>>> </span><span class="n">net6</span><span class="o">.</span><span class="n">hostmask</span>
|
||||
<span class="go">IPv6Address('::ffff:ffff')</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Exploding or compressing the address:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">addr6</span><span class="o">.</span><span class="n">exploded</span>
|
||||
<span class="go">'2001:0db8:0000:0000:0000:0000:0000:0001'</span>
|
||||
<span class="gp">>>> </span><span class="n">addr6</span><span class="o">.</span><span class="n">compressed</span>
|
||||
<span class="go">'2001:db8::1'</span>
|
||||
<span class="gp">>>> </span><span class="n">net6</span><span class="o">.</span><span class="n">exploded</span>
|
||||
<span class="go">'2001:0db8:0000:0000:0000:0000:0000:0000/96'</span>
|
||||
<span class="gp">>>> </span><span class="n">net6</span><span class="o">.</span><span class="n">compressed</span>
|
||||
<span class="go">'2001:db8::/96'</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>While IPv4 doesn’t support explosion or compression, the associated objects
|
||||
still provide the relevant properties so that version neutral code can
|
||||
easily ensure the most concise or most verbose form is used for IPv6
|
||||
addresses while still correctly handling IPv4 addresses.</p>
|
||||
</div>
|
||||
<div class="section" id="networks-as-lists-of-addresses">
|
||||
<h2>Networks as lists of Addresses<a class="headerlink" href="#networks-as-lists-of-addresses" title="Permalink to this headline">¶</a></h2>
|
||||
<p>It’s sometimes useful to treat networks as lists. This means it is possible
|
||||
to index them like this:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">net4</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="go">IPv4Address('192.0.2.1')</span>
|
||||
<span class="gp">>>> </span><span class="n">net4</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="go">IPv4Address('192.0.2.255')</span>
|
||||
<span class="gp">>>> </span><span class="n">net6</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="go">IPv6Address('2001:db8::1')</span>
|
||||
<span class="gp">>>> </span><span class="n">net6</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="go">IPv6Address('2001:db8::ffff:ffff')</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>It also means that network objects lend themselves to using the list
|
||||
membership test syntax like this:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">if</span> <span class="n">address</span> <span class="ow">in</span> <span class="n">network</span><span class="p">:</span>
|
||||
<span class="c1"># do something</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Containment testing is done efficiently based on the network prefix:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">addr4</span> <span class="o">=</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_address</span><span class="p">(</span><span class="s1">'192.0.2.1'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">addr4</span> <span class="ow">in</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="s1">'192.0.2.0/24'</span><span class="p">)</span>
|
||||
<span class="go">True</span>
|
||||
<span class="gp">>>> </span><span class="n">addr4</span> <span class="ow">in</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="s1">'192.0.3.0/24'</span><span class="p">)</span>
|
||||
<span class="go">False</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="comparisons">
|
||||
<h2>Comparisons<a class="headerlink" href="#comparisons" title="Permalink to this headline">¶</a></h2>
|
||||
<p><a class="reference internal" href="../library/ipaddress.html#module-ipaddress" title="ipaddress: IPv4/IPv6 manipulation library."><code class="xref py py-mod docutils literal notranslate"><span class="pre">ipaddress</span></code></a> provides some simple, hopefully intuitive ways to compare
|
||||
objects, where it makes sense:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_address</span><span class="p">(</span><span class="s1">'192.0.2.1'</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_address</span><span class="p">(</span><span class="s1">'192.0.2.2'</span><span class="p">)</span>
|
||||
<span class="go">True</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>A <a class="reference internal" href="../library/exceptions.html#TypeError" title="TypeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> exception is raised if you try to compare objects of
|
||||
different versions or different types.</p>
|
||||
</div>
|
||||
<div class="section" id="using-ip-addresses-with-other-modules">
|
||||
<h2>Using IP Addresses with other modules<a class="headerlink" href="#using-ip-addresses-with-other-modules" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Other modules that use IP addresses (such as <a class="reference internal" href="../library/socket.html#module-socket" title="socket: Low-level networking interface."><code class="xref py py-mod docutils literal notranslate"><span class="pre">socket</span></code></a>) usually won’t
|
||||
accept objects from this module directly. Instead, they must be coerced to
|
||||
an integer or string that the other module will accept:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">addr4</span> <span class="o">=</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_address</span><span class="p">(</span><span class="s1">'192.0.2.1'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="nb">str</span><span class="p">(</span><span class="n">addr4</span><span class="p">)</span>
|
||||
<span class="go">'192.0.2.1'</span>
|
||||
<span class="gp">>>> </span><span class="nb">int</span><span class="p">(</span><span class="n">addr4</span><span class="p">)</span>
|
||||
<span class="go">3221225985</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="getting-more-detail-when-instance-creation-fails">
|
||||
<h2>Getting more detail when instance creation fails<a class="headerlink" href="#getting-more-detail-when-instance-creation-fails" title="Permalink to this headline">¶</a></h2>
|
||||
<p>When creating address/network/interface objects using the version-agnostic
|
||||
factory functions, any errors will be reported as <a class="reference internal" href="../library/exceptions.html#ValueError" title="ValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> with
|
||||
a generic error message that simply says the passed in value was not
|
||||
recognized as an object of that type. The lack of a specific error is
|
||||
because it’s necessary to know whether the value is <em>supposed</em> to be IPv4
|
||||
or IPv6 in order to provide more detail on why it has been rejected.</p>
|
||||
<p>To support use cases where it is useful to have access to this additional
|
||||
detail, the individual class constructors actually raise the
|
||||
<a class="reference internal" href="../library/exceptions.html#ValueError" title="ValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> subclasses <a class="reference internal" href="../library/ipaddress.html#ipaddress.AddressValueError" title="ipaddress.AddressValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ipaddress.AddressValueError</span></code></a> and
|
||||
<a class="reference internal" href="../library/ipaddress.html#ipaddress.NetmaskValueError" title="ipaddress.NetmaskValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ipaddress.NetmaskValueError</span></code></a> to indicate exactly which part of
|
||||
the definition failed to parse correctly.</p>
|
||||
<p>The error messages are significantly more detailed when using the
|
||||
class constructors directly. For example:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_address</span><span class="p">(</span><span class="s2">"192.168.0.256"</span><span class="p">)</span>
|
||||
<span class="gt">Traceback (most recent call last):</span>
|
||||
<span class="c">...</span>
|
||||
<span class="gr">ValueError</span>: <span class="n">'192.168.0.256' does not appear to be an IPv4 or IPv6 address</span>
|
||||
<span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">IPv4Address</span><span class="p">(</span><span class="s2">"192.168.0.256"</span><span class="p">)</span>
|
||||
<span class="gt">Traceback (most recent call last):</span>
|
||||
<span class="c">...</span>
|
||||
<span class="gr">ipaddress.AddressValueError</span>: <span class="n">Octet 256 (> 255) not permitted in '192.168.0.256'</span>
|
||||
|
||||
<span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">ip_network</span><span class="p">(</span><span class="s2">"192.168.0.1/64"</span><span class="p">)</span>
|
||||
<span class="gt">Traceback (most recent call last):</span>
|
||||
<span class="c">...</span>
|
||||
<span class="gr">ValueError</span>: <span class="n">'192.168.0.1/64' does not appear to be an IPv4 or IPv6 network</span>
|
||||
<span class="gp">>>> </span><span class="n">ipaddress</span><span class="o">.</span><span class="n">IPv4Network</span><span class="p">(</span><span class="s2">"192.168.0.1/64"</span><span class="p">)</span>
|
||||
<span class="gt">Traceback (most recent call last):</span>
|
||||
<span class="c">...</span>
|
||||
<span class="gr">ipaddress.NetmaskValueError</span>: <span class="n">'64' is not a valid netmask</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>However, both of the module specific exceptions have <a class="reference internal" href="../library/exceptions.html#ValueError" title="ValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> as their
|
||||
parent class, so if you’re not concerned with the particular type of error,
|
||||
you can still write code like the following:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">network</span> <span class="o">=</span> <span class="n">ipaddress</span><span class="o">.</span><span class="n">IPv4Network</span><span class="p">(</span><span class="n">address</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'address/netmask is invalid for IPv4:'</span><span class="p">,</span> <span class="n">address</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li><a class="reference internal" href="#">An introduction to the ipaddress module</a><ul>
|
||||
<li><a class="reference internal" href="#creating-address-network-interface-objects">Creating Address/Network/Interface objects</a><ul>
|
||||
<li><a class="reference internal" href="#a-note-on-ip-versions">A Note on IP Versions</a></li>
|
||||
<li><a class="reference internal" href="#ip-host-addresses">IP Host Addresses</a></li>
|
||||
<li><a class="reference internal" href="#defining-networks">Defining Networks</a></li>
|
||||
<li><a class="reference internal" href="#host-interfaces">Host Interfaces</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#inspecting-address-network-interface-objects">Inspecting Address/Network/Interface Objects</a></li>
|
||||
<li><a class="reference internal" href="#networks-as-lists-of-addresses">Networks as lists of Addresses</a></li>
|
||||
<li><a class="reference internal" href="#comparisons">Comparisons</a></li>
|
||||
<li><a class="reference internal" href="#using-ip-addresses-with-other-modules">Using IP Addresses with other modules</a></li>
|
||||
<li><a class="reference internal" href="#getting-more-detail-when-instance-creation-fails">Getting more detail when instance creation fails</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h4>Previous topic</h4>
|
||||
<p class="topless"><a href="argparse.html"
|
||||
title="previous chapter">Argparse Tutorial</a></p>
|
||||
<h4>Next topic</h4>
|
||||
<p class="topless"><a href="clinic.html"
|
||||
title="next chapter">Argument Clinic How-To</a></p>
|
||||
<div role="note" aria-label="source link">
|
||||
<h3>This Page</h3>
|
||||
<ul class="this-page-menu">
|
||||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||||
<li>
|
||||
<a href="https://github.com/python/cpython/blob/3.7/Doc/howto/ipaddress.rst"
|
||||
rel="nofollow">Show Source
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="clinic.html" title="Argument Clinic How-To"
|
||||
>next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="argparse.html" title="Argparse Tutorial"
|
||||
>previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer">
|
||||
© <a href="../copyright.html">Copyright</a> 2001-2019, Python Software Foundation.
|
||||
<br />
|
||||
The Python Software Foundation is a non-profit corporation.
|
||||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||||
<br />
|
||||
Last updated on Jul 13, 2019.
|
||||
<a href="../bugs.html">Found a bug</a>?
|
||||
<br />
|
||||
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 2.0.1.
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
2589
python-3.7.4-docs-html/howto/logging-cookbook.html
Normal file
2589
python-3.7.4-docs-html/howto/logging-cookbook.html
Normal file
File diff suppressed because it is too large
Load Diff
1233
python-3.7.4-docs-html/howto/logging.html
Normal file
1233
python-3.7.4-docs-html/howto/logging.html
Normal file
File diff suppressed because it is too large
Load Diff
594
python-3.7.4-docs-html/howto/pyporting.html
Normal file
594
python-3.7.4-docs-html/howto/pyporting.html
Normal file
@@ -0,0 +1,594 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Porting Python 2 Code to Python 3 — Python 3.7.4 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pydoctheme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/sidebar.js"></script>
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml"
|
||||
title="Search within Python 3.7.4 documentation"
|
||||
href="../_static/opensearch.xml"/>
|
||||
<link rel="author" title="About these documents" href="../about.html" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||||
<link rel="next" title="Porting Extension Modules to Python 3" href="cporting.html" />
|
||||
<link rel="prev" title="Python HOWTOs" href="index.html" />
|
||||
<link rel="shortcut icon" type="image/png" href="../_static/py.png" />
|
||||
<link rel="canonical" href="https://docs.python.org/3/howto/pyporting.html" />
|
||||
|
||||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||||
<script type="text/javascript" src="../_static/switchers.js"></script>
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
@media only screen {
|
||||
table.full-width-table {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
</head><body>
|
||||
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="cporting.html" title="Porting Extension Modules to Python 3"
|
||||
accesskey="N">next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="index.html" title="Python HOWTOs"
|
||||
accesskey="P">previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<div class="section" id="porting-python-2-code-to-python-3">
|
||||
<span id="pyporting-howto"></span><h1>Porting Python 2 Code to Python 3<a class="headerlink" href="#porting-python-2-code-to-python-3" title="Permalink to this headline">¶</a></h1>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">author</dt>
|
||||
<dd class="field-odd"><p>Brett Cannon</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<div class="topic">
|
||||
<p class="topic-title first">Abstract</p>
|
||||
<p>With Python 3 being the future of Python while Python 2 is still in active
|
||||
use, it is good to have your project available for both major releases of
|
||||
Python. This guide is meant to help you figure out how best to support both
|
||||
Python 2 & 3 simultaneously.</p>
|
||||
<p>If you are looking to port an extension module instead of pure Python code,
|
||||
please see <a class="reference internal" href="cporting.html#cporting-howto"><span class="std std-ref">Porting Extension Modules to Python 3</span></a>.</p>
|
||||
<p>If you would like to read one core Python developer’s take on why Python 3
|
||||
came into existence, you can read Nick Coghlan’s <a class="reference external" href="https://ncoghlan-devs-python-notes.readthedocs.io/en/latest/python3/questions_and_answers.html">Python 3 Q & A</a> or
|
||||
Brett Cannon’s <a class="reference external" href="https://snarky.ca/why-python-3-exists">Why Python 3 exists</a>.</p>
|
||||
<p>For help with porting, you can email the <a class="reference external" href="https://mail.python.org/mailman/listinfo/python-porting">python-porting</a> mailing list with
|
||||
questions.</p>
|
||||
</div>
|
||||
<div class="section" id="the-short-explanation">
|
||||
<h2>The Short Explanation<a class="headerlink" href="#the-short-explanation" title="Permalink to this headline">¶</a></h2>
|
||||
<p>To make your project single-source Python 2/3 compatible, the basic steps
|
||||
are:</p>
|
||||
<ol class="arabic simple">
|
||||
<li><p>Only worry about supporting Python 2.7</p></li>
|
||||
<li><p>Make sure you have good test coverage (<a class="reference external" href="https://pypi.org/project/coverage">coverage.py</a> can help;
|
||||
<code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">coverage</span></code>)</p></li>
|
||||
<li><p>Learn the differences between Python 2 & 3</p></li>
|
||||
<li><p>Use <a class="reference external" href="http://python-future.org/automatic_conversion.html">Futurize</a> (or <a class="reference external" href="https://python-modernize.readthedocs.io/">Modernize</a>) to update your code (e.g. <code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">future</span></code>)</p></li>
|
||||
<li><p>Use <a class="reference external" href="https://pypi.org/project/pylint">Pylint</a> to help make sure you don’t regress on your Python 3 support
|
||||
(<code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">pylint</span></code>)</p></li>
|
||||
<li><p>Use <a class="reference external" href="https://pypi.org/project/caniusepython3">caniusepython3</a> to find out which of your dependencies are blocking your
|
||||
use of Python 3 (<code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">caniusepython3</span></code>)</p></li>
|
||||
<li><p>Once your dependencies are no longer blocking you, use continuous integration
|
||||
to make sure you stay compatible with Python 2 & 3 (<a class="reference external" href="https://pypi.org/project/tox">tox</a> can help test
|
||||
against multiple versions of Python; <code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">tox</span></code>)</p></li>
|
||||
<li><p>Consider using optional static type checking to make sure your type usage
|
||||
works in both Python 2 & 3 (e.g. use <a class="reference external" href="http://mypy-lang.org/">mypy</a> to check your typing under both
|
||||
Python 2 & Python 3).</p></li>
|
||||
</ol>
|
||||
</div>
|
||||
<div class="section" id="details">
|
||||
<h2>Details<a class="headerlink" href="#details" title="Permalink to this headline">¶</a></h2>
|
||||
<p>A key point about supporting Python 2 & 3 simultaneously is that you can start
|
||||
<strong>today</strong>! Even if your dependencies are not supporting Python 3 yet that does
|
||||
not mean you can’t modernize your code <strong>now</strong> to support Python 3. Most changes
|
||||
required to support Python 3 lead to cleaner code using newer practices even in
|
||||
Python 2 code.</p>
|
||||
<p>Another key point is that modernizing your Python 2 code to also support
|
||||
Python 3 is largely automated for you. While you might have to make some API
|
||||
decisions thanks to Python 3 clarifying text data versus binary data, the
|
||||
lower-level work is now mostly done for you and thus can at least benefit from
|
||||
the automated changes immediately.</p>
|
||||
<p>Keep those key points in mind while you read on about the details of porting
|
||||
your code to support Python 2 & 3 simultaneously.</p>
|
||||
<div class="section" id="drop-support-for-python-2-6-and-older">
|
||||
<h3>Drop support for Python 2.6 and older<a class="headerlink" href="#drop-support-for-python-2-6-and-older" title="Permalink to this headline">¶</a></h3>
|
||||
<p>While you can make Python 2.5 work with Python 3, it is <strong>much</strong> easier if you
|
||||
only have to work with Python 2.7. If dropping Python 2.5 is not an
|
||||
option then the <a class="reference external" href="https://pypi.org/project/six">six</a> project can help you support Python 2.5 & 3 simultaneously
|
||||
(<code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">six</span></code>). Do realize, though, that nearly all the projects listed
|
||||
in this HOWTO will not be available to you.</p>
|
||||
<p>If you are able to skip Python 2.5 and older, then the required changes
|
||||
to your code should continue to look and feel like idiomatic Python code. At
|
||||
worst you will have to use a function instead of a method in some instances or
|
||||
have to import a function instead of using a built-in one, but otherwise the
|
||||
overall transformation should not feel foreign to you.</p>
|
||||
<p>But you should aim for only supporting Python 2.7. Python 2.6 is no longer
|
||||
freely supported and thus is not receiving bugfixes. This means <strong>you</strong> will have
|
||||
to work around any issues you come across with Python 2.6. There are also some
|
||||
tools mentioned in this HOWTO which do not support Python 2.6 (e.g., <a class="reference external" href="https://pypi.org/project/pylint">Pylint</a>),
|
||||
and this will become more commonplace as time goes on. It will simply be easier
|
||||
for you if you only support the versions of Python that you have to support.</p>
|
||||
</div>
|
||||
<div class="section" id="make-sure-you-specify-the-proper-version-support-in-your-setup-py-file">
|
||||
<h3>Make sure you specify the proper version support in your <code class="docutils literal notranslate"><span class="pre">setup.py</span></code> file<a class="headerlink" href="#make-sure-you-specify-the-proper-version-support-in-your-setup-py-file" title="Permalink to this headline">¶</a></h3>
|
||||
<p>In your <code class="docutils literal notranslate"><span class="pre">setup.py</span></code> file you should have the proper <a class="reference external" href="https://pypi.org/classifiers">trove classifier</a>
|
||||
specifying what versions of Python you support. As your project does not support
|
||||
Python 3 yet you should at least have
|
||||
<code class="docutils literal notranslate"><span class="pre">Programming</span> <span class="pre">Language</span> <span class="pre">::</span> <span class="pre">Python</span> <span class="pre">::</span> <span class="pre">2</span> <span class="pre">::</span> <span class="pre">Only</span></code> specified. Ideally you should
|
||||
also specify each major/minor version of Python that you do support, e.g.
|
||||
<code class="docutils literal notranslate"><span class="pre">Programming</span> <span class="pre">Language</span> <span class="pre">::</span> <span class="pre">Python</span> <span class="pre">::</span> <span class="pre">2.7</span></code>.</p>
|
||||
</div>
|
||||
<div class="section" id="have-good-test-coverage">
|
||||
<h3>Have good test coverage<a class="headerlink" href="#have-good-test-coverage" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Once you have your code supporting the oldest version of Python 2 you want it
|
||||
to, you will want to make sure your test suite has good coverage. A good rule of
|
||||
thumb is that you want to be confident enough in your test suite that any
|
||||
failures that appear after having tools rewrite your code are actual bugs in the
|
||||
tools and not in your code. If you want a number to aim for, try to get over 80%
|
||||
coverage (and don’t feel bad if you find it hard to get better than 90%
|
||||
coverage). If you don’t already have a tool to measure test coverage then
|
||||
<a class="reference external" href="https://pypi.org/project/coverage">coverage.py</a> is recommended.</p>
|
||||
</div>
|
||||
<div class="section" id="learn-the-differences-between-python-2-3">
|
||||
<h3>Learn the differences between Python 2 & 3<a class="headerlink" href="#learn-the-differences-between-python-2-3" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Once you have your code well-tested you are ready to begin porting your code to
|
||||
Python 3! But to fully understand how your code is going to change and what
|
||||
you want to look out for while you code, you will want to learn what changes
|
||||
Python 3 makes in terms of Python 2. Typically the two best ways of doing that
|
||||
are reading the <a class="reference external" href="https://docs.python.org/3/whatsnew/index.html">“What’s New”</a> doc for each release of Python 3 and the
|
||||
<a class="reference external" href="http://python3porting.com/">Porting to Python 3</a> book (which is free online). There is also a handy
|
||||
<a class="reference external" href="http://python-future.org/compatible_idioms.html">cheat sheet</a> from the Python-Future project.</p>
|
||||
</div>
|
||||
<div class="section" id="update-your-code">
|
||||
<h3>Update your code<a class="headerlink" href="#update-your-code" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Once you feel like you know what is different in Python 3 compared to Python 2,
|
||||
it’s time to update your code! You have a choice between two tools in porting
|
||||
your code automatically: <a class="reference external" href="http://python-future.org/automatic_conversion.html">Futurize</a> and <a class="reference external" href="https://python-modernize.readthedocs.io/">Modernize</a>. Which tool you choose will
|
||||
depend on how much like Python 3 you want your code to be. <a class="reference external" href="http://python-future.org/automatic_conversion.html">Futurize</a> does its
|
||||
best to make Python 3 idioms and practices exist in Python 2, e.g. backporting
|
||||
the <code class="docutils literal notranslate"><span class="pre">bytes</span></code> type from Python 3 so that you have semantic parity between the
|
||||
major versions of Python. <a class="reference external" href="https://python-modernize.readthedocs.io/">Modernize</a>,
|
||||
on the other hand, is more conservative and targets a Python 2/3 subset of
|
||||
Python, directly relying on <a class="reference external" href="https://pypi.org/project/six">six</a> to help provide compatibility. As Python 3 is
|
||||
the future, it might be best to consider Futurize to begin adjusting to any new
|
||||
practices that Python 3 introduces which you are not accustomed to yet.</p>
|
||||
<p>Regardless of which tool you choose, they will update your code to run under
|
||||
Python 3 while staying compatible with the version of Python 2 you started with.
|
||||
Depending on how conservative you want to be, you may want to run the tool over
|
||||
your test suite first and visually inspect the diff to make sure the
|
||||
transformation is accurate. After you have transformed your test suite and
|
||||
verified that all the tests still pass as expected, then you can transform your
|
||||
application code knowing that any test which fails is a translation failure.</p>
|
||||
<p>Unfortunately the tools can’t automate everything to make your code work under
|
||||
Python 3 and so there are a handful of things you will need to update manually
|
||||
to get full Python 3 support (which of these steps are necessary varies between
|
||||
the tools). Read the documentation for the tool you choose to use to see what it
|
||||
fixes by default and what it can do optionally to know what will (not) be fixed
|
||||
for you and what you may have to fix on your own (e.g. using <code class="docutils literal notranslate"><span class="pre">io.open()</span></code> over
|
||||
the built-in <code class="docutils literal notranslate"><span class="pre">open()</span></code> function is off by default in Modernize). Luckily,
|
||||
though, there are only a couple of things to watch out for which can be
|
||||
considered large issues that may be hard to debug if not watched for.</p>
|
||||
<div class="section" id="division">
|
||||
<h4>Division<a class="headerlink" href="#division" title="Permalink to this headline">¶</a></h4>
|
||||
<p>In Python 3, <code class="docutils literal notranslate"><span class="pre">5</span> <span class="pre">/</span> <span class="pre">2</span> <span class="pre">==</span> <span class="pre">2.5</span></code> and not <code class="docutils literal notranslate"><span class="pre">2</span></code>; all division between <code class="docutils literal notranslate"><span class="pre">int</span></code> values
|
||||
results in a <code class="docutils literal notranslate"><span class="pre">float</span></code>. This change has actually been planned since Python 2.2
|
||||
which was released in 2002. Since then users have been encouraged to add
|
||||
<code class="docutils literal notranslate"><span class="pre">from</span> <span class="pre">__future__</span> <span class="pre">import</span> <span class="pre">division</span></code> to any and all files which use the <code class="docutils literal notranslate"><span class="pre">/</span></code> and
|
||||
<code class="docutils literal notranslate"><span class="pre">//</span></code> operators or to run the interpreter with the <code class="docutils literal notranslate"><span class="pre">-Q</span></code> flag. If you
|
||||
have not been doing this then you will need to go through your code and do two
|
||||
things:</p>
|
||||
<ol class="arabic simple">
|
||||
<li><p>Add <code class="docutils literal notranslate"><span class="pre">from</span> <span class="pre">__future__</span> <span class="pre">import</span> <span class="pre">division</span></code> to your files</p></li>
|
||||
<li><p>Update any division operator as necessary to either use <code class="docutils literal notranslate"><span class="pre">//</span></code> for floor
|
||||
division or continue using <code class="docutils literal notranslate"><span class="pre">/</span></code> and expect a float</p></li>
|
||||
</ol>
|
||||
<p>The reason that <code class="docutils literal notranslate"><span class="pre">/</span></code> isn’t simply translated to <code class="docutils literal notranslate"><span class="pre">//</span></code> automatically is that if
|
||||
an object defines a <code class="docutils literal notranslate"><span class="pre">__truediv__</span></code> method but not <code class="docutils literal notranslate"><span class="pre">__floordiv__</span></code> then your
|
||||
code would begin to fail (e.g. a user-defined class that uses <code class="docutils literal notranslate"><span class="pre">/</span></code> to
|
||||
signify some operation but not <code class="docutils literal notranslate"><span class="pre">//</span></code> for the same thing or at all).</p>
|
||||
</div>
|
||||
<div class="section" id="text-versus-binary-data">
|
||||
<h4>Text versus binary data<a class="headerlink" href="#text-versus-binary-data" title="Permalink to this headline">¶</a></h4>
|
||||
<p>In Python 2 you could use the <code class="docutils literal notranslate"><span class="pre">str</span></code> type for both text and binary data.
|
||||
Unfortunately this confluence of two different concepts could lead to brittle
|
||||
code which sometimes worked for either kind of data, sometimes not. It also
|
||||
could lead to confusing APIs if people didn’t explicitly state that something
|
||||
that accepted <code class="docutils literal notranslate"><span class="pre">str</span></code> accepted either text or binary data instead of one
|
||||
specific type. This complicated the situation especially for anyone supporting
|
||||
multiple languages as APIs wouldn’t bother explicitly supporting <code class="docutils literal notranslate"><span class="pre">unicode</span></code>
|
||||
when they claimed text data support.</p>
|
||||
<p>To make the distinction between text and binary data clearer and more
|
||||
pronounced, Python 3 did what most languages created in the age of the internet
|
||||
have done and made text and binary data distinct types that cannot blindly be
|
||||
mixed together (Python predates widespread access to the internet). For any code
|
||||
that deals only with text or only binary data, this separation doesn’t pose an
|
||||
issue. But for code that has to deal with both, it does mean you might have to
|
||||
now care about when you are using text compared to binary data, which is why
|
||||
this cannot be entirely automated.</p>
|
||||
<p>To start, you will need to decide which APIs take text and which take binary
|
||||
(it is <strong>highly</strong> recommended you don’t design APIs that can take both due to
|
||||
the difficulty of keeping the code working; as stated earlier it is difficult to
|
||||
do well). In Python 2 this means making sure the APIs that take text can work
|
||||
with <code class="docutils literal notranslate"><span class="pre">unicode</span></code> and those that work with binary data work with the
|
||||
<code class="docutils literal notranslate"><span class="pre">bytes</span></code> type from Python 3 (which is a subset of <code class="docutils literal notranslate"><span class="pre">str</span></code> in Python 2 and acts
|
||||
as an alias for the <code class="docutils literal notranslate"><span class="pre">str</span></code> type in Python 2). Usually the biggest issue is
|
||||
realizing which methods exist on which types in Python 2 & 3 simultaneously
|
||||
(for text that’s <code class="docutils literal notranslate"><span class="pre">unicode</span></code> in Python 2 and <code class="docutils literal notranslate"><span class="pre">str</span></code> in Python 3, for binary
|
||||
that’s <code class="docutils literal notranslate"><span class="pre">str</span></code>/<code class="docutils literal notranslate"><span class="pre">bytes</span></code> in Python 2 and <code class="docutils literal notranslate"><span class="pre">bytes</span></code> in Python 3). The following
|
||||
table lists the <strong>unique</strong> methods of each data type across Python 2 & 3
|
||||
(e.g., the <code class="docutils literal notranslate"><span class="pre">decode()</span></code> method is usable on the equivalent binary data type in
|
||||
either Python 2 or 3, but it can’t be used by the textual data type consistently
|
||||
between Python 2 and 3 because <code class="docutils literal notranslate"><span class="pre">str</span></code> in Python 3 doesn’t have the method). Do
|
||||
note that as of Python 3.5 the <code class="docutils literal notranslate"><span class="pre">__mod__</span></code> method was added to the bytes type.</p>
|
||||
<table class="docutils align-center">
|
||||
<colgroup>
|
||||
<col style="width: 53%" />
|
||||
<col style="width: 47%" />
|
||||
</colgroup>
|
||||
<tbody>
|
||||
<tr class="row-odd"><td><p><strong>Text data</strong></p></td>
|
||||
<td><p><strong>Binary data</strong></p></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p></p></td>
|
||||
<td><p>decode</p></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>encode</p></td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p>format</p></td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr class="row-odd"><td><p>isdecimal</p></td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr class="row-even"><td><p>isnumeric</p></td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>Making the distinction easier to handle can be accomplished by encoding and
|
||||
decoding between binary data and text at the edge of your code. This means that
|
||||
when you receive text in binary data, you should immediately decode it. And if
|
||||
your code needs to send text as binary data then encode it as late as possible.
|
||||
This allows your code to work with only text internally and thus eliminates
|
||||
having to keep track of what type of data you are working with.</p>
|
||||
<p>The next issue is making sure you know whether the string literals in your code
|
||||
represent text or binary data. You should add a <code class="docutils literal notranslate"><span class="pre">b</span></code> prefix to any
|
||||
literal that represents binary data. For text you should add a <code class="docutils literal notranslate"><span class="pre">u</span></code> prefix to
|
||||
the text literal. (There is a <a class="reference internal" href="../library/__future__.html#module-__future__" title="__future__: Future statement definitions"><code class="xref py py-mod docutils literal notranslate"><span class="pre">__future__</span></code></a> import to force all unspecified
|
||||
literals to be Unicode, but usage has shown it isn’t as effective as adding a
|
||||
<code class="docutils literal notranslate"><span class="pre">b</span></code> or <code class="docutils literal notranslate"><span class="pre">u</span></code> prefix to all literals explicitly.)</p>
|
||||
<p>As part of this dichotomy you also need to be careful about opening files.
|
||||
Unless you have been working on Windows, there is a chance you have not always
|
||||
bothered to add the <code class="docutils literal notranslate"><span class="pre">b</span></code> mode when opening a binary file (e.g., <code class="docutils literal notranslate"><span class="pre">rb</span></code> for
|
||||
binary reading). Under Python 3, binary files and text files are clearly
|
||||
distinct and mutually incompatible; see the <a class="reference internal" href="../library/io.html#module-io" title="io: Core tools for working with streams."><code class="xref py py-mod docutils literal notranslate"><span class="pre">io</span></code></a> module for details.
|
||||
Therefore, you <strong>must</strong> make a decision of whether a file will be used for
|
||||
binary access (allowing binary data to be read and/or written) or textual access
|
||||
(allowing text data to be read and/or written). You should also use <a class="reference internal" href="../library/io.html#io.open" title="io.open"><code class="xref py py-func docutils literal notranslate"><span class="pre">io.open()</span></code></a>
|
||||
for opening files instead of the built-in <a class="reference internal" href="../library/functions.html#open" title="open"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a> function as the <a class="reference internal" href="../library/io.html#module-io" title="io: Core tools for working with streams."><code class="xref py py-mod docutils literal notranslate"><span class="pre">io</span></code></a>
|
||||
module is consistent from Python 2 to 3 while the built-in <a class="reference internal" href="../library/functions.html#open" title="open"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a> function
|
||||
is not (in Python 3 it’s actually <a class="reference internal" href="../library/io.html#io.open" title="io.open"><code class="xref py py-func docutils literal notranslate"><span class="pre">io.open()</span></code></a>). Do not bother with the
|
||||
outdated practice of using <a class="reference internal" href="../library/codecs.html#codecs.open" title="codecs.open"><code class="xref py py-func docutils literal notranslate"><span class="pre">codecs.open()</span></code></a> as that’s only necessary for
|
||||
keeping compatibility with Python 2.5.</p>
|
||||
<p>The constructors of both <code class="docutils literal notranslate"><span class="pre">str</span></code> and <code class="docutils literal notranslate"><span class="pre">bytes</span></code> have different semantics for the
|
||||
same arguments between Python 2 & 3. Passing an integer to <code class="docutils literal notranslate"><span class="pre">bytes</span></code> in Python 2
|
||||
will give you the string representation of the integer: <code class="docutils literal notranslate"><span class="pre">bytes(3)</span> <span class="pre">==</span> <span class="pre">'3'</span></code>.
|
||||
But in Python 3, an integer argument to <code class="docutils literal notranslate"><span class="pre">bytes</span></code> will give you a bytes object
|
||||
as long as the integer specified, filled with null bytes:
|
||||
<code class="docutils literal notranslate"><span class="pre">bytes(3)</span> <span class="pre">==</span> <span class="pre">b'\x00\x00\x00'</span></code>. A similar worry is necessary when passing a
|
||||
bytes object to <code class="docutils literal notranslate"><span class="pre">str</span></code>. In Python 2 you just get the bytes object back:
|
||||
<code class="docutils literal notranslate"><span class="pre">str(b'3')</span> <span class="pre">==</span> <span class="pre">b'3'</span></code>. But in Python 3 you get the string representation of the
|
||||
bytes object: <code class="docutils literal notranslate"><span class="pre">str(b'3')</span> <span class="pre">==</span> <span class="pre">"b'3'"</span></code>.</p>
|
||||
<p>Finally, the indexing of binary data requires careful handling (slicing does
|
||||
<strong>not</strong> require any special handling). In Python 2,
|
||||
<code class="docutils literal notranslate"><span class="pre">b'123'[1]</span> <span class="pre">==</span> <span class="pre">b'2'</span></code> while in Python 3 <code class="docutils literal notranslate"><span class="pre">b'123'[1]</span> <span class="pre">==</span> <span class="pre">50</span></code>. Because binary data
|
||||
is simply a collection of binary numbers, Python 3 returns the integer value for
|
||||
the byte you index on. But in Python 2 because <code class="docutils literal notranslate"><span class="pre">bytes</span> <span class="pre">==</span> <span class="pre">str</span></code>, indexing
|
||||
returns a one-item slice of bytes. The <a class="reference external" href="https://pypi.org/project/six">six</a> project has a function
|
||||
named <code class="docutils literal notranslate"><span class="pre">six.indexbytes()</span></code> which will return an integer like in Python 3:
|
||||
<code class="docutils literal notranslate"><span class="pre">six.indexbytes(b'123',</span> <span class="pre">1)</span></code>.</p>
|
||||
<p>To summarize:</p>
|
||||
<ol class="arabic simple">
|
||||
<li><p>Decide which of your APIs take text and which take binary data</p></li>
|
||||
<li><p>Make sure that your code that works with text also works with <code class="docutils literal notranslate"><span class="pre">unicode</span></code> and
|
||||
code for binary data works with <code class="docutils literal notranslate"><span class="pre">bytes</span></code> in Python 2 (see the table above
|
||||
for what methods you cannot use for each type)</p></li>
|
||||
<li><p>Mark all binary literals with a <code class="docutils literal notranslate"><span class="pre">b</span></code> prefix, textual literals with a <code class="docutils literal notranslate"><span class="pre">u</span></code>
|
||||
prefix</p></li>
|
||||
<li><p>Decode binary data to text as soon as possible, encode text as binary data as
|
||||
late as possible</p></li>
|
||||
<li><p>Open files using <a class="reference internal" href="../library/io.html#io.open" title="io.open"><code class="xref py py-func docutils literal notranslate"><span class="pre">io.open()</span></code></a> and make sure to specify the <code class="docutils literal notranslate"><span class="pre">b</span></code> mode when
|
||||
appropriate</p></li>
|
||||
<li><p>Be careful when indexing into binary data</p></li>
|
||||
</ol>
|
||||
</div>
|
||||
<div class="section" id="use-feature-detection-instead-of-version-detection">
|
||||
<h4>Use feature detection instead of version detection<a class="headerlink" href="#use-feature-detection-instead-of-version-detection" title="Permalink to this headline">¶</a></h4>
|
||||
<p>Inevitably you will have code that has to choose what to do based on what
|
||||
version of Python is running. The best way to do this is with feature detection
|
||||
of whether the version of Python you’re running under supports what you need.
|
||||
If for some reason that doesn’t work then you should make the version check be
|
||||
against Python 2 and not Python 3. To help explain this, let’s look at an
|
||||
example.</p>
|
||||
<p>Let’s pretend that you need access to a feature of <a class="reference external" href="https://docs.python.org/3/library/importlib.html#module-importlib">importlib</a> that
|
||||
is available in Python’s standard library since Python 3.3 and available for
|
||||
Python 2 through <a class="reference external" href="https://pypi.org/project/importlib2">importlib2</a> on PyPI. You might be tempted to write code to
|
||||
access e.g. the <code class="docutils literal notranslate"><span class="pre">importlib.abc</span></code> module by doing the following:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">sys</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">version_info</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span>
|
||||
<span class="kn">from</span> <span class="nn">importlib</span> <span class="k">import</span> <span class="n">abc</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="kn">from</span> <span class="nn">importlib2</span> <span class="k">import</span> <span class="n">abc</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The problem with this code is what happens when Python 4 comes out? It would
|
||||
be better to treat Python 2 as the exceptional case instead of Python 3 and
|
||||
assume that future Python versions will be more compatible with Python 3 than
|
||||
Python 2:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">sys</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">version_info</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">></span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="kn">from</span> <span class="nn">importlib</span> <span class="k">import</span> <span class="n">abc</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="kn">from</span> <span class="nn">importlib2</span> <span class="k">import</span> <span class="n">abc</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The best solution, though, is to do no version detection at all and instead rely
|
||||
on feature detection. That avoids any potential issues of getting the version
|
||||
detection wrong and helps keep you future-compatible:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">try</span><span class="p">:</span>
|
||||
<span class="kn">from</span> <span class="nn">importlib</span> <span class="k">import</span> <span class="n">abc</span>
|
||||
<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
|
||||
<span class="kn">from</span> <span class="nn">importlib2</span> <span class="k">import</span> <span class="n">abc</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="prevent-compatibility-regressions">
|
||||
<h3>Prevent compatibility regressions<a class="headerlink" href="#prevent-compatibility-regressions" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Once you have fully translated your code to be compatible with Python 3, you
|
||||
will want to make sure your code doesn’t regress and stop working under
|
||||
Python 3. This is especially true if you have a dependency which is blocking you
|
||||
from actually running under Python 3 at the moment.</p>
|
||||
<p>To help with staying compatible, any new modules you create should have
|
||||
at least the following block of code at the top of each one:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">__future__</span> <span class="k">import</span> <span class="n">absolute_import</span>
|
||||
<span class="kn">from</span> <span class="nn">__future__</span> <span class="k">import</span> <span class="n">division</span>
|
||||
<span class="kn">from</span> <span class="nn">__future__</span> <span class="k">import</span> <span class="n">print_function</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>You can also run Python 2 with the <code class="docutils literal notranslate"><span class="pre">-3</span></code> flag to be warned about various
|
||||
compatibility issues your code triggers during execution. If you turn warnings
|
||||
into errors with <code class="docutils literal notranslate"><span class="pre">-Werror</span></code> then you can make sure that you don’t accidentally
|
||||
miss a warning.</p>
|
||||
<p>You can also use the <a class="reference external" href="https://pypi.org/project/pylint">Pylint</a> project and its <code class="docutils literal notranslate"><span class="pre">--py3k</span></code> flag to lint your code
|
||||
to receive warnings when your code begins to deviate from Python 3
|
||||
compatibility. This also prevents you from having to run <a class="reference external" href="https://python-modernize.readthedocs.io/">Modernize</a> or <a class="reference external" href="http://python-future.org/automatic_conversion.html">Futurize</a>
|
||||
over your code regularly to catch compatibility regressions. This does require
|
||||
you to support only Python 2.7 and Python 3.4 or newer, as those are the oldest
|
||||
versions that Pylint itself supports.</p>
|
||||
</div>
|
||||
<div class="section" id="check-which-dependencies-block-your-transition">
|
||||
<h3>Check which dependencies block your transition<a class="headerlink" href="#check-which-dependencies-block-your-transition" title="Permalink to this headline">¶</a></h3>
|
||||
<p><strong>After</strong> you have made your code compatible with Python 3 you should begin to
|
||||
care about whether your dependencies have also been ported. The <a class="reference external" href="https://pypi.org/project/caniusepython3">caniusepython3</a>
|
||||
project was created to help you determine which projects
|
||||
– directly or indirectly – are blocking you from supporting Python 3. There
|
||||
is both a command-line tool and a web interface at
|
||||
<a class="reference external" href="https://caniusepython3.com">https://caniusepython3.com</a>.</p>
|
||||
<p>The project also provides code which you can integrate into your test suite so
|
||||
that you will have a failing test when you no longer have dependencies blocking
|
||||
you from using Python 3. This allows you to avoid having to manually check your
|
||||
dependencies and to be notified quickly when you can start running on Python 3.</p>
|
||||
</div>
|
||||
<div class="section" id="update-your-setup-py-file-to-denote-python-3-compatibility">
|
||||
<h3>Update your <code class="docutils literal notranslate"><span class="pre">setup.py</span></code> file to denote Python 3 compatibility<a class="headerlink" href="#update-your-setup-py-file-to-denote-python-3-compatibility" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Once your code works under Python 3, you should update the classifiers in
|
||||
your <code class="docutils literal notranslate"><span class="pre">setup.py</span></code> to contain <code class="docutils literal notranslate"><span class="pre">Programming</span> <span class="pre">Language</span> <span class="pre">::</span> <span class="pre">Python</span> <span class="pre">::</span> <span class="pre">3</span></code> and to not
|
||||
specify sole Python 2 support. This will tell anyone using your code that you
|
||||
support Python 2 <strong>and</strong> 3. Ideally you will also want to add classifiers for
|
||||
each major/minor version of Python you now support.</p>
|
||||
</div>
|
||||
<div class="section" id="use-continuous-integration-to-stay-compatible">
|
||||
<h3>Use continuous integration to stay compatible<a class="headerlink" href="#use-continuous-integration-to-stay-compatible" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Once you are able to fully run under Python 3 you will want to make sure your
|
||||
code always works under both Python 2 & 3. Probably the best tool for running
|
||||
your tests under multiple Python interpreters is <a class="reference external" href="https://pypi.org/project/tox">tox</a>. You can then integrate
|
||||
tox with your continuous integration system so that you never accidentally break
|
||||
Python 2 or 3 support.</p>
|
||||
<p>You may also want to use the <code class="docutils literal notranslate"><span class="pre">-bb</span></code> flag with the Python 3 interpreter to
|
||||
trigger an exception when you are comparing bytes to strings or bytes to an int
|
||||
(the latter is available starting in Python 3.5). By default type-differing
|
||||
comparisons simply return <code class="docutils literal notranslate"><span class="pre">False</span></code>, but if you made a mistake in your
|
||||
separation of text/binary data handling or indexing on bytes you wouldn’t easily
|
||||
find the mistake. This flag will raise an exception when these kinds of
|
||||
comparisons occur, making the mistake much easier to track down.</p>
|
||||
<p>And that’s mostly it! At this point your code base is compatible with both
|
||||
Python 2 and 3 simultaneously. Your testing will also be set up so that you
|
||||
don’t accidentally break Python 2 or 3 compatibility regardless of which version
|
||||
you typically run your tests under while developing.</p>
|
||||
</div>
|
||||
<div class="section" id="consider-using-optional-static-type-checking">
|
||||
<h3>Consider using optional static type checking<a class="headerlink" href="#consider-using-optional-static-type-checking" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Another way to help port your code is to use a static type checker like
|
||||
<a class="reference external" href="http://mypy-lang.org/">mypy</a> or <a class="reference external" href="https://github.com/google/pytype">pytype</a> on your code. These tools can be used to analyze your code as
|
||||
if it’s being run under Python 2, then you can run the tool a second time as if
|
||||
your code is running under Python 3. By running a static type checker twice like
|
||||
this you can discover if you’re, e.g., misusing a binary data type in one version
|
||||
of Python compared to another. If you add optional type hints to your code you
|
||||
can also explicitly state whether your APIs use textual or binary data, helping
|
||||
to make sure everything functions as expected in both versions of Python.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li><a class="reference internal" href="#">Porting Python 2 Code to Python 3</a><ul>
|
||||
<li><a class="reference internal" href="#the-short-explanation">The Short Explanation</a></li>
|
||||
<li><a class="reference internal" href="#details">Details</a><ul>
|
||||
<li><a class="reference internal" href="#drop-support-for-python-2-6-and-older">Drop support for Python 2.6 and older</a></li>
|
||||
<li><a class="reference internal" href="#make-sure-you-specify-the-proper-version-support-in-your-setup-py-file">Make sure you specify the proper version support in your <code class="docutils literal notranslate"><span class="pre">setup.py</span></code> file</a></li>
|
||||
<li><a class="reference internal" href="#have-good-test-coverage">Have good test coverage</a></li>
|
||||
<li><a class="reference internal" href="#learn-the-differences-between-python-2-3">Learn the differences between Python 2 & 3</a></li>
|
||||
<li><a class="reference internal" href="#update-your-code">Update your code</a><ul>
|
||||
<li><a class="reference internal" href="#division">Division</a></li>
|
||||
<li><a class="reference internal" href="#text-versus-binary-data">Text versus binary data</a></li>
|
||||
<li><a class="reference internal" href="#use-feature-detection-instead-of-version-detection">Use feature detection instead of version detection</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#prevent-compatibility-regressions">Prevent compatibility regressions</a></li>
|
||||
<li><a class="reference internal" href="#check-which-dependencies-block-your-transition">Check which dependencies block your transition</a></li>
|
||||
<li><a class="reference internal" href="#update-your-setup-py-file-to-denote-python-3-compatibility">Update your <code class="docutils literal notranslate"><span class="pre">setup.py</span></code> file to denote Python 3 compatibility</a></li>
|
||||
<li><a class="reference internal" href="#use-continuous-integration-to-stay-compatible">Use continuous integration to stay compatible</a></li>
|
||||
<li><a class="reference internal" href="#consider-using-optional-static-type-checking">Consider using optional static type checking</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h4>Previous topic</h4>
|
||||
<p class="topless"><a href="index.html"
|
||||
title="previous chapter">Python HOWTOs</a></p>
|
||||
<h4>Next topic</h4>
|
||||
<p class="topless"><a href="cporting.html"
|
||||
title="next chapter">Porting Extension Modules to Python 3</a></p>
|
||||
<div role="note" aria-label="source link">
|
||||
<h3>This Page</h3>
|
||||
<ul class="this-page-menu">
|
||||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||||
<li>
|
||||
<a href="https://github.com/python/cpython/blob/3.7/Doc/howto/pyporting.rst"
|
||||
rel="nofollow">Show Source
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="cporting.html" title="Porting Extension Modules to Python 3"
|
||||
>next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="index.html" title="Python HOWTOs"
|
||||
>previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer">
|
||||
© <a href="../copyright.html">Copyright</a> 2001-2019, Python Software Foundation.
|
||||
<br />
|
||||
The Python Software Foundation is a non-profit corporation.
|
||||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||||
<br />
|
||||
Last updated on Jul 13, 2019.
|
||||
<a href="../bugs.html">Found a bug</a>?
|
||||
<br />
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 2.0.1.
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
1564
python-3.7.4-docs-html/howto/regex.html
Normal file
1564
python-3.7.4-docs-html/howto/regex.html
Normal file
File diff suppressed because it is too large
Load Diff
538
python-3.7.4-docs-html/howto/sockets.html
Normal file
538
python-3.7.4-docs-html/howto/sockets.html
Normal file
@@ -0,0 +1,538 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Socket Programming HOWTO — Python 3.7.4 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pydoctheme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/sidebar.js"></script>
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml"
|
||||
title="Search within Python 3.7.4 documentation"
|
||||
href="../_static/opensearch.xml"/>
|
||||
<link rel="author" title="About these documents" href="../about.html" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||||
<link rel="next" title="Sorting HOW TO" href="sorting.html" />
|
||||
<link rel="prev" title="Regular Expression HOWTO" href="regex.html" />
|
||||
<link rel="shortcut icon" type="image/png" href="../_static/py.png" />
|
||||
<link rel="canonical" href="https://docs.python.org/3/howto/sockets.html" />
|
||||
|
||||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||||
<script type="text/javascript" src="../_static/switchers.js"></script>
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
@media only screen {
|
||||
table.full-width-table {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
</head><body>
|
||||
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="sorting.html" title="Sorting HOW TO"
|
||||
accesskey="N">next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="regex.html" title="Regular Expression HOWTO"
|
||||
accesskey="P">previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<div class="section" id="socket-programming-howto">
|
||||
<span id="socket-howto"></span><h1>Socket Programming HOWTO<a class="headerlink" href="#socket-programming-howto" title="Permalink to this headline">¶</a></h1>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Author</dt>
|
||||
<dd class="field-odd"><p>Gordon McMillan</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<div class="topic">
|
||||
<p class="topic-title first">Abstract</p>
|
||||
<p>Sockets are used nearly everywhere, but are one of the most severely
|
||||
misunderstood technologies around. This is a 10,000 foot overview of sockets.
|
||||
It’s not really a tutorial - you’ll still have work to do in getting things
|
||||
operational. It doesn’t cover the fine points (and there are a lot of them), but
|
||||
I hope it will give you enough background to begin using them decently.</p>
|
||||
</div>
|
||||
<div class="section" id="sockets">
|
||||
<h2>Sockets<a class="headerlink" href="#sockets" title="Permalink to this headline">¶</a></h2>
|
||||
<p>I’m only going to talk about INET (i.e. IPv4) sockets, but they account for at least 99% of
|
||||
the sockets in use. And I’ll only talk about STREAM (i.e. TCP) sockets - unless you really
|
||||
know what you’re doing (in which case this HOWTO isn’t for you!), you’ll get
|
||||
better behavior and performance from a STREAM socket than anything else. I will
|
||||
try to clear up the mystery of what a socket is, as well as some hints on how to
|
||||
work with blocking and non-blocking sockets. But I’ll start by talking about
|
||||
blocking sockets. You’ll need to know how they work before dealing with
|
||||
non-blocking sockets.</p>
|
||||
<p>Part of the trouble with understanding these things is that “socket” can mean a
|
||||
number of subtly different things, depending on context. So first, let’s make a
|
||||
distinction between a “client” socket - an endpoint of a conversation, and a
|
||||
“server” socket, which is more like a switchboard operator. The client
|
||||
application (your browser, for example) uses “client” sockets exclusively; the
|
||||
web server it’s talking to uses both “server” sockets and “client” sockets.</p>
|
||||
<div class="section" id="history">
|
||||
<h3>History<a class="headerlink" href="#history" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Of the various forms of <abbr title="Inter Process Communication">IPC</abbr>,
|
||||
sockets are by far the most popular. On any given platform, there are
|
||||
likely to be other forms of IPC that are faster, but for
|
||||
cross-platform communication, sockets are about the only game in town.</p>
|
||||
<p>They were invented in Berkeley as part of the BSD flavor of Unix. They spread
|
||||
like wildfire with the Internet. With good reason — the combination of sockets
|
||||
with INET makes talking to arbitrary machines around the world unbelievably easy
|
||||
(at least compared to other schemes).</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="creating-a-socket">
|
||||
<h2>Creating a Socket<a class="headerlink" href="#creating-a-socket" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Roughly speaking, when you clicked on the link that brought you to this page,
|
||||
your browser did something like the following:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="c1"># create an INET, STREAMing socket</span>
|
||||
<span class="n">s</span> <span class="o">=</span> <span class="n">socket</span><span class="o">.</span><span class="n">socket</span><span class="p">(</span><span class="n">socket</span><span class="o">.</span><span class="n">AF_INET</span><span class="p">,</span> <span class="n">socket</span><span class="o">.</span><span class="n">SOCK_STREAM</span><span class="p">)</span>
|
||||
<span class="c1"># now connect to the web server on port 80 - the normal http port</span>
|
||||
<span class="n">s</span><span class="o">.</span><span class="n">connect</span><span class="p">((</span><span class="s2">"www.python.org"</span><span class="p">,</span> <span class="mi">80</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>When the <code class="docutils literal notranslate"><span class="pre">connect</span></code> completes, the socket <code class="docutils literal notranslate"><span class="pre">s</span></code> can be used to send
|
||||
in a request for the text of the page. The same socket will read the
|
||||
reply, and then be destroyed. That’s right, destroyed. Client sockets
|
||||
are normally only used for one exchange (or a small set of sequential
|
||||
exchanges).</p>
|
||||
<p>What happens in the web server is a bit more complex. First, the web server
|
||||
creates a “server socket”:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="c1"># create an INET, STREAMing socket</span>
|
||||
<span class="n">serversocket</span> <span class="o">=</span> <span class="n">socket</span><span class="o">.</span><span class="n">socket</span><span class="p">(</span><span class="n">socket</span><span class="o">.</span><span class="n">AF_INET</span><span class="p">,</span> <span class="n">socket</span><span class="o">.</span><span class="n">SOCK_STREAM</span><span class="p">)</span>
|
||||
<span class="c1"># bind the socket to a public host, and a well-known port</span>
|
||||
<span class="n">serversocket</span><span class="o">.</span><span class="n">bind</span><span class="p">((</span><span class="n">socket</span><span class="o">.</span><span class="n">gethostname</span><span class="p">(),</span> <span class="mi">80</span><span class="p">))</span>
|
||||
<span class="c1"># become a server socket</span>
|
||||
<span class="n">serversocket</span><span class="o">.</span><span class="n">listen</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>A couple things to notice: we used <code class="docutils literal notranslate"><span class="pre">socket.gethostname()</span></code> so that the socket
|
||||
would be visible to the outside world. If we had used <code class="docutils literal notranslate"><span class="pre">s.bind(('localhost',</span>
|
||||
<span class="pre">80))</span></code> or <code class="docutils literal notranslate"><span class="pre">s.bind(('127.0.0.1',</span> <span class="pre">80))</span></code> we would still have a “server” socket,
|
||||
but one that was only visible within the same machine. <code class="docutils literal notranslate"><span class="pre">s.bind(('',</span> <span class="pre">80))</span></code>
|
||||
specifies that the socket is reachable by any address the machine happens to
|
||||
have.</p>
|
||||
<p>A second thing to note: low number ports are usually reserved for “well known”
|
||||
services (HTTP, SNMP etc). If you’re playing around, use a nice high number (4
|
||||
digits).</p>
|
||||
<p>Finally, the argument to <code class="docutils literal notranslate"><span class="pre">listen</span></code> tells the socket library that we want it to
|
||||
queue up as many as 5 connect requests (the normal max) before refusing outside
|
||||
connections. If the rest of the code is written properly, that should be plenty.</p>
|
||||
<p>Now that we have a “server” socket, listening on port 80, we can enter the
|
||||
mainloop of the web server:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
|
||||
<span class="c1"># accept connections from outside</span>
|
||||
<span class="p">(</span><span class="n">clientsocket</span><span class="p">,</span> <span class="n">address</span><span class="p">)</span> <span class="o">=</span> <span class="n">serversocket</span><span class="o">.</span><span class="n">accept</span><span class="p">()</span>
|
||||
<span class="c1"># now do something with the clientsocket</span>
|
||||
<span class="c1"># in this case, we'll pretend this is a threaded server</span>
|
||||
<span class="n">ct</span> <span class="o">=</span> <span class="n">client_thread</span><span class="p">(</span><span class="n">clientsocket</span><span class="p">)</span>
|
||||
<span class="n">ct</span><span class="o">.</span><span class="n">run</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>There are actually 3 general ways in which this loop could work - dispatching a
|
||||
thread to handle <code class="docutils literal notranslate"><span class="pre">clientsocket</span></code>, creating a new process to handle
|
||||
<code class="docutils literal notranslate"><span class="pre">clientsocket</span></code>, or restructuring this app to use non-blocking sockets, and
|
||||
multiplex between our “server” socket and any active <code class="docutils literal notranslate"><span class="pre">clientsocket</span></code>s using
|
||||
<code class="docutils literal notranslate"><span class="pre">select</span></code>. More about that later. The important thing to understand now is
|
||||
this: this is <em>all</em> a “server” socket does. It doesn’t send any data. It doesn’t
|
||||
receive any data. It just produces “client” sockets. Each <code class="docutils literal notranslate"><span class="pre">clientsocket</span></code> is
|
||||
created in response to some <em>other</em> “client” socket doing a <code class="docutils literal notranslate"><span class="pre">connect()</span></code> to the
|
||||
host and port we’re bound to. As soon as we’ve created that <code class="docutils literal notranslate"><span class="pre">clientsocket</span></code>, we
|
||||
go back to listening for more connections. The two “clients” are free to chat it
|
||||
up - they are using some dynamically allocated port which will be recycled when
|
||||
the conversation ends.</p>
|
||||
<div class="section" id="ipc">
|
||||
<h3>IPC<a class="headerlink" href="#ipc" title="Permalink to this headline">¶</a></h3>
|
||||
<p>If you need fast IPC between two processes on one machine, you should look into
|
||||
pipes or shared memory. If you do decide to use AF_INET sockets, bind the
|
||||
“server” socket to <code class="docutils literal notranslate"><span class="pre">'localhost'</span></code>. On most platforms, this will take a
|
||||
shortcut around a couple of layers of network code and be quite a bit faster.</p>
|
||||
<div class="admonition seealso">
|
||||
<p class="admonition-title">See also</p>
|
||||
<p>The <a class="reference internal" href="../library/multiprocessing.html#module-multiprocessing" title="multiprocessing: Process-based parallelism."><code class="xref py py-mod docutils literal notranslate"><span class="pre">multiprocessing</span></code></a> integrates cross-platform IPC into a higher-level
|
||||
API.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="using-a-socket">
|
||||
<h2>Using a Socket<a class="headerlink" href="#using-a-socket" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The first thing to note is that the web browser’s “client” socket and the web
|
||||
server’s “client” socket are identical beasts. That is, this is a “peer to peer”
|
||||
conversation. Or to put it another way, <em>as the designer, you will have to
|
||||
decide what the rules of etiquette are for a conversation</em>. Normally, the
|
||||
<code class="docutils literal notranslate"><span class="pre">connect</span></code>ing socket starts the conversation, by sending in a request, or
|
||||
perhaps a signon. But that’s a design decision - it’s not a rule of sockets.</p>
|
||||
<p>Now there are two sets of verbs to use for communication. You can use <code class="docutils literal notranslate"><span class="pre">send</span></code>
|
||||
and <code class="docutils literal notranslate"><span class="pre">recv</span></code>, or you can transform your client socket into a file-like beast and
|
||||
use <code class="docutils literal notranslate"><span class="pre">read</span></code> and <code class="docutils literal notranslate"><span class="pre">write</span></code>. The latter is the way Java presents its sockets.
|
||||
I’m not going to talk about it here, except to warn you that you need to use
|
||||
<code class="docutils literal notranslate"><span class="pre">flush</span></code> on sockets. These are buffered “files”, and a common mistake is to
|
||||
<code class="docutils literal notranslate"><span class="pre">write</span></code> something, and then <code class="docutils literal notranslate"><span class="pre">read</span></code> for a reply. Without a <code class="docutils literal notranslate"><span class="pre">flush</span></code> in
|
||||
there, you may wait forever for the reply, because the request may still be in
|
||||
your output buffer.</p>
|
||||
<p>Now we come to the major stumbling block of sockets - <code class="docutils literal notranslate"><span class="pre">send</span></code> and <code class="docutils literal notranslate"><span class="pre">recv</span></code> operate
|
||||
on the network buffers. They do not necessarily handle all the bytes you hand
|
||||
them (or expect from them), because their major focus is handling the network
|
||||
buffers. In general, they return when the associated network buffers have been
|
||||
filled (<code class="docutils literal notranslate"><span class="pre">send</span></code>) or emptied (<code class="docutils literal notranslate"><span class="pre">recv</span></code>). They then tell you how many bytes they
|
||||
handled. It is <em>your</em> responsibility to call them again until your message has
|
||||
been completely dealt with.</p>
|
||||
<p>When a <code class="docutils literal notranslate"><span class="pre">recv</span></code> returns 0 bytes, it means the other side has closed (or is in
|
||||
the process of closing) the connection. You will not receive any more data on
|
||||
this connection. Ever. You may be able to send data successfully; I’ll talk
|
||||
more about this later.</p>
|
||||
<p>A protocol like HTTP uses a socket for only one transfer. The client sends a
|
||||
request, then reads a reply. That’s it. The socket is discarded. This means that
|
||||
a client can detect the end of the reply by receiving 0 bytes.</p>
|
||||
<p>But if you plan to reuse your socket for further transfers, you need to realize
|
||||
that <em>there is no</em> <abbr title="End of Transfer">EOT</abbr> <em>on a socket.</em> I repeat: if a socket
|
||||
<code class="docutils literal notranslate"><span class="pre">send</span></code> or <code class="docutils literal notranslate"><span class="pre">recv</span></code> returns after handling 0 bytes, the connection has been
|
||||
broken. If the connection has <em>not</em> been broken, you may wait on a <code class="docutils literal notranslate"><span class="pre">recv</span></code>
|
||||
forever, because the socket will <em>not</em> tell you that there’s nothing more to
|
||||
read (for now). Now if you think about that a bit, you’ll come to realize a
|
||||
fundamental truth of sockets: <em>messages must either be fixed length</em> (yuck), <em>or
|
||||
be delimited</em> (shrug), <em>or indicate how long they are</em> (much better), <em>or end by
|
||||
shutting down the connection</em>. The choice is entirely yours, (but some ways are
|
||||
righter than others).</p>
|
||||
<p>Assuming you don’t want to end the connection, the simplest solution is a fixed
|
||||
length message:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">MySocket</span><span class="p">:</span>
|
||||
<span class="sd">"""demonstration class only</span>
|
||||
<span class="sd"> - coded for clarity, not efficiency</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sock</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">sock</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">sock</span> <span class="o">=</span> <span class="n">socket</span><span class="o">.</span><span class="n">socket</span><span class="p">(</span>
|
||||
<span class="n">socket</span><span class="o">.</span><span class="n">AF_INET</span><span class="p">,</span> <span class="n">socket</span><span class="o">.</span><span class="n">SOCK_STREAM</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">sock</span> <span class="o">=</span> <span class="n">sock</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">connect</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">host</span><span class="p">,</span> <span class="n">port</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">sock</span><span class="o">.</span><span class="n">connect</span><span class="p">((</span><span class="n">host</span><span class="p">,</span> <span class="n">port</span><span class="p">))</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">mysend</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">msg</span><span class="p">):</span>
|
||||
<span class="n">totalsent</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="k">while</span> <span class="n">totalsent</span> <span class="o"><</span> <span class="n">MSGLEN</span><span class="p">:</span>
|
||||
<span class="n">sent</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sock</span><span class="o">.</span><span class="n">send</span><span class="p">(</span><span class="n">msg</span><span class="p">[</span><span class="n">totalsent</span><span class="p">:])</span>
|
||||
<span class="k">if</span> <span class="n">sent</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"socket connection broken"</span><span class="p">)</span>
|
||||
<span class="n">totalsent</span> <span class="o">=</span> <span class="n">totalsent</span> <span class="o">+</span> <span class="n">sent</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">myreceive</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">chunks</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">bytes_recd</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="k">while</span> <span class="n">bytes_recd</span> <span class="o"><</span> <span class="n">MSGLEN</span><span class="p">:</span>
|
||||
<span class="n">chunk</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sock</span><span class="o">.</span><span class="n">recv</span><span class="p">(</span><span class="nb">min</span><span class="p">(</span><span class="n">MSGLEN</span> <span class="o">-</span> <span class="n">bytes_recd</span><span class="p">,</span> <span class="mi">2048</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="n">chunk</span> <span class="o">==</span> <span class="sa">b</span><span class="s1">''</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"socket connection broken"</span><span class="p">)</span>
|
||||
<span class="n">chunks</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">chunk</span><span class="p">)</span>
|
||||
<span class="n">bytes_recd</span> <span class="o">=</span> <span class="n">bytes_recd</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">chunk</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="sa">b</span><span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">chunks</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The sending code here is usable for almost any messaging scheme - in Python you
|
||||
send a bytes object, and you can use <code class="docutils literal notranslate"><span class="pre">len()</span></code> to determine its length (even if it has
|
||||
embedded <code class="docutils literal notranslate"><span class="pre">\0</span></code> characters). It’s mostly the receiving code that gets more
|
||||
complex. (And in C, it’s not much worse, except you can’t use <code class="docutils literal notranslate"><span class="pre">strlen</span></code> if the
|
||||
message has embedded <code class="docutils literal notranslate"><span class="pre">\0</span></code>s.)</p>
|
||||
<p>The easiest enhancement is to make the first character of the message an
|
||||
indicator of message type, and have the type determine the length. Now you have
|
||||
two <code class="docutils literal notranslate"><span class="pre">recv</span></code>s - the first to get (at least) that first character so you can
|
||||
look up the length, and the second in a loop to get the rest. If you decide to
|
||||
go the delimited route, you’ll be receiving in some arbitrary chunk size, (4096
|
||||
or 8192 is frequently a good match for network buffer sizes), and scanning what
|
||||
you’ve received for a delimiter.</p>
|
||||
<p>One complication to be aware of: if your conversational protocol allows multiple
|
||||
messages to be sent back to back (without some kind of reply), and you pass
|
||||
<code class="docutils literal notranslate"><span class="pre">recv</span></code> an arbitrary chunk size, you may end up reading the start of a
|
||||
following message. You’ll need to put that aside and hold onto it, until it’s
|
||||
needed.</p>
|
||||
<p>Prefixing the message with its length (say, as 5 numeric characters) gets more
|
||||
complex, because (believe it or not), you may not get all 5 characters in one
|
||||
<code class="docutils literal notranslate"><span class="pre">recv</span></code>. In playing around, you’ll get away with it; but in high network loads,
|
||||
your code will very quickly break unless you use two <code class="docutils literal notranslate"><span class="pre">recv</span></code> loops - the first
|
||||
to determine the length, the second to get the data part of the message. Nasty.
|
||||
This is also when you’ll discover that <code class="docutils literal notranslate"><span class="pre">send</span></code> does not always manage to get
|
||||
rid of everything in one pass. And despite having read this, you will eventually
|
||||
get bitten by it!</p>
|
||||
<p>In the interests of space, building your character, (and preserving my
|
||||
competitive position), these enhancements are left as an exercise for the
|
||||
reader. Let’s move on to cleaning up.</p>
|
||||
<div class="section" id="binary-data">
|
||||
<h3>Binary Data<a class="headerlink" href="#binary-data" title="Permalink to this headline">¶</a></h3>
|
||||
<p>It is perfectly possible to send binary data over a socket. The major problem is
|
||||
that not all machines use the same formats for binary data. For example, a
|
||||
Motorola chip will represent a 16 bit integer with the value 1 as the two hex
|
||||
bytes 00 01. Intel and DEC, however, are byte-reversed - that same 1 is 01 00.
|
||||
Socket libraries have calls for converting 16 and 32 bit integers - <code class="docutils literal notranslate"><span class="pre">ntohl,</span>
|
||||
<span class="pre">htonl,</span> <span class="pre">ntohs,</span> <span class="pre">htons</span></code> where “n” means <em>network</em> and “h” means <em>host</em>, “s” means
|
||||
<em>short</em> and “l” means <em>long</em>. Where network order is host order, these do
|
||||
nothing, but where the machine is byte-reversed, these swap the bytes around
|
||||
appropriately.</p>
|
||||
<p>In these days of 32 bit machines, the ASCII representation of binary data is
|
||||
frequently smaller than the binary representation. That’s because a surprising
|
||||
amount of the time, all those longs have the value 0, or maybe 1. The string “0”
|
||||
would be two bytes, while binary is four. Of course, this doesn’t fit well with
|
||||
fixed-length messages. Decisions, decisions.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="disconnecting">
|
||||
<h2>Disconnecting<a class="headerlink" href="#disconnecting" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Strictly speaking, you’re supposed to use <code class="docutils literal notranslate"><span class="pre">shutdown</span></code> on a socket before you
|
||||
<code class="docutils literal notranslate"><span class="pre">close</span></code> it. The <code class="docutils literal notranslate"><span class="pre">shutdown</span></code> is an advisory to the socket at the other end.
|
||||
Depending on the argument you pass it, it can mean “I’m not going to send
|
||||
anymore, but I’ll still listen”, or “I’m not listening, good riddance!”. Most
|
||||
socket libraries, however, are so used to programmers neglecting to use this
|
||||
piece of etiquette that normally a <code class="docutils literal notranslate"><span class="pre">close</span></code> is the same as <code class="docutils literal notranslate"><span class="pre">shutdown();</span>
|
||||
<span class="pre">close()</span></code>. So in most situations, an explicit <code class="docutils literal notranslate"><span class="pre">shutdown</span></code> is not needed.</p>
|
||||
<p>One way to use <code class="docutils literal notranslate"><span class="pre">shutdown</span></code> effectively is in an HTTP-like exchange. The client
|
||||
sends a request and then does a <code class="docutils literal notranslate"><span class="pre">shutdown(1)</span></code>. This tells the server “This
|
||||
client is done sending, but can still receive.” The server can detect “EOF” by
|
||||
a receive of 0 bytes. It can assume it has the complete request. The server
|
||||
sends a reply. If the <code class="docutils literal notranslate"><span class="pre">send</span></code> completes successfully then, indeed, the client
|
||||
was still receiving.</p>
|
||||
<p>Python takes the automatic shutdown a step further, and says that when a socket
|
||||
is garbage collected, it will automatically do a <code class="docutils literal notranslate"><span class="pre">close</span></code> if it’s needed. But
|
||||
relying on this is a very bad habit. If your socket just disappears without
|
||||
doing a <code class="docutils literal notranslate"><span class="pre">close</span></code>, the socket at the other end may hang indefinitely, thinking
|
||||
you’re just being slow. <em>Please</em> <code class="docutils literal notranslate"><span class="pre">close</span></code> your sockets when you’re done.</p>
|
||||
<div class="section" id="when-sockets-die">
|
||||
<h3>When Sockets Die<a class="headerlink" href="#when-sockets-die" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Probably the worst thing about using blocking sockets is what happens when the
|
||||
other side comes down hard (without doing a <code class="docutils literal notranslate"><span class="pre">close</span></code>). Your socket is likely to
|
||||
hang. TCP is a reliable protocol, and it will wait a long, long time
|
||||
before giving up on a connection. If you’re using threads, the entire thread is
|
||||
essentially dead. There’s not much you can do about it. As long as you aren’t
|
||||
doing something dumb, like holding a lock while doing a blocking read, the
|
||||
thread isn’t really consuming much in the way of resources. Do <em>not</em> try to kill
|
||||
the thread - part of the reason that threads are more efficient than processes
|
||||
is that they avoid the overhead associated with the automatic recycling of
|
||||
resources. In other words, if you do manage to kill the thread, your whole
|
||||
process is likely to be screwed up.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="non-blocking-sockets">
|
||||
<h2>Non-blocking Sockets<a class="headerlink" href="#non-blocking-sockets" title="Permalink to this headline">¶</a></h2>
|
||||
<p>If you’ve understood the preceding, you already know most of what you need to
|
||||
know about the mechanics of using sockets. You’ll still use the same calls, in
|
||||
much the same ways. It’s just that, if you do it right, your app will be almost
|
||||
inside-out.</p>
|
||||
<p>In Python, you use <code class="docutils literal notranslate"><span class="pre">socket.setblocking(0)</span></code> to make it non-blocking. In C, it’s
|
||||
more complex, (for one thing, you’ll need to choose between the BSD flavor
|
||||
<code class="docutils literal notranslate"><span class="pre">O_NONBLOCK</span></code> and the almost indistinguishable Posix flavor <code class="docutils literal notranslate"><span class="pre">O_NDELAY</span></code>, which
|
||||
is completely different from <code class="docutils literal notranslate"><span class="pre">TCP_NODELAY</span></code>), but it’s the exact same idea. You
|
||||
do this after creating the socket, but before using it. (Actually, if you’re
|
||||
nuts, you can switch back and forth.)</p>
|
||||
<p>The major mechanical difference is that <code class="docutils literal notranslate"><span class="pre">send</span></code>, <code class="docutils literal notranslate"><span class="pre">recv</span></code>, <code class="docutils literal notranslate"><span class="pre">connect</span></code> and
|
||||
<code class="docutils literal notranslate"><span class="pre">accept</span></code> can return without having done anything. You have (of course) a
|
||||
number of choices. You can check return codes and error codes and generally drive
|
||||
yourself crazy. If you don’t believe me, try it sometime. Your app will grow
|
||||
large, buggy and suck CPU. So let’s skip the brain-dead solutions and do it
|
||||
right.</p>
|
||||
<p>Use <code class="docutils literal notranslate"><span class="pre">select</span></code>.</p>
|
||||
<p>In C, coding <code class="docutils literal notranslate"><span class="pre">select</span></code> is fairly complex. In Python, it’s a piece of cake, but
|
||||
it’s close enough to the C version that if you understand <code class="docutils literal notranslate"><span class="pre">select</span></code> in Python,
|
||||
you’ll have little trouble with it in C:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">ready_to_read</span><span class="p">,</span> <span class="n">ready_to_write</span><span class="p">,</span> <span class="n">in_error</span> <span class="o">=</span> \
|
||||
<span class="n">select</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
|
||||
<span class="n">potential_readers</span><span class="p">,</span>
|
||||
<span class="n">potential_writers</span><span class="p">,</span>
|
||||
<span class="n">potential_errs</span><span class="p">,</span>
|
||||
<span class="n">timeout</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>You pass <code class="docutils literal notranslate"><span class="pre">select</span></code> three lists: the first contains all sockets that you might
|
||||
want to try reading; the second all the sockets you might want to try writing
|
||||
to, and the last (normally left empty) those that you want to check for errors.
|
||||
You should note that a socket can go into more than one list. The <code class="docutils literal notranslate"><span class="pre">select</span></code>
|
||||
call is blocking, but you can give it a timeout. This is generally a sensible
|
||||
thing to do - give it a nice long timeout (say a minute) unless you have good
|
||||
reason to do otherwise.</p>
|
||||
<p>In return, you will get three lists. They contain the sockets that are actually
|
||||
readable, writable and in error. Each of these lists is a subset (possibly
|
||||
empty) of the corresponding list you passed in.</p>
|
||||
<p>If a socket is in the output readable list, you can be
|
||||
as-close-to-certain-as-we-ever-get-in-this-business that a <code class="docutils literal notranslate"><span class="pre">recv</span></code> on that
|
||||
socket will return <em>something</em>. Same idea for the writable list. You’ll be able
|
||||
to send <em>something</em>. Maybe not all you want to, but <em>something</em> is better than
|
||||
nothing. (Actually, any reasonably healthy socket will return as writable - it
|
||||
just means outbound network buffer space is available.)</p>
|
||||
<p>If you have a “server” socket, put it in the potential_readers list. If it comes
|
||||
out in the readable list, your <code class="docutils literal notranslate"><span class="pre">accept</span></code> will (almost certainly) work. If you
|
||||
have created a new socket to <code class="docutils literal notranslate"><span class="pre">connect</span></code> to someone else, put it in the
|
||||
potential_writers list. If it shows up in the writable list, you have a decent
|
||||
chance that it has connected.</p>
|
||||
<p>Actually, <code class="docutils literal notranslate"><span class="pre">select</span></code> can be handy even with blocking sockets. It’s one way of
|
||||
determining whether you will block - the socket returns as readable when there’s
|
||||
something in the buffers. However, this still doesn’t help with the problem of
|
||||
determining whether the other end is done, or just busy with something else.</p>
|
||||
<p><strong>Portability alert</strong>: On Unix, <code class="docutils literal notranslate"><span class="pre">select</span></code> works with both sockets and
|
||||
files. Don’t try this on Windows. On Windows, <code class="docutils literal notranslate"><span class="pre">select</span></code> works with sockets
|
||||
only. Also note that in C, many of the more advanced socket options are done
|
||||
differently on Windows. In fact, on Windows I usually use threads (which work
|
||||
very, very well) with my sockets.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li><a class="reference internal" href="#">Socket Programming HOWTO</a><ul>
|
||||
<li><a class="reference internal" href="#sockets">Sockets</a><ul>
|
||||
<li><a class="reference internal" href="#history">History</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#creating-a-socket">Creating a Socket</a><ul>
|
||||
<li><a class="reference internal" href="#ipc">IPC</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#using-a-socket">Using a Socket</a><ul>
|
||||
<li><a class="reference internal" href="#binary-data">Binary Data</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#disconnecting">Disconnecting</a><ul>
|
||||
<li><a class="reference internal" href="#when-sockets-die">When Sockets Die</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#non-blocking-sockets">Non-blocking Sockets</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h4>Previous topic</h4>
|
||||
<p class="topless"><a href="regex.html"
|
||||
title="previous chapter">Regular Expression HOWTO</a></p>
|
||||
<h4>Next topic</h4>
|
||||
<p class="topless"><a href="sorting.html"
|
||||
title="next chapter">Sorting HOW TO</a></p>
|
||||
<div role="note" aria-label="source link">
|
||||
<h3>This Page</h3>
|
||||
<ul class="this-page-menu">
|
||||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||||
<li>
|
||||
<a href="https://github.com/python/cpython/blob/3.7/Doc/howto/sockets.rst"
|
||||
rel="nofollow">Show Source
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="sorting.html" title="Sorting HOW TO"
|
||||
>next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="regex.html" title="Regular Expression HOWTO"
|
||||
>previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer">
|
||||
© <a href="../copyright.html">Copyright</a> 2001-2019, Python Software Foundation.
|
||||
<br />
|
||||
The Python Software Foundation is a non-profit corporation.
|
||||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||||
<br />
|
||||
Last updated on Jul 13, 2019.
|
||||
<a href="../bugs.html">Found a bug</a>?
|
||||
<br />
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 2.0.1.
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
473
python-3.7.4-docs-html/howto/sorting.html
Normal file
473
python-3.7.4-docs-html/howto/sorting.html
Normal file
@@ -0,0 +1,473 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Sorting HOW TO — Python 3.7.4 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pydoctheme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/sidebar.js"></script>
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml"
|
||||
title="Search within Python 3.7.4 documentation"
|
||||
href="../_static/opensearch.xml"/>
|
||||
<link rel="author" title="About these documents" href="../about.html" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||||
<link rel="next" title="Unicode HOWTO" href="unicode.html" />
|
||||
<link rel="prev" title="Socket Programming HOWTO" href="sockets.html" />
|
||||
<link rel="shortcut icon" type="image/png" href="../_static/py.png" />
|
||||
<link rel="canonical" href="https://docs.python.org/3/howto/sorting.html" />
|
||||
|
||||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||||
<script type="text/javascript" src="../_static/switchers.js"></script>
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
@media only screen {
|
||||
table.full-width-table {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
</head><body>
|
||||
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="unicode.html" title="Unicode HOWTO"
|
||||
accesskey="N">next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="sockets.html" title="Socket Programming HOWTO"
|
||||
accesskey="P">previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<div class="section" id="sorting-how-to">
|
||||
<span id="sortinghowto"></span><h1>Sorting HOW TO<a class="headerlink" href="#sorting-how-to" title="Permalink to this headline">¶</a></h1>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Author</dt>
|
||||
<dd class="field-odd"><p>Andrew Dalke and Raymond Hettinger</p>
|
||||
</dd>
|
||||
<dt class="field-even">Release</dt>
|
||||
<dd class="field-even"><p>0.1</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<p>Python lists have a built-in <a class="reference internal" href="../library/stdtypes.html#list.sort" title="list.sort"><code class="xref py py-meth docutils literal notranslate"><span class="pre">list.sort()</span></code></a> method that modifies the list
|
||||
in-place. There is also a <a class="reference internal" href="../library/functions.html#sorted" title="sorted"><code class="xref py py-func docutils literal notranslate"><span class="pre">sorted()</span></code></a> built-in function that builds a new
|
||||
sorted list from an iterable.</p>
|
||||
<p>In this document, we explore the various techniques for sorting data using Python.</p>
|
||||
<div class="section" id="sorting-basics">
|
||||
<h2>Sorting Basics<a class="headerlink" href="#sorting-basics" title="Permalink to this headline">¶</a></h2>
|
||||
<p>A simple ascending sort is very easy: just call the <a class="reference internal" href="../library/functions.html#sorted" title="sorted"><code class="xref py py-func docutils literal notranslate"><span class="pre">sorted()</span></code></a> function. It
|
||||
returns a new sorted list:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">sorted</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
|
||||
<span class="go">[1, 2, 3, 4, 5]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>You can also use the <a class="reference internal" href="../library/stdtypes.html#list.sort" title="list.sort"><code class="xref py py-meth docutils literal notranslate"><span class="pre">list.sort()</span></code></a> method. It modifies the list
|
||||
in-place (and returns <code class="docutils literal notranslate"><span class="pre">None</span></code> to avoid confusion). Usually it’s less convenient
|
||||
than <a class="reference internal" href="../library/functions.html#sorted" title="sorted"><code class="xref py py-func docutils literal notranslate"><span class="pre">sorted()</span></code></a> - but if you don’t need the original list, it’s slightly
|
||||
more efficient.</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">a</span> <span class="o">=</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">]</span>
|
||||
<span class="gp">>>> </span><span class="n">a</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
|
||||
<span class="gp">>>> </span><span class="n">a</span>
|
||||
<span class="go">[1, 2, 3, 4, 5]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Another difference is that the <a class="reference internal" href="../library/stdtypes.html#list.sort" title="list.sort"><code class="xref py py-meth docutils literal notranslate"><span class="pre">list.sort()</span></code></a> method is only defined for
|
||||
lists. In contrast, the <a class="reference internal" href="../library/functions.html#sorted" title="sorted"><code class="xref py py-func docutils literal notranslate"><span class="pre">sorted()</span></code></a> function accepts any iterable.</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">sorted</span><span class="p">({</span><span class="mi">1</span><span class="p">:</span> <span class="s1">'D'</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s1">'B'</span><span class="p">,</span> <span class="mi">3</span><span class="p">:</span> <span class="s1">'B'</span><span class="p">,</span> <span class="mi">4</span><span class="p">:</span> <span class="s1">'E'</span><span class="p">,</span> <span class="mi">5</span><span class="p">:</span> <span class="s1">'A'</span><span class="p">})</span>
|
||||
<span class="go">[1, 2, 3, 4, 5]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="key-functions">
|
||||
<h2>Key Functions<a class="headerlink" href="#key-functions" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Both <a class="reference internal" href="../library/stdtypes.html#list.sort" title="list.sort"><code class="xref py py-meth docutils literal notranslate"><span class="pre">list.sort()</span></code></a> and <a class="reference internal" href="../library/functions.html#sorted" title="sorted"><code class="xref py py-func docutils literal notranslate"><span class="pre">sorted()</span></code></a> have a <em>key</em> parameter to specify a
|
||||
function to be called on each list element prior to making comparisons.</p>
|
||||
<p>For example, here’s a case-insensitive string comparison:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="s2">"This is a test string from Andrew"</span><span class="o">.</span><span class="n">split</span><span class="p">(),</span> <span class="n">key</span><span class="o">=</span><span class="nb">str</span><span class="o">.</span><span class="n">lower</span><span class="p">)</span>
|
||||
<span class="go">['a', 'Andrew', 'from', 'is', 'string', 'test', 'This']</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The value of the <em>key</em> parameter should be a function that takes a single argument
|
||||
and returns a key to use for sorting purposes. This technique is fast because
|
||||
the key function is called exactly once for each input record.</p>
|
||||
<p>A common pattern is to sort complex objects using some of the object’s indices
|
||||
as keys. For example:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">student_tuples</span> <span class="o">=</span> <span class="p">[</span>
|
||||
<span class="gp">... </span> <span class="p">(</span><span class="s1">'john'</span><span class="p">,</span> <span class="s1">'A'</span><span class="p">,</span> <span class="mi">15</span><span class="p">),</span>
|
||||
<span class="gp">... </span> <span class="p">(</span><span class="s1">'jane'</span><span class="p">,</span> <span class="s1">'B'</span><span class="p">,</span> <span class="mi">12</span><span class="p">),</span>
|
||||
<span class="gp">... </span> <span class="p">(</span><span class="s1">'dave'</span><span class="p">,</span> <span class="s1">'B'</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span>
|
||||
<span class="gp">... </span><span class="p">]</span>
|
||||
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">student_tuples</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">student</span><span class="p">:</span> <span class="n">student</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span> <span class="c1"># sort by age</span>
|
||||
<span class="go">[('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The same technique works for objects with named attributes. For example:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="k">class</span> <span class="nc">Student</span><span class="p">:</span>
|
||||
<span class="gp">... </span> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">grade</span><span class="p">,</span> <span class="n">age</span><span class="p">):</span>
|
||||
<span class="gp">... </span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span>
|
||||
<span class="gp">... </span> <span class="bp">self</span><span class="o">.</span><span class="n">grade</span> <span class="o">=</span> <span class="n">grade</span>
|
||||
<span class="gp">... </span> <span class="bp">self</span><span class="o">.</span><span class="n">age</span> <span class="o">=</span> <span class="n">age</span>
|
||||
<span class="gp">... </span> <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="gp">... </span> <span class="k">return</span> <span class="nb">repr</span><span class="p">((</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">grade</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">age</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">student_objects</span> <span class="o">=</span> <span class="p">[</span>
|
||||
<span class="gp">... </span> <span class="n">Student</span><span class="p">(</span><span class="s1">'john'</span><span class="p">,</span> <span class="s1">'A'</span><span class="p">,</span> <span class="mi">15</span><span class="p">),</span>
|
||||
<span class="gp">... </span> <span class="n">Student</span><span class="p">(</span><span class="s1">'jane'</span><span class="p">,</span> <span class="s1">'B'</span><span class="p">,</span> <span class="mi">12</span><span class="p">),</span>
|
||||
<span class="gp">... </span> <span class="n">Student</span><span class="p">(</span><span class="s1">'dave'</span><span class="p">,</span> <span class="s1">'B'</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span>
|
||||
<span class="gp">... </span><span class="p">]</span>
|
||||
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">student_objects</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">student</span><span class="p">:</span> <span class="n">student</span><span class="o">.</span><span class="n">age</span><span class="p">)</span> <span class="c1"># sort by age</span>
|
||||
<span class="go">[('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="operator-module-functions">
|
||||
<h2>Operator Module Functions<a class="headerlink" href="#operator-module-functions" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The key-function patterns shown above are very common, so Python provides
|
||||
convenience functions to make accessor functions easier and faster. The
|
||||
<a class="reference internal" href="../library/operator.html#module-operator" title="operator: Functions corresponding to the standard operators."><code class="xref py py-mod docutils literal notranslate"><span class="pre">operator</span></code></a> module has <a class="reference internal" href="../library/operator.html#operator.itemgetter" title="operator.itemgetter"><code class="xref py py-func docutils literal notranslate"><span class="pre">itemgetter()</span></code></a>,
|
||||
<a class="reference internal" href="../library/operator.html#operator.attrgetter" title="operator.attrgetter"><code class="xref py py-func docutils literal notranslate"><span class="pre">attrgetter()</span></code></a>, and a <a class="reference internal" href="../library/operator.html#operator.methodcaller" title="operator.methodcaller"><code class="xref py py-func docutils literal notranslate"><span class="pre">methodcaller()</span></code></a> function.</p>
|
||||
<p>Using those functions, the above examples become simpler and faster:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">operator</span> <span class="k">import</span> <span class="n">itemgetter</span><span class="p">,</span> <span class="n">attrgetter</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">student_tuples</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">itemgetter</span><span class="p">(</span><span class="mi">2</span><span class="p">))</span>
|
||||
<span class="go">[('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">student_objects</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">attrgetter</span><span class="p">(</span><span class="s1">'age'</span><span class="p">))</span>
|
||||
<span class="go">[('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The operator module functions allow multiple levels of sorting. For example, to
|
||||
sort by <em>grade</em> then by <em>age</em>:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">student_tuples</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">itemgetter</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">))</span>
|
||||
<span class="go">[('john', 'A', 15), ('dave', 'B', 10), ('jane', 'B', 12)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">student_objects</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">attrgetter</span><span class="p">(</span><span class="s1">'grade'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">))</span>
|
||||
<span class="go">[('john', 'A', 15), ('dave', 'B', 10), ('jane', 'B', 12)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="ascending-and-descending">
|
||||
<h2>Ascending and Descending<a class="headerlink" href="#ascending-and-descending" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Both <a class="reference internal" href="../library/stdtypes.html#list.sort" title="list.sort"><code class="xref py py-meth docutils literal notranslate"><span class="pre">list.sort()</span></code></a> and <a class="reference internal" href="../library/functions.html#sorted" title="sorted"><code class="xref py py-func docutils literal notranslate"><span class="pre">sorted()</span></code></a> accept a <em>reverse</em> parameter with a
|
||||
boolean value. This is used to flag descending sorts. For example, to get the
|
||||
student data in reverse <em>age</em> order:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">student_tuples</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">itemgetter</span><span class="p">(</span><span class="mi">2</span><span class="p">),</span> <span class="n">reverse</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="go">[('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">student_objects</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">attrgetter</span><span class="p">(</span><span class="s1">'age'</span><span class="p">),</span> <span class="n">reverse</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="go">[('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="sort-stability-and-complex-sorts">
|
||||
<h2>Sort Stability and Complex Sorts<a class="headerlink" href="#sort-stability-and-complex-sorts" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Sorts are guaranteed to be <a class="reference external" href="https://en.wikipedia.org/wiki/Sorting_algorithm#Stability">stable</a>. That means that
|
||||
when multiple records have the same key, their original order is preserved.</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="p">[(</span><span class="s1">'red'</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="p">(</span><span class="s1">'blue'</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="p">(</span><span class="s1">'red'</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span class="p">(</span><span class="s1">'blue'</span><span class="p">,</span> <span class="mi">2</span><span class="p">)]</span>
|
||||
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">itemgetter</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span>
|
||||
<span class="go">[('blue', 1), ('blue', 2), ('red', 1), ('red', 2)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Notice how the two records for <em>blue</em> retain their original order so that
|
||||
<code class="docutils literal notranslate"><span class="pre">('blue',</span> <span class="pre">1)</span></code> is guaranteed to precede <code class="docutils literal notranslate"><span class="pre">('blue',</span> <span class="pre">2)</span></code>.</p>
|
||||
<p>This wonderful property lets you build complex sorts in a series of sorting
|
||||
steps. For example, to sort the student data by descending <em>grade</em> and then
|
||||
ascending <em>age</em>, do the <em>age</em> sort first and then sort again using <em>grade</em>:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">s</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">student_objects</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">attrgetter</span><span class="p">(</span><span class="s1">'age'</span><span class="p">))</span> <span class="c1"># sort on secondary key</span>
|
||||
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">attrgetter</span><span class="p">(</span><span class="s1">'grade'</span><span class="p">),</span> <span class="n">reverse</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="c1"># now sort on primary key, descending</span>
|
||||
<span class="go">[('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The <a class="reference external" href="https://en.wikipedia.org/wiki/Timsort">Timsort</a> algorithm used in Python
|
||||
does multiple sorts efficiently because it can take advantage of any ordering
|
||||
already present in a dataset.</p>
|
||||
</div>
|
||||
<div class="section" id="the-old-way-using-decorate-sort-undecorate">
|
||||
<h2>The Old Way Using Decorate-Sort-Undecorate<a class="headerlink" href="#the-old-way-using-decorate-sort-undecorate" title="Permalink to this headline">¶</a></h2>
|
||||
<p>This idiom is called Decorate-Sort-Undecorate after its three steps:</p>
|
||||
<ul class="simple">
|
||||
<li><p>First, the initial list is decorated with new values that control the sort order.</p></li>
|
||||
<li><p>Second, the decorated list is sorted.</p></li>
|
||||
<li><p>Finally, the decorations are removed, creating a list that contains only the
|
||||
initial values in the new order.</p></li>
|
||||
</ul>
|
||||
<p>For example, to sort the student data by <em>grade</em> using the DSU approach:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">decorated</span> <span class="o">=</span> <span class="p">[(</span><span class="n">student</span><span class="o">.</span><span class="n">grade</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">student</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">student</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">student_objects</span><span class="p">)]</span>
|
||||
<span class="gp">>>> </span><span class="n">decorated</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
|
||||
<span class="gp">>>> </span><span class="p">[</span><span class="n">student</span> <span class="k">for</span> <span class="n">grade</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">student</span> <span class="ow">in</span> <span class="n">decorated</span><span class="p">]</span> <span class="c1"># undecorate</span>
|
||||
<span class="go">[('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>This idiom works because tuples are compared lexicographically; the first items
|
||||
are compared; if they are the same then the second items are compared, and so
|
||||
on.</p>
|
||||
<p>It is not strictly necessary in all cases to include the index <em>i</em> in the
|
||||
decorated list, but including it gives two benefits:</p>
|
||||
<ul class="simple">
|
||||
<li><p>The sort is stable – if two items have the same key, their order will be
|
||||
preserved in the sorted list.</p></li>
|
||||
<li><p>The original items do not have to be comparable because the ordering of the
|
||||
decorated tuples will be determined by at most the first two items. So for
|
||||
example the original list could contain complex numbers which cannot be sorted
|
||||
directly.</p></li>
|
||||
</ul>
|
||||
<p>Another name for this idiom is
|
||||
<a class="reference external" href="https://en.wikipedia.org/wiki/Schwartzian_transform">Schwartzian transform</a>,
|
||||
after Randal L. Schwartz, who popularized it among Perl programmers.</p>
|
||||
<p>Now that Python sorting provides key-functions, this technique is not often needed.</p>
|
||||
</div>
|
||||
<div class="section" id="the-old-way-using-the-cmp-parameter">
|
||||
<h2>The Old Way Using the <em>cmp</em> Parameter<a class="headerlink" href="#the-old-way-using-the-cmp-parameter" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Many constructs given in this HOWTO assume Python 2.4 or later. Before that,
|
||||
there was no <a class="reference internal" href="../library/functions.html#sorted" title="sorted"><code class="xref py py-func docutils literal notranslate"><span class="pre">sorted()</span></code></a> builtin and <a class="reference internal" href="../library/stdtypes.html#list.sort" title="list.sort"><code class="xref py py-meth docutils literal notranslate"><span class="pre">list.sort()</span></code></a> took no keyword
|
||||
arguments. Instead, all of the Py2.x versions supported a <em>cmp</em> parameter to
|
||||
handle user specified comparison functions.</p>
|
||||
<p>In Py3.0, the <em>cmp</em> parameter was removed entirely (as part of a larger effort to
|
||||
simplify and unify the language, eliminating the conflict between rich
|
||||
comparisons and the <code class="xref py py-meth docutils literal notranslate"><span class="pre">__cmp__()</span></code> magic method).</p>
|
||||
<p>In Py2.x, sort allowed an optional function which could be called for doing the
|
||||
comparisons. That function should take two arguments to be compared and then
|
||||
return a negative value for less-than, return zero if they are equal, or return
|
||||
a positive value for greater-than. For example, we can do:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="k">def</span> <span class="nf">numeric_compare</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
||||
<span class="gp">... </span> <span class="k">return</span> <span class="n">x</span> <span class="o">-</span> <span class="n">y</span>
|
||||
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="nb">cmp</span><span class="o">=</span><span class="n">numeric_compare</span><span class="p">)</span> <span class="c1"># doctest: +SKIP</span>
|
||||
<span class="go">[1, 2, 3, 4, 5]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Or you can reverse the order of comparison with:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="k">def</span> <span class="nf">reverse_numeric</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
||||
<span class="gp">... </span> <span class="k">return</span> <span class="n">y</span> <span class="o">-</span> <span class="n">x</span>
|
||||
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="nb">cmp</span><span class="o">=</span><span class="n">reverse_numeric</span><span class="p">)</span> <span class="c1"># doctest: +SKIP</span>
|
||||
<span class="go">[5, 4, 3, 2, 1]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>When porting code from Python 2.x to 3.x, a situation can arise where you have
|
||||
the user supplying a comparison function and you need to convert that to a key
|
||||
function. The following wrapper makes that easy to do:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">cmp_to_key</span><span class="p">(</span><span class="n">mycmp</span><span class="p">):</span>
|
||||
<span class="s1">'Convert a cmp= function into a key= function'</span>
|
||||
<span class="k">class</span> <span class="nc">K</span><span class="p">:</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">obj</span> <span class="o">=</span> <span class="n">obj</span>
|
||||
<span class="k">def</span> <span class="nf">__lt__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">mycmp</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">obj</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">obj</span><span class="p">)</span> <span class="o"><</span> <span class="mi">0</span>
|
||||
<span class="k">def</span> <span class="nf">__gt__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">mycmp</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">obj</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">obj</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span>
|
||||
<span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">mycmp</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">obj</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">obj</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span>
|
||||
<span class="k">def</span> <span class="nf">__le__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">mycmp</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">obj</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">obj</span><span class="p">)</span> <span class="o"><=</span> <span class="mi">0</span>
|
||||
<span class="k">def</span> <span class="nf">__ge__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">mycmp</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">obj</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">obj</span><span class="p">)</span> <span class="o">>=</span> <span class="mi">0</span>
|
||||
<span class="k">def</span> <span class="nf">__ne__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">mycmp</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">obj</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">obj</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">0</span>
|
||||
<span class="k">return</span> <span class="n">K</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>To convert to a key function, just wrap the old comparison function:</p>
|
||||
<div class="highlight-pycon3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">sorted</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="n">key</span><span class="o">=</span><span class="n">cmp_to_key</span><span class="p">(</span><span class="n">reverse_numeric</span><span class="p">))</span>
|
||||
<span class="go">[5, 4, 3, 2, 1]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>In Python 3.2, the <a class="reference internal" href="../library/functools.html#functools.cmp_to_key" title="functools.cmp_to_key"><code class="xref py py-func docutils literal notranslate"><span class="pre">functools.cmp_to_key()</span></code></a> function was added to the
|
||||
<a class="reference internal" href="../library/functools.html#module-functools" title="functools: Higher-order functions and operations on callable objects."><code class="xref py py-mod docutils literal notranslate"><span class="pre">functools</span></code></a> module in the standard library.</p>
|
||||
</div>
|
||||
<div class="section" id="odd-and-ends">
|
||||
<h2>Odds and Ends<a class="headerlink" href="#odd-and-ends" title="Permalink to this headline">¶</a></h2>
|
||||
<ul>
|
||||
<li><p>For locale aware sorting, use <a class="reference internal" href="../library/locale.html#locale.strxfrm" title="locale.strxfrm"><code class="xref py py-func docutils literal notranslate"><span class="pre">locale.strxfrm()</span></code></a> for a key function or
|
||||
<a class="reference internal" href="../library/locale.html#locale.strcoll" title="locale.strcoll"><code class="xref py py-func docutils literal notranslate"><span class="pre">locale.strcoll()</span></code></a> for a comparison function.</p></li>
|
||||
<li><p>The <em>reverse</em> parameter still maintains sort stability (so that records with
|
||||
equal keys retain the original order). Interestingly, that effect can be
|
||||
simulated without the parameter by using the builtin <a class="reference internal" href="../library/functions.html#reversed" title="reversed"><code class="xref py py-func docutils literal notranslate"><span class="pre">reversed()</span></code></a> function
|
||||
twice:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="p">[(</span><span class="s1">'red'</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="p">(</span><span class="s1">'blue'</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="p">(</span><span class="s1">'red'</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span class="p">(</span><span class="s1">'blue'</span><span class="p">,</span> <span class="mi">2</span><span class="p">)]</span>
|
||||
<span class="gp">>>> </span><span class="n">standard_way</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">itemgetter</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="n">reverse</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">double_reversed</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">reversed</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="nb">reversed</span><span class="p">(</span><span class="n">data</span><span class="p">),</span> <span class="n">key</span><span class="o">=</span><span class="n">itemgetter</span><span class="p">(</span><span class="mi">0</span><span class="p">))))</span>
|
||||
<span class="gp">>>> </span><span class="k">assert</span> <span class="n">standard_way</span> <span class="o">==</span> <span class="n">double_reversed</span>
|
||||
<span class="gp">>>> </span><span class="n">standard_way</span>
|
||||
<span class="go">[('red', 1), ('red', 2), ('blue', 1), ('blue', 2)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</li>
|
||||
<li><p>The sort routines are guaranteed to use <a class="reference internal" href="../reference/datamodel.html#object.__lt__" title="object.__lt__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__lt__()</span></code></a> when making comparisons
|
||||
between two objects. So, it is easy to add a standard sort order to a class by
|
||||
defining an <a class="reference internal" href="../reference/datamodel.html#object.__lt__" title="object.__lt__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">__lt__()</span></code></a> method:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">Student</span><span class="o">.</span><span class="fm">__lt__</span> <span class="o">=</span> <span class="k">lambda</span> <span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">age</span> <span class="o"><</span> <span class="n">other</span><span class="o">.</span><span class="n">age</span>
|
||||
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">student_objects</span><span class="p">)</span>
|
||||
<span class="go">[('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</li>
|
||||
<li><p>Key functions need not depend directly on the objects being sorted. A key
|
||||
function can also access external resources. For instance, if the student grades
|
||||
are stored in a dictionary, they can be used to sort a separate list of student
|
||||
names:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">students</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'dave'</span><span class="p">,</span> <span class="s1">'john'</span><span class="p">,</span> <span class="s1">'jane'</span><span class="p">]</span>
|
||||
<span class="gp">>>> </span><span class="n">newgrades</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'john'</span><span class="p">:</span> <span class="s1">'F'</span><span class="p">,</span> <span class="s1">'jane'</span><span class="p">:</span><span class="s1">'A'</span><span class="p">,</span> <span class="s1">'dave'</span><span class="p">:</span> <span class="s1">'C'</span><span class="p">}</span>
|
||||
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">students</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">newgrades</span><span class="o">.</span><span class="fm">__getitem__</span><span class="p">)</span>
|
||||
<span class="go">['jane', 'dave', 'john']</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li><a class="reference internal" href="#">Sorting HOW TO</a><ul>
|
||||
<li><a class="reference internal" href="#sorting-basics">Sorting Basics</a></li>
|
||||
<li><a class="reference internal" href="#key-functions">Key Functions</a></li>
|
||||
<li><a class="reference internal" href="#operator-module-functions">Operator Module Functions</a></li>
|
||||
<li><a class="reference internal" href="#ascending-and-descending">Ascending and Descending</a></li>
|
||||
<li><a class="reference internal" href="#sort-stability-and-complex-sorts">Sort Stability and Complex Sorts</a></li>
|
||||
<li><a class="reference internal" href="#the-old-way-using-decorate-sort-undecorate">The Old Way Using Decorate-Sort-Undecorate</a></li>
|
||||
<li><a class="reference internal" href="#the-old-way-using-the-cmp-parameter">The Old Way Using the <em>cmp</em> Parameter</a></li>
|
||||
<li><a class="reference internal" href="#odd-and-ends">Odds and Ends</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h4>Previous topic</h4>
|
||||
<p class="topless"><a href="sockets.html"
|
||||
title="previous chapter">Socket Programming HOWTO</a></p>
|
||||
<h4>Next topic</h4>
|
||||
<p class="topless"><a href="unicode.html"
|
||||
title="next chapter">Unicode HOWTO</a></p>
|
||||
<div role="note" aria-label="source link">
|
||||
<h3>This Page</h3>
|
||||
<ul class="this-page-menu">
|
||||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||||
<li>
|
||||
<a href="https://github.com/python/cpython/blob/3.7/Doc/howto/sorting.rst"
|
||||
rel="nofollow">Show Source
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="unicode.html" title="Unicode HOWTO"
|
||||
>next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="sockets.html" title="Socket Programming HOWTO"
|
||||
>previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer">
|
||||
© <a href="../copyright.html">Copyright</a> 2001-2019, Python Software Foundation.
|
||||
<br />
|
||||
The Python Software Foundation is a non-profit corporation.
|
||||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||||
<br />
|
||||
Last updated on Jul 13, 2019.
|
||||
<a href="../bugs.html">Found a bug</a>?
|
||||
<br />
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 2.0.1.
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
878
python-3.7.4-docs-html/howto/unicode.html
Normal file
878
python-3.7.4-docs-html/howto/unicode.html
Normal file
@@ -0,0 +1,878 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Unicode HOWTO — Python 3.7.4 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pydoctheme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/sidebar.js"></script>
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml"
|
||||
title="Search within Python 3.7.4 documentation"
|
||||
href="../_static/opensearch.xml"/>
|
||||
<link rel="author" title="About these documents" href="../about.html" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||||
<link rel="next" title="HOWTO Fetch Internet Resources Using The urllib Package" href="urllib2.html" />
|
||||
<link rel="prev" title="Sorting HOW TO" href="sorting.html" />
|
||||
<link rel="shortcut icon" type="image/png" href="../_static/py.png" />
|
||||
<link rel="canonical" href="https://docs.python.org/3/howto/unicode.html" />
|
||||
|
||||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||||
<script type="text/javascript" src="../_static/switchers.js"></script>
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
@media only screen {
|
||||
table.full-width-table {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
</head><body>
|
||||
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="urllib2.html" title="HOWTO Fetch Internet Resources Using The urllib Package"
|
||||
accesskey="N">next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="sorting.html" title="Sorting HOW TO"
|
||||
accesskey="P">previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<div class="section" id="unicode-howto">
|
||||
<span id="id1"></span><h1>Unicode HOWTO<a class="headerlink" href="#unicode-howto" title="Permalink to this headline">¶</a></h1>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Release</dt>
|
||||
<dd class="field-odd"><p>1.12</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<p>This HOWTO discusses Python’s support for the Unicode specification
|
||||
for representing textual data, and explains various problems that
|
||||
people commonly encounter when trying to work with Unicode.</p>
|
||||
<div class="section" id="introduction-to-unicode">
|
||||
<h2>Introduction to Unicode<a class="headerlink" href="#introduction-to-unicode" title="Permalink to this headline">¶</a></h2>
|
||||
<div class="section" id="definitions">
|
||||
<h3>Definitions<a class="headerlink" href="#definitions" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Today’s programs need to be able to handle a wide variety of
|
||||
characters. Applications are often internationalized to display
|
||||
messages and output in a variety of user-selectable languages; the
|
||||
same program might need to output an error message in English, French,
|
||||
Japanese, Hebrew, or Russian. Web content can be written in any of
|
||||
these languages and can also include a variety of emoji symbols.
|
||||
Python’s string type uses the Unicode Standard for representing
|
||||
characters, which lets Python programs work with all these different
|
||||
possible characters.</p>
|
||||
<p>Unicode (<a class="reference external" href="https://www.unicode.org/">https://www.unicode.org/</a>) is a specification that aims to
|
||||
list every character used by human languages and give each character
|
||||
its own unique code. The Unicode specifications are continually
|
||||
revised and updated to add new languages and symbols.</p>
|
||||
<p>A <strong>character</strong> is the smallest possible component of a text. ‘A’, ‘B’, ‘C’,
|
||||
etc., are all different characters. So are ‘È’ and ‘Í’. Characters vary
|
||||
depending on the language or context you’re talking
|
||||
about. For example, there’s a character for “Roman Numeral One”, ‘Ⅰ’, that’s
|
||||
separate from the uppercase letter ‘I’. They’ll usually look the same,
|
||||
but these are two different characters that have different meanings.</p>
|
||||
<p>The Unicode standard describes how characters are represented by
|
||||
<strong>code points</strong>. A code point value is an integer in the range 0 to
|
||||
0x10FFFF (about 1.1 million values, with some 110 thousand assigned so
|
||||
far). In the standard and in this document, a code point is written
|
||||
using the notation <code class="docutils literal notranslate"><span class="pre">U+265E</span></code> to mean the character with value
|
||||
<code class="docutils literal notranslate"><span class="pre">0x265e</span></code> (9,822 in decimal).</p>
|
||||
<p>The Unicode standard contains a lot of tables listing characters and
|
||||
their corresponding code points:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>0061 'a'; LATIN SMALL LETTER A
|
||||
0062 'b'; LATIN SMALL LETTER B
|
||||
0063 'c'; LATIN SMALL LETTER C
|
||||
...
|
||||
007B '{'; LEFT CURLY BRACKET
|
||||
...
|
||||
2167 'Ⅷ': ROMAN NUMERAL EIGHT
|
||||
2168 'Ⅸ': ROMAN NUMERAL NINE
|
||||
...
|
||||
265E '♞': BLACK CHESS KNIGHT
|
||||
265F '♟': BLACK CHESS PAWN
|
||||
...
|
||||
1F600 '😀': GRINNING FACE
|
||||
1F609 '😉': WINKING FACE
|
||||
...
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Strictly, these definitions imply that it’s meaningless to say ‘this is
|
||||
character <code class="docutils literal notranslate"><span class="pre">U+265E</span></code>’. <code class="docutils literal notranslate"><span class="pre">U+265E</span></code> is a code point, which represents some particular
|
||||
character; in this case, it represents the character ‘BLACK CHESS KNIGHT’,
|
||||
‘♞’. In
|
||||
informal contexts, this distinction between code points and characters will
|
||||
sometimes be forgotten.</p>
|
||||
<p>A character is represented on a screen or on paper by a set of graphical
|
||||
elements that’s called a <strong>glyph</strong>. The glyph for an uppercase A, for example,
|
||||
is two diagonal strokes and a horizontal stroke, though the exact details will
|
||||
depend on the font being used. Most Python code doesn’t need to worry about
|
||||
glyphs; figuring out the correct glyph to display is generally the job of a GUI
|
||||
toolkit or a terminal’s font renderer.</p>
|
||||
</div>
|
||||
<div class="section" id="encodings">
|
||||
<h3>Encodings<a class="headerlink" href="#encodings" title="Permalink to this headline">¶</a></h3>
|
||||
<p>To summarize the previous section: a Unicode string is a sequence of
|
||||
code points, which are numbers from 0 through <code class="docutils literal notranslate"><span class="pre">0x10FFFF</span></code> (1,114,111
|
||||
decimal). This sequence of code points needs to be represented in
|
||||
memory as a set of <strong>code units</strong>, and <strong>code units</strong> are then mapped
|
||||
to 8-bit bytes. The rules for translating a Unicode string into a
|
||||
sequence of bytes are called a <strong>character encoding</strong>, or just
|
||||
an <strong>encoding</strong>.</p>
|
||||
<p>The first encoding you might think of is using 32-bit integers as the
|
||||
code unit, and then using the CPU’s representation of 32-bit integers.
|
||||
In this representation, the string “Python” might look like this:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span> P y t h o n
|
||||
0x50 00 00 00 79 00 00 00 74 00 00 00 68 00 00 00 6f 00 00 00 6e 00 00 00
|
||||
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>This representation is straightforward but using it presents a number of
|
||||
problems.</p>
|
||||
<ol class="arabic simple">
|
||||
<li><p>It’s not portable; different processors order the bytes differently.</p></li>
|
||||
<li><p>It’s very wasteful of space. In most texts, the majority of the code points
|
||||
are less than 127, or less than 255, so a lot of space is occupied by <code class="docutils literal notranslate"><span class="pre">0x00</span></code>
|
||||
bytes. The above string takes 24 bytes compared to the 6 bytes needed for an
|
||||
ASCII representation. Increased RAM usage doesn’t matter too much (desktop
|
||||
computers have gigabytes of RAM, and strings aren’t usually that large), but
|
||||
expanding our usage of disk and network bandwidth by a factor of 4 is
|
||||
intolerable.</p></li>
|
||||
<li><p>It’s not compatible with existing C functions such as <code class="docutils literal notranslate"><span class="pre">strlen()</span></code>, so a new
|
||||
family of wide string functions would need to be used.</p></li>
|
||||
</ol>
|
||||
<p>Therefore this encoding isn’t used very much, and people instead choose other
|
||||
encodings that are more efficient and convenient, such as UTF-8.</p>
|
||||
<p>UTF-8 is one of the most commonly used encodings, and Python often
|
||||
defaults to using it. UTF stands for “Unicode Transformation Format”,
|
||||
and the ‘8’ means that 8-bit values are used in the encoding. (There
|
||||
are also UTF-16 and UTF-32 encodings, but they are less frequently
|
||||
used than UTF-8.) UTF-8 uses the following rules:</p>
|
||||
<ol class="arabic simple">
|
||||
<li><p>If the code point is < 128, it’s represented by the corresponding byte value.</p></li>
|
||||
<li><p>If the code point is >= 128, it’s turned into a sequence of two, three, or
|
||||
four bytes, where each byte of the sequence is between 128 and 255.</p></li>
|
||||
</ol>
|
||||
<p>UTF-8 has several convenient properties:</p>
|
||||
<ol class="arabic simple">
|
||||
<li><p>It can handle any Unicode code point.</p></li>
|
||||
<li><p>A Unicode string is turned into a sequence of bytes that contains embedded
|
||||
zero bytes only where they represent the null character (U+0000). This means
|
||||
that UTF-8 strings can be processed by C functions such as <code class="docutils literal notranslate"><span class="pre">strcpy()</span></code> and sent
|
||||
through protocols that can’t handle zero bytes for anything other than
|
||||
end-of-string markers.</p></li>
|
||||
<li><p>A string of ASCII text is also valid UTF-8 text.</p></li>
|
||||
<li><p>UTF-8 is fairly compact; the majority of commonly used characters can be
|
||||
represented with one or two bytes.</p></li>
|
||||
<li><p>If bytes are corrupted or lost, it’s possible to determine the start of the
|
||||
next UTF-8-encoded code point and resynchronize. It’s also unlikely that
|
||||
random 8-bit data will look like valid UTF-8.</p></li>
|
||||
<li><p>UTF-8 is a byte oriented encoding. The encoding specifies that each
|
||||
character is represented by a specific sequence of one or more bytes. This
|
||||
avoids the byte-ordering issues that can occur with integer and word oriented
|
||||
encodings, like UTF-16 and UTF-32, where the sequence of bytes varies depending
|
||||
on the hardware on which the string was encoded.</p></li>
|
||||
</ol>
|
||||
</div>
|
||||
<div class="section" id="references">
|
||||
<h3>References<a class="headerlink" href="#references" title="Permalink to this headline">¶</a></h3>
|
||||
<p>The <a class="reference external" href="http://www.unicode.org">Unicode Consortium site</a> has character charts, a
|
||||
glossary, and PDF versions of the Unicode specification. Be prepared for some
|
||||
difficult reading. <a class="reference external" href="http://www.unicode.org/history/">A chronology</a> of the
|
||||
origin and development of Unicode is also available on the site.</p>
|
||||
<p>On the Computerphile YouTube channel, Tom Scott briefly
|
||||
<a class="reference external" href="https://www.youtube.com/watch?v=MijmeoH9LT4">discusses the history of Unicode and UTF-8</a>
|
||||
(9 minutes 36 seconds).</p>
|
||||
<p>To help understand the standard, Jukka Korpela has written <a class="reference external" href="http://jkorpela.fi/unicode/guide.html">an introductory
|
||||
guide</a> to reading the
|
||||
Unicode character tables.</p>
|
||||
<p>Another <a class="reference external" href="https://www.joelonsoftware.com/2003/10/08/the-absolute-minimum-every-software-developer-absolutely-positively-must-know-about-unicode-and-character-sets-no-excuses/">good introductory article</a>
|
||||
was written by Joel Spolsky.
|
||||
If this introduction didn’t make things clear to you, you should try
|
||||
reading this alternate article before continuing.</p>
|
||||
<p>Wikipedia entries are often helpful; see the entries for “<a class="reference external" href="https://en.wikipedia.org/wiki/Character_encoding">character encoding</a>” and <a class="reference external" href="https://en.wikipedia.org/wiki/UTF-8">UTF-8</a>, for example.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="python-s-unicode-support">
|
||||
<h2>Python’s Unicode Support<a class="headerlink" href="#python-s-unicode-support" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Now that you’ve learned the rudiments of Unicode, we can look at Python’s
|
||||
Unicode features.</p>
|
||||
<div class="section" id="the-string-type">
|
||||
<h3>The String Type<a class="headerlink" href="#the-string-type" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Since Python 3.0, the language’s <a class="reference internal" href="../library/stdtypes.html#str" title="str"><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></a> type contains Unicode
|
||||
characters, meaning any string created using <code class="docutils literal notranslate"><span class="pre">"unicode</span> <span class="pre">rocks!"</span></code>, <code class="docutils literal notranslate"><span class="pre">'unicode</span>
|
||||
<span class="pre">rocks!'</span></code>, or the triple-quoted string syntax is stored as Unicode.</p>
|
||||
<p>The default encoding for Python source code is UTF-8, so you can simply
|
||||
include a Unicode character in a string literal:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">try</span><span class="p">:</span>
|
||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="s1">'/tmp/input.txt'</span><span class="p">,</span> <span class="s1">'r'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
||||
<span class="o">...</span>
|
||||
<span class="k">except</span> <span class="ne">OSError</span><span class="p">:</span>
|
||||
<span class="c1"># 'File not found' error message.</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Fichier non trouvé"</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Side note: Python 3 also supports using Unicode characters in identifiers:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">répertoire</span> <span class="o">=</span> <span class="s2">"/tmp/records.log"</span>
|
||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">répertoire</span><span class="p">,</span> <span class="s2">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
||||
<span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s2">"test</span><span class="se">\n</span><span class="s2">"</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>If you can’t enter a particular character in your editor or want to
|
||||
keep the source code ASCII-only for some reason, you can also use
|
||||
escape sequences in string literals. (Depending on your system,
|
||||
you may see the actual capital-delta glyph instead of a <code class="docutils literal notranslate"><span class="pre">\u</span></code> escape.)</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="s2">"</span><span class="se">\N{GREEK CAPITAL LETTER DELTA}</span><span class="s2">"</span> <span class="c1"># Using the character name</span>
|
||||
<span class="go">'\u0394'</span>
|
||||
<span class="gp">>>> </span><span class="s2">"</span><span class="se">\u0394</span><span class="s2">"</span> <span class="c1"># Using a 16-bit hex value</span>
|
||||
<span class="go">'\u0394'</span>
|
||||
<span class="gp">>>> </span><span class="s2">"</span><span class="se">\U00000394</span><span class="s2">"</span> <span class="c1"># Using a 32-bit hex value</span>
|
||||
<span class="go">'\u0394'</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>In addition, one can create a string using the <a class="reference internal" href="../library/stdtypes.html#bytes.decode" title="bytes.decode"><code class="xref py py-func docutils literal notranslate"><span class="pre">decode()</span></code></a> method of
|
||||
<a class="reference internal" href="../library/stdtypes.html#bytes" title="bytes"><code class="xref py py-class docutils literal notranslate"><span class="pre">bytes</span></code></a>. This method takes an <em>encoding</em> argument, such as <code class="docutils literal notranslate"><span class="pre">UTF-8</span></code>,
|
||||
and optionally an <em>errors</em> argument.</p>
|
||||
<p>The <em>errors</em> argument specifies the response when the input string can’t be
|
||||
converted according to the encoding’s rules. Legal values for this argument are
|
||||
<code class="docutils literal notranslate"><span class="pre">'strict'</span></code> (raise a <a class="reference internal" href="../library/exceptions.html#UnicodeDecodeError" title="UnicodeDecodeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">UnicodeDecodeError</span></code></a> exception), <code class="docutils literal notranslate"><span class="pre">'replace'</span></code> (use
|
||||
<code class="docutils literal notranslate"><span class="pre">U+FFFD</span></code>, <code class="docutils literal notranslate"><span class="pre">REPLACEMENT</span> <span class="pre">CHARACTER</span></code>), <code class="docutils literal notranslate"><span class="pre">'ignore'</span></code> (just leave the
|
||||
character out of the Unicode result), or <code class="docutils literal notranslate"><span class="pre">'backslashreplace'</span></code> (inserts a
|
||||
<code class="docutils literal notranslate"><span class="pre">\xNN</span></code> escape sequence).
|
||||
The following examples show the differences:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="sa">b</span><span class="s1">'</span><span class="se">\x80</span><span class="s1">abc'</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s2">"utf-8"</span><span class="p">,</span> <span class="s2">"strict"</span><span class="p">)</span>
|
||||
<span class="gt">Traceback (most recent call last):</span>
|
||||
<span class="o">...</span>
|
||||
<span class="gr">UnicodeDecodeError</span>: <span class="n">'utf-8' codec can't decode byte 0x80 in position 0:</span>
|
||||
<span class="go"> invalid start byte</span>
|
||||
<span class="gp">>>> </span><span class="sa">b</span><span class="s1">'</span><span class="se">\x80</span><span class="s1">abc'</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s2">"utf-8"</span><span class="p">,</span> <span class="s2">"replace"</span><span class="p">)</span>
|
||||
<span class="go">'\ufffdabc'</span>
|
||||
<span class="gp">>>> </span><span class="sa">b</span><span class="s1">'</span><span class="se">\x80</span><span class="s1">abc'</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s2">"utf-8"</span><span class="p">,</span> <span class="s2">"backslashreplace"</span><span class="p">)</span>
|
||||
<span class="go">'\\x80abc'</span>
|
||||
<span class="gp">>>> </span><span class="sa">b</span><span class="s1">'</span><span class="se">\x80</span><span class="s1">abc'</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s2">"utf-8"</span><span class="p">,</span> <span class="s2">"ignore"</span><span class="p">)</span>
|
||||
<span class="go">'abc'</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Encodings are specified as strings containing the encoding’s name. Python
|
||||
comes with roughly 100 different encodings; see the Python Library Reference at
|
||||
<a class="reference internal" href="../library/codecs.html#standard-encodings"><span class="std std-ref">Standard Encodings</span></a> for a list. Some encodings have multiple names; for
|
||||
example, <code class="docutils literal notranslate"><span class="pre">'latin-1'</span></code>, <code class="docutils literal notranslate"><span class="pre">'iso_8859_1'</span></code> and <code class="docutils literal notranslate"><span class="pre">'8859'</span></code> are all synonyms for
|
||||
the same encoding.</p>
|
||||
<p>One-character Unicode strings can also be created with the <a class="reference internal" href="../library/functions.html#chr" title="chr"><code class="xref py py-func docutils literal notranslate"><span class="pre">chr()</span></code></a>
|
||||
built-in function, which takes integers and returns a Unicode string of length 1
|
||||
that contains the corresponding code point. The reverse operation is the
|
||||
built-in <a class="reference internal" href="../library/functions.html#ord" title="ord"><code class="xref py py-func docutils literal notranslate"><span class="pre">ord()</span></code></a> function that takes a one-character Unicode string and
|
||||
returns the code point value:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">chr</span><span class="p">(</span><span class="mi">57344</span><span class="p">)</span>
|
||||
<span class="go">'\ue000'</span>
|
||||
<span class="gp">>>> </span><span class="nb">ord</span><span class="p">(</span><span class="s1">'</span><span class="se">\ue000</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="go">57344</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="converting-to-bytes">
|
||||
<h3>Converting to Bytes<a class="headerlink" href="#converting-to-bytes" title="Permalink to this headline">¶</a></h3>
|
||||
<p>The opposite method of <a class="reference internal" href="../library/stdtypes.html#bytes.decode" title="bytes.decode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">bytes.decode()</span></code></a> is <a class="reference internal" href="../library/stdtypes.html#str.encode" title="str.encode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">str.encode()</span></code></a>,
|
||||
which returns a <a class="reference internal" href="../library/stdtypes.html#bytes" title="bytes"><code class="xref py py-class docutils literal notranslate"><span class="pre">bytes</span></code></a> representation of the Unicode string, encoded in the
|
||||
requested <em>encoding</em>.</p>
|
||||
<p>The <em>errors</em> parameter is the same as the parameter of the
|
||||
<a class="reference internal" href="../library/stdtypes.html#bytes.decode" title="bytes.decode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">decode()</span></code></a> method but supports a few more possible handlers. As well as
|
||||
<code class="docutils literal notranslate"><span class="pre">'strict'</span></code>, <code class="docutils literal notranslate"><span class="pre">'ignore'</span></code>, and <code class="docutils literal notranslate"><span class="pre">'replace'</span></code> (which in this case
|
||||
inserts a question mark instead of the unencodable character), there is
|
||||
also <code class="docutils literal notranslate"><span class="pre">'xmlcharrefreplace'</span></code> (inserts an XML character reference),
|
||||
<code class="docutils literal notranslate"><span class="pre">'backslashreplace'</span></code> (inserts a <code class="docutils literal notranslate"><span class="pre">\uNNNN</span></code> escape sequence) and
|
||||
<code class="docutils literal notranslate"><span class="pre">'namereplace'</span></code> (inserts a <code class="docutils literal notranslate"><span class="pre">\N{...}</span></code> escape sequence).</p>
|
||||
<p>The following example shows the different results:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">u</span> <span class="o">=</span> <span class="nb">chr</span><span class="p">(</span><span class="mi">40960</span><span class="p">)</span> <span class="o">+</span> <span class="s1">'abcd'</span> <span class="o">+</span> <span class="nb">chr</span><span class="p">(</span><span class="mi">1972</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">u</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">)</span>
|
||||
<span class="go">b'\xea\x80\x80abcd\xde\xb4'</span>
|
||||
<span class="gp">>>> </span><span class="n">u</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'ascii'</span><span class="p">)</span>
|
||||
<span class="gt">Traceback (most recent call last):</span>
|
||||
<span class="o">...</span>
|
||||
<span class="gr">UnicodeEncodeError</span>: <span class="n">'ascii' codec can't encode character '\ua000' in</span>
|
||||
<span class="go"> position 0: ordinal not in range(128)</span>
|
||||
<span class="gp">>>> </span><span class="n">u</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'ascii'</span><span class="p">,</span> <span class="s1">'ignore'</span><span class="p">)</span>
|
||||
<span class="go">b'abcd'</span>
|
||||
<span class="gp">>>> </span><span class="n">u</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'ascii'</span><span class="p">,</span> <span class="s1">'replace'</span><span class="p">)</span>
|
||||
<span class="go">b'?abcd?'</span>
|
||||
<span class="gp">>>> </span><span class="n">u</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'ascii'</span><span class="p">,</span> <span class="s1">'xmlcharrefreplace'</span><span class="p">)</span>
|
||||
<span class="go">b'&amp;#40960;abcd&amp;#1972;'</span>
|
||||
<span class="gp">>>> </span><span class="n">u</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'ascii'</span><span class="p">,</span> <span class="s1">'backslashreplace'</span><span class="p">)</span>
|
||||
<span class="go">b'\\ua000abcd\\u07b4'</span>
|
||||
<span class="gp">>>> </span><span class="n">u</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'ascii'</span><span class="p">,</span> <span class="s1">'namereplace'</span><span class="p">)</span>
|
||||
<span class="go">b'\\N{YI SYLLABLE IT}abcd\\u07b4'</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The low-level routines for registering and accessing the available
|
||||
encodings are found in the <a class="reference internal" href="../library/codecs.html#module-codecs" title="codecs: Encode and decode data and streams."><code class="xref py py-mod docutils literal notranslate"><span class="pre">codecs</span></code></a> module. Implementing new
|
||||
encodings also requires understanding the <a class="reference internal" href="../library/codecs.html#module-codecs" title="codecs: Encode and decode data and streams."><code class="xref py py-mod docutils literal notranslate"><span class="pre">codecs</span></code></a> module.
|
||||
However, the encoding and decoding functions returned by this module
|
||||
are usually more low-level than is comfortable, and writing new encodings
|
||||
is a specialized task, so the module won’t be covered in this HOWTO.</p>
|
||||
</div>
|
||||
<div class="section" id="unicode-literals-in-python-source-code">
|
||||
<h3>Unicode Literals in Python Source Code<a class="headerlink" href="#unicode-literals-in-python-source-code" title="Permalink to this headline">¶</a></h3>
|
||||
<p>In Python source code, specific Unicode code points can be written using the
|
||||
<code class="docutils literal notranslate"><span class="pre">\u</span></code> escape sequence, which is followed by four hex digits giving the code
|
||||
point. The <code class="docutils literal notranslate"><span class="pre">\U</span></code> escape sequence is similar, but expects eight hex digits,
|
||||
not four:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">s</span> <span class="o">=</span> <span class="s2">"a</span><span class="se">\xac\u1234\u20ac\U00008000</span><span class="s2">"</span>
|
||||
<span class="gp">... </span><span class="c1">#     ^^^^ two-digit hex escape</span>
|
||||
<span class="gp">... </span><span class="c1">#         ^^^^^^ four-digit Unicode escape</span>
|
||||
<span class="gp">... </span><span class="c1">#                     ^^^^^^^^^^ eight-digit Unicode escape</span>
|
||||
<span class="gp">>>> </span><span class="p">[</span><span class="nb">ord</span><span class="p">(</span><span class="n">c</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">s</span><span class="p">]</span>
|
||||
<span class="go">[97, 172, 4660, 8364, 32768]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Using escape sequences for code points greater than 127 is fine in small doses,
|
||||
but becomes an annoyance if you’re using many accented characters, as you would
|
||||
in a program with messages in French or some other accent-using language. You
|
||||
can also assemble strings using the <a class="reference internal" href="../library/functions.html#chr" title="chr"><code class="xref py py-func docutils literal notranslate"><span class="pre">chr()</span></code></a> built-in function, but this is
|
||||
even more tedious.</p>
|
||||
<p>Ideally, you’d want to be able to write literals in your language’s natural
|
||||
encoding. You could then edit Python source code with your favorite editor
|
||||
which would display the accented characters naturally, and have the right
|
||||
characters used at runtime.</p>
|
||||
<p>Python supports writing source code in UTF-8 by default, but you can use almost
|
||||
any encoding if you declare the encoding being used. This is done by including
|
||||
a special comment as either the first or second line of the source file:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="ch">#!/usr/bin/env python</span>
|
||||
<span class="c1"># -*- coding: latin-1 -*-</span>
|
||||
|
||||
<span class="n">u</span> <span class="o">=</span> <span class="s1">'abcdé'</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="nb">ord</span><span class="p">(</span><span class="n">u</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The syntax is inspired by Emacs’s notation for specifying variables local to a
|
||||
file. Emacs supports many different variables, but Python only supports
|
||||
‘coding’. The <code class="docutils literal notranslate"><span class="pre">-*-</span></code> symbols indicate to Emacs that the comment is special;
|
||||
they have no significance to Python but are a convention. Python looks for
|
||||
<code class="docutils literal notranslate"><span class="pre">coding:</span> <span class="pre">name</span></code> or <code class="docutils literal notranslate"><span class="pre">coding=name</span></code> in the comment.</p>
|
||||
<p>If you don’t include such a comment, the default encoding used will be UTF-8 as
|
||||
already mentioned. See also <span class="target" id="index-0"></span><a class="pep reference external" href="https://www.python.org/dev/peps/pep-0263"><strong>PEP 263</strong></a> for more information.</p>
|
||||
</div>
|
||||
<div class="section" id="unicode-properties">
|
||||
<h3>Unicode Properties<a class="headerlink" href="#unicode-properties" title="Permalink to this headline">¶</a></h3>
|
||||
<p>The Unicode specification includes a database of information about
|
||||
code points. For each defined code point, the information includes
|
||||
the character’s name, its category, the numeric value if applicable
|
||||
(for characters representing numeric concepts such as the Roman
|
||||
numerals, fractions such as one-third and four-fifths, etc.). There
|
||||
are also display-related properties, such as how to use the code point
|
||||
in bidirectional text.</p>
|
||||
<p>The following program displays some information about several characters, and
|
||||
prints the numeric value of one particular character:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">unicodedata</span>
|
||||
|
||||
<span class="n">u</span> <span class="o">=</span> <span class="nb">chr</span><span class="p">(</span><span class="mi">233</span><span class="p">)</span> <span class="o">+</span> <span class="nb">chr</span><span class="p">(</span><span class="mh">0x0bf2</span><span class="p">)</span> <span class="o">+</span> <span class="nb">chr</span><span class="p">(</span><span class="mi">3972</span><span class="p">)</span> <span class="o">+</span> <span class="nb">chr</span><span class="p">(</span><span class="mi">6000</span><span class="p">)</span> <span class="o">+</span> <span class="nb">chr</span><span class="p">(</span><span class="mi">13231</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">c</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">u</span><span class="p">):</span>
|
||||
    <span class="nb">print</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="s1">'</span><span class="si">%04x</span><span class="s1">'</span> <span class="o">%</span> <span class="nb">ord</span><span class="p">(</span><span class="n">c</span><span class="p">),</span> <span class="n">unicodedata</span><span class="o">.</span><span class="n">category</span><span class="p">(</span><span class="n">c</span><span class="p">),</span> <span class="n">end</span><span class="o">=</span><span class="s2">" "</span><span class="p">)</span>
|
||||
    <span class="nb">print</span><span class="p">(</span><span class="n">unicodedata</span><span class="o">.</span><span class="n">name</span><span class="p">(</span><span class="n">c</span><span class="p">))</span>
|
||||
|
||||
<span class="c1"># Get numeric value of second character</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">unicodedata</span><span class="o">.</span><span class="n">numeric</span><span class="p">(</span><span class="n">u</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>When run, this prints:</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>0 00e9 Ll LATIN SMALL LETTER E WITH ACUTE
|
||||
1 0bf2 No TAMIL NUMBER ONE THOUSAND
|
||||
2 0f84 Mn TIBETAN MARK HALANTA
|
||||
3 1770 Lo TAGBANWA LETTER SA
|
||||
4 33af So SQUARE RAD OVER S SQUARED
|
||||
1000.0
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The category codes are abbreviations describing the nature of the character.
|
||||
These are grouped into categories such as “Letter”, “Number”, “Punctuation”, or
|
||||
“Symbol”, which in turn are broken up into subcategories. To take the codes
|
||||
from the above output, <code class="docutils literal notranslate"><span class="pre">'Ll'</span></code> means “Letter, lowercase”, <code class="docutils literal notranslate"><span class="pre">'No'</span></code> means
|
||||
“Number, other”, <code class="docutils literal notranslate"><span class="pre">'Mn'</span></code> is “Mark, nonspacing”, and <code class="docutils literal notranslate"><span class="pre">'So'</span></code> is “Symbol,
|
||||
other”. See
|
||||
<a class="reference external" href="https://www.unicode.org/reports/tr44/#General_Category_Values">the General Category Values section of the Unicode Character Database documentation</a> for a
|
||||
list of category codes.</p>
|
||||
</div>
|
||||
<div class="section" id="comparing-strings">
|
||||
<h3>Comparing Strings<a class="headerlink" href="#comparing-strings" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Unicode adds some complication to comparing strings, because the same
|
||||
set of characters can be represented by different sequences of code
|
||||
points. For example, a letter like ‘ê’ can be represented as a single
|
||||
code point U+00EA, or as U+0065 U+0302, which is the code point for
|
||||
‘e’ followed by a code point for ‘COMBINING CIRCUMFLEX ACCENT’. These
|
||||
will produce the same output when printed, but one is a string of
|
||||
length 1 and the other is of length 2.</p>
|
||||
<p>One tool for a case-insensitive comparison is the
|
||||
<a class="reference internal" href="../library/stdtypes.html#str.casefold" title="str.casefold"><code class="xref py py-meth docutils literal notranslate"><span class="pre">casefold()</span></code></a> string method that converts a string to a
|
||||
case-insensitive form following an algorithm described by the Unicode
|
||||
Standard. This algorithm has special handling for characters such as
|
||||
the German letter ‘ß’ (code point U+00DF), which becomes the pair of
|
||||
lowercase letters ‘ss’.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">street</span> <span class="o">=</span> <span class="s1">'Gürzenichstraße'</span>
|
||||
<span class="gp">>>> </span><span class="n">street</span><span class="o">.</span><span class="n">casefold</span><span class="p">()</span>
|
||||
<span class="go">'gürzenichstrasse'</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>A second tool is the <a class="reference internal" href="../library/unicodedata.html#module-unicodedata" title="unicodedata: Access the Unicode Database."><code class="xref py py-mod docutils literal notranslate"><span class="pre">unicodedata</span></code></a> module’s
|
||||
<a class="reference internal" href="../library/unicodedata.html#unicodedata.normalize" title="unicodedata.normalize"><code class="xref py py-func docutils literal notranslate"><span class="pre">normalize()</span></code></a> function that converts strings to one
|
||||
of several normal forms, so that a letter followed by a combining
|
||||
character and its precomposed single-character equivalent get the same representation. <code class="xref py py-func docutils literal notranslate"><span class="pre">normalize()</span></code> can
|
||||
be used to perform string comparisons that won’t falsely report
|
||||
inequality if two strings use combining characters differently:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">unicodedata</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">compare_strs</span><span class="p">(</span><span class="n">s1</span><span class="p">,</span> <span class="n">s2</span><span class="p">):</span>
|
||||
    <span class="k">def</span> <span class="nf">NFD</span><span class="p">(</span><span class="n">s</span><span class="p">):</span>
|
||||
        <span class="k">return</span> <span class="n">unicodedata</span><span class="o">.</span><span class="n">normalize</span><span class="p">(</span><span class="s1">'NFD'</span><span class="p">,</span> <span class="n">s</span><span class="p">)</span>
|
||||
|
||||
    <span class="k">return</span> <span class="n">NFD</span><span class="p">(</span><span class="n">s1</span><span class="p">)</span> <span class="o">==</span> <span class="n">NFD</span><span class="p">(</span><span class="n">s2</span><span class="p">)</span>
|
||||
|
||||
<span class="n">single_char</span> <span class="o">=</span> <span class="s1">'ê'</span>
|
||||
<span class="n">multiple_chars</span> <span class="o">=</span> <span class="s1">'</span><span class="se">\N{LATIN SMALL LETTER E}\N{COMBINING CIRCUMFLEX ACCENT}</span><span class="s1">'</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'length of first string='</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">single_char</span><span class="p">))</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'length of second string='</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">multiple_chars</span><span class="p">))</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">compare_strs</span><span class="p">(</span><span class="n">single_char</span><span class="p">,</span> <span class="n">multiple_chars</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>When run, this outputs:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python3 compare-strs.py
|
||||
<span class="go">length of first string= 1</span>
|
||||
<span class="go">length of second string= 2</span>
|
||||
<span class="go">True</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The first argument to the <a class="reference internal" href="../library/unicodedata.html#unicodedata.normalize" title="unicodedata.normalize"><code class="xref py py-func docutils literal notranslate"><span class="pre">normalize()</span></code></a> function is a
|
||||
string giving the desired normalization form, which can be one of
|
||||
‘NFC’, ‘NFKC’, ‘NFD’, and ‘NFKD’.</p>
|
||||
<p>The Unicode Standard also specifies how to do caseless comparisons:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">unicodedata</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">compare_caseless</span><span class="p">(</span><span class="n">s1</span><span class="p">,</span> <span class="n">s2</span><span class="p">):</span>
|
||||
    <span class="k">def</span> <span class="nf">NFD</span><span class="p">(</span><span class="n">s</span><span class="p">):</span>
|
||||
        <span class="k">return</span> <span class="n">unicodedata</span><span class="o">.</span><span class="n">normalize</span><span class="p">(</span><span class="s1">'NFD'</span><span class="p">,</span> <span class="n">s</span><span class="p">)</span>
|
||||
|
||||
    <span class="k">return</span> <span class="n">NFD</span><span class="p">(</span><span class="n">NFD</span><span class="p">(</span><span class="n">s1</span><span class="p">)</span><span class="o">.</span><span class="n">casefold</span><span class="p">())</span> <span class="o">==</span> <span class="n">NFD</span><span class="p">(</span><span class="n">NFD</span><span class="p">(</span><span class="n">s2</span><span class="p">)</span><span class="o">.</span><span class="n">casefold</span><span class="p">())</span>
|
||||
|
||||
<span class="c1"># Example usage</span>
|
||||
<span class="n">single_char</span> <span class="o">=</span> <span class="s1">'ê'</span>
|
||||
<span class="n">multiple_chars</span> <span class="o">=</span> <span class="s1">'</span><span class="se">\N{LATIN CAPITAL LETTER E}\N{COMBINING CIRCUMFLEX ACCENT}</span><span class="s1">'</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">compare_caseless</span><span class="p">(</span><span class="n">single_char</span><span class="p">,</span> <span class="n">multiple_chars</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>This will print <code class="docutils literal notranslate"><span class="pre">True</span></code>. (Why is <code class="xref py py-func docutils literal notranslate"><span class="pre">NFD()</span></code> invoked twice? Because
|
||||
there are a few characters that make <code class="xref py py-meth docutils literal notranslate"><span class="pre">casefold()</span></code> return a
|
||||
non-normalized string, so the result needs to be normalized again. See
|
||||
section 3.13 of the Unicode Standard for a discussion and an example.)</p>
|
||||
</div>
|
||||
<div class="section" id="unicode-regular-expressions">
|
||||
<h3>Unicode Regular Expressions<a class="headerlink" href="#unicode-regular-expressions" title="Permalink to this headline">¶</a></h3>
|
||||
<p>The regular expressions supported by the <a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a> module can be provided
|
||||
either as bytes or strings. Some of the special character sequences such as
|
||||
<code class="docutils literal notranslate"><span class="pre">\d</span></code> and <code class="docutils literal notranslate"><span class="pre">\w</span></code> have different meanings depending on whether
|
||||
the pattern is supplied as bytes or a string. For example,
|
||||
<code class="docutils literal notranslate"><span class="pre">\d</span></code> will match the characters <code class="docutils literal notranslate"><span class="pre">[0-9]</span></code> in bytes but
|
||||
in strings will match any character that’s in the <code class="docutils literal notranslate"><span class="pre">'Nd'</span></code> category.</p>
|
||||
<p>The string in this example has the number 57 written in both Thai and
|
||||
Arabic numerals:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">re</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'\d+'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">s</span> <span class="o">=</span> <span class="s2">"Over </span><span class="se">\u0e55\u0e57</span><span class="s2"> 57 flavours"</span>
|
||||
<span class="n">m</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">()))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>When executed, <code class="docutils literal notranslate"><span class="pre">\d+</span></code> will match the Thai numerals and print them
|
||||
out. If you supply the <a class="reference internal" href="../library/re.html#re.ASCII" title="re.ASCII"><code class="xref py py-const docutils literal notranslate"><span class="pre">re.ASCII</span></code></a> flag to
|
||||
<a class="reference internal" href="../library/re.html#re.compile" title="re.compile"><code class="xref py py-func docutils literal notranslate"><span class="pre">compile()</span></code></a>, <code class="docutils literal notranslate"><span class="pre">\d+</span></code> will match the substring “57” instead.</p>
|
||||
<p>Similarly, <code class="docutils literal notranslate"><span class="pre">\w</span></code> matches a wide variety of Unicode characters but
|
||||
only <code class="docutils literal notranslate"><span class="pre">[a-zA-Z0-9_]</span></code> in bytes or if <a class="reference internal" href="../library/re.html#re.ASCII" title="re.ASCII"><code class="xref py py-const docutils literal notranslate"><span class="pre">re.ASCII</span></code></a> is supplied,
|
||||
and <code class="docutils literal notranslate"><span class="pre">\s</span></code> will match either Unicode whitespace characters or
|
||||
<code class="docutils literal notranslate"><span class="pre">[</span> <span class="pre">\t\n\r\f\v]</span></code>.</p>
|
||||
</div>
|
||||
<div class="section" id="id2">
|
||||
<h3>References<a class="headerlink" href="#id2" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Some good alternative discussions of Python’s Unicode support are:</p>
|
||||
<ul class="simple">
|
||||
<li><p><a class="reference external" href="http://python-notes.curiousefficiency.org/en/latest/python3/text_file_processing.html">Processing Text Files in Python 3</a>, by Nick Coghlan.</p></li>
|
||||
<li><p><a class="reference external" href="https://nedbatchelder.com/text/unipain.html">Pragmatic Unicode</a>, a PyCon 2012 presentation by Ned Batchelder.</p></li>
|
||||
</ul>
|
||||
<p>The <a class="reference internal" href="../library/stdtypes.html#str" title="str"><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></a> type is described in the Python library reference at
|
||||
<a class="reference internal" href="../library/stdtypes.html#textseq"><span class="std std-ref">Text Sequence Type — str</span></a>.</p>
|
||||
<p>The documentation for the <a class="reference internal" href="../library/unicodedata.html#module-unicodedata" title="unicodedata: Access the Unicode Database."><code class="xref py py-mod docutils literal notranslate"><span class="pre">unicodedata</span></code></a> module.</p>
|
||||
<p>The documentation for the <a class="reference internal" href="../library/codecs.html#module-codecs" title="codecs: Encode and decode data and streams."><code class="xref py py-mod docutils literal notranslate"><span class="pre">codecs</span></code></a> module.</p>
|
||||
<p>Marc-André Lemburg gave <a class="reference external" href="https://downloads.egenix.com/python/Unicode-EPC2002-Talk.pdf">a presentation titled “Python and Unicode” (PDF slides)</a> at
|
||||
EuroPython 2002. The slides are an excellent overview of the design of Python
|
||||
2’s Unicode features (where the Unicode string type is called <code class="docutils literal notranslate"><span class="pre">unicode</span></code> and
|
||||
literals start with <code class="docutils literal notranslate"><span class="pre">u</span></code>).</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="reading-and-writing-unicode-data">
|
||||
<h2>Reading and Writing Unicode Data<a class="headerlink" href="#reading-and-writing-unicode-data" title="Permalink to this headline">¶</a></h2>
|
||||
<p>Once you’ve written some code that works with Unicode data, the next problem is
|
||||
input/output. How do you get Unicode strings into your program, and how do you
|
||||
convert Unicode into a form suitable for storage or transmission?</p>
|
||||
<p>It’s possible that you may not need to do anything depending on your input
|
||||
sources and output destinations; you should check whether the libraries used in
|
||||
your application support Unicode natively. XML parsers often return Unicode
|
||||
data, for example. Many relational databases also support Unicode-valued
|
||||
columns and can return Unicode values from an SQL query.</p>
|
||||
<p>Unicode data is usually converted to a particular encoding before it gets
|
||||
written to disk or sent over a socket. It’s possible to do all the work
|
||||
yourself: open a file, read an 8-bit bytes object from it, and convert the bytes
|
||||
with <code class="docutils literal notranslate"><span class="pre">bytes.decode(encoding)</span></code>. However, the manual approach is not recommended.</p>
|
||||
<p>One problem is the multi-byte nature of encodings; one Unicode character can be
|
||||
represented by several bytes. If you want to read the file in arbitrary-sized
|
||||
chunks (say, 1024 or 4096 bytes), you need to write error-handling code to catch the case
|
||||
where only part of the bytes encoding a single Unicode character are read at the
|
||||
end of a chunk. One solution would be to read the entire file into memory and
|
||||
then perform the decoding, but that prevents you from working with files that
|
||||
are extremely large; if you need to read a 2 GiB file, you need 2 GiB of RAM.
|
||||
(More, really, since for at least a moment you’d need to have both the encoded
|
||||
string and its Unicode version in memory.)</p>
|
||||
<p>The solution would be to use the low-level decoding interface to catch the case
|
||||
of partial coding sequences. The work of implementing this has already been
|
||||
done for you: the built-in <a class="reference internal" href="../library/functions.html#open" title="open"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a> function can return a file-like object
|
||||
that assumes the file’s contents are in a specified encoding and accepts Unicode
|
||||
parameters for methods such as <a class="reference internal" href="../library/io.html#io.TextIOBase.read" title="io.TextIOBase.read"><code class="xref py py-meth docutils literal notranslate"><span class="pre">read()</span></code></a> and
|
||||
<a class="reference internal" href="../library/io.html#io.TextIOBase.write" title="io.TextIOBase.write"><code class="xref py py-meth docutils literal notranslate"><span class="pre">write()</span></code></a>. This works through <a class="reference internal" href="../library/functions.html#open" title="open"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a>’s <em>encoding</em> and
|
||||
<em>errors</em> parameters which are interpreted just like those in <a class="reference internal" href="../library/stdtypes.html#str.encode" title="str.encode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">str.encode()</span></code></a>
|
||||
and <a class="reference internal" href="../library/stdtypes.html#bytes.decode" title="bytes.decode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">bytes.decode()</span></code></a>.</p>
|
||||
<p>Reading Unicode from a file is therefore simple:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="s1">'unicode.txt'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">f</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">line</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>It’s also possible to open files in update mode, allowing both reading and
|
||||
writing:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="s1">'test'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">'w+'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
||||
<span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s1">'</span><span class="se">\u4500</span><span class="s1"> blah blah blah</span><span class="se">\n</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="n">f</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">readline</span><span class="p">()[:</span><span class="mi">1</span><span class="p">]))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The Unicode character <code class="docutils literal notranslate"><span class="pre">U+FEFF</span></code> is used as a byte-order mark (BOM), and is often
|
||||
written as the first character of a file in order to assist with autodetection
|
||||
of the file’s byte ordering. Some encodings, such as UTF-16, expect a BOM to be
|
||||
present at the start of a file; when such an encoding is used, the BOM will be
|
||||
automatically written as the first character and will be silently dropped when
|
||||
the file is read. There are variants of these encodings, such as ‘utf-16-le’
|
||||
and ‘utf-16-be’ for little-endian and big-endian encodings, that specify one
|
||||
particular byte ordering and don’t skip the BOM.</p>
|
||||
<p>In some areas, it is also convention to use a “BOM” at the start of UTF-8
|
||||
encoded files; the name is misleading since UTF-8 is not byte-order dependent.
|
||||
The mark simply announces that the file is encoded in UTF-8. For reading such
|
||||
files, use the ‘utf-8-sig’ codec to automatically skip the mark if present.</p>
|
||||
<div class="section" id="unicode-filenames">
|
||||
<h3>Unicode filenames<a class="headerlink" href="#unicode-filenames" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Most of the operating systems in common use today support filenames
|
||||
that contain arbitrary Unicode characters. Usually this is
|
||||
implemented by converting the Unicode string into some encoding that
|
||||
varies depending on the system. Today Python is converging on using
|
||||
UTF-8:  Python on macOS has used UTF-8 for several versions, and Python
|
||||
3.6 switched to using UTF-8 on Windows as well. On Unix systems,
|
||||
there will only be a filesystem encoding if you’ve set the <code class="docutils literal notranslate"><span class="pre">LANG</span></code> or
|
||||
<code class="docutils literal notranslate"><span class="pre">LC_CTYPE</span></code> environment variables; if you haven’t, the default
|
||||
encoding is again UTF-8.</p>
|
||||
<p>The <a class="reference internal" href="../library/sys.html#sys.getfilesystemencoding" title="sys.getfilesystemencoding"><code class="xref py py-func docutils literal notranslate"><span class="pre">sys.getfilesystemencoding()</span></code></a> function returns the encoding to use on
|
||||
your current system, in case you want to do the encoding manually, but there’s
|
||||
not much reason to bother. When opening a file for reading or writing, you can
|
||||
usually just provide the Unicode string as the filename, and it will be
|
||||
automatically converted to the right encoding for you:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">filename</span> <span class="o">=</span> <span class="s1">'filename</span><span class="se">\u4500</span><span class="s1">abc'</span>
|
||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="s1">'w'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
||||
<span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s1">'blah</span><span class="se">\n</span><span class="s1">'</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Functions in the <a class="reference internal" href="../library/os.html#module-os" title="os: Miscellaneous operating system interfaces."><code class="xref py py-mod docutils literal notranslate"><span class="pre">os</span></code></a> module such as <a class="reference internal" href="../library/os.html#os.stat" title="os.stat"><code class="xref py py-func docutils literal notranslate"><span class="pre">os.stat()</span></code></a> will also accept Unicode
|
||||
filenames.</p>
|
||||
<p>The <a class="reference internal" href="../library/os.html#os.listdir" title="os.listdir"><code class="xref py py-func docutils literal notranslate"><span class="pre">os.listdir()</span></code></a> function returns filenames, which raises an issue: should it return
|
||||
the Unicode version of filenames, or should it return bytes containing
|
||||
the encoded versions? <a class="reference internal" href="../library/os.html#os.listdir" title="os.listdir"><code class="xref py py-func docutils literal notranslate"><span class="pre">os.listdir()</span></code></a> can do both, depending on whether you
|
||||
provided the directory path as bytes or a Unicode string. If you pass a
|
||||
Unicode string as the path, filenames will be decoded using the filesystem’s
|
||||
encoding and a list of Unicode strings will be returned, while passing a byte
|
||||
path will return the filenames as bytes. For example,
|
||||
assuming the default filesystem encoding is UTF-8, running the following
|
||||
program:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">fn</span> <span class="o">=</span> <span class="s1">'filename</span><span class="se">\u4500</span><span class="s1">abc'</span>
|
||||
<span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">fn</span><span class="p">,</span> <span class="s1">'w'</span><span class="p">)</span>
|
||||
<span class="n">f</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">os</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="sa">b</span><span class="s1">'.'</span><span class="p">))</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="s1">'.'</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>will produce the following output:</p>
|
||||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> python listdir-test.py
|
||||
<span class="go">[b'filename\xe4\x94\x80abc', ...]</span>
|
||||
<span class="go">['filename\u4500abc', ...]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The first list contains UTF-8-encoded filenames, and the second list contains
|
||||
the Unicode versions.</p>
|
||||
<p>Note that on most occasions, you can just stick with using
|
||||
Unicode with these APIs. The bytes APIs should only be used on
|
||||
systems where undecodable file names can be present; that’s
|
||||
pretty much only Unix systems now.</p>
|
||||
</div>
|
||||
<div class="section" id="tips-for-writing-unicode-aware-programs">
|
||||
<h3>Tips for Writing Unicode-aware Programs<a class="headerlink" href="#tips-for-writing-unicode-aware-programs" title="Permalink to this headline">¶</a></h3>
|
||||
<p>This section provides some suggestions on writing software that deals with
|
||||
Unicode.</p>
|
||||
<p>The most important tip is:</p>
|
||||
<blockquote>
|
||||
<div><p>Software should only work with Unicode strings internally, decoding the input
|
||||
data as soon as possible and encoding the output only at the end.</p>
|
||||
</div></blockquote>
|
||||
<p>If you attempt to write processing functions that accept both Unicode and byte
|
||||
strings, you will find your program vulnerable to bugs wherever you combine the
|
||||
two different kinds of strings. There is no automatic encoding or decoding: if
|
||||
you do e.g. <code class="docutils literal notranslate"><span class="pre">str</span> <span class="pre">+</span> <span class="pre">bytes</span></code>, a <a class="reference internal" href="../library/exceptions.html#TypeError" title="TypeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> will be raised.</p>
|
||||
<p>When using data coming from a web browser or some other untrusted source, a
|
||||
common technique is to check for illegal characters in a string before using the
|
||||
string in a generated command line or storing it in a database. If you’re doing
|
||||
this, be careful to check the decoded string, not the encoded bytes data;
|
||||
some encodings may have interesting properties, such as not being bijective
|
||||
or not being fully ASCII-compatible. This is especially true if the input
|
||||
data also specifies the encoding, since the attacker can then choose a
|
||||
clever way to hide malicious text in the encoded bytestream.</p>
|
||||
<div class="section" id="converting-between-file-encodings">
|
||||
<h4>Converting Between File Encodings<a class="headerlink" href="#converting-between-file-encodings" title="Permalink to this headline">¶</a></h4>
|
||||
<p>The <a class="reference internal" href="../library/codecs.html#codecs.StreamRecoder" title="codecs.StreamRecoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamRecoder</span></code></a> class can transparently convert between
|
||||
encodings, taking a stream that returns data in encoding #1
|
||||
and behaving like a stream returning data in encoding #2.</p>
|
||||
<p>For example, if you have an input file <em>f</em> that’s in Latin-1, you
|
||||
can wrap it with a <a class="reference internal" href="../library/codecs.html#codecs.StreamRecoder" title="codecs.StreamRecoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamRecoder</span></code></a> to return bytes encoded in
|
||||
UTF-8:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">new_f</span> <span class="o">=</span> <span class="n">codecs</span><span class="o">.</span><span class="n">StreamRecoder</span><span class="p">(</span><span class="n">f</span><span class="p">,</span>
|
||||
<span class="c1"># en/decoder: used by read() to encode its results and</span>
|
||||
<span class="c1"># by write() to decode its input.</span>
|
||||
<span class="n">codecs</span><span class="o">.</span><span class="n">getencoder</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">),</span> <span class="n">codecs</span><span class="o">.</span><span class="n">getdecoder</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">),</span>
|
||||
|
||||
<span class="c1"># reader/writer: used to read and write to the stream.</span>
|
||||
<span class="n">codecs</span><span class="o">.</span><span class="n">getreader</span><span class="p">(</span><span class="s1">'latin-1'</span><span class="p">),</span> <span class="n">codecs</span><span class="o">.</span><span class="n">getwriter</span><span class="p">(</span><span class="s1">'latin-1'</span><span class="p">)</span> <span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="files-in-an-unknown-encoding">
|
||||
<h4>Files in an Unknown Encoding<a class="headerlink" href="#files-in-an-unknown-encoding" title="Permalink to this headline">¶</a></h4>
|
||||
<p>What can you do if you need to make a change to a file, but don’t know
|
||||
the file’s encoding? If you know the encoding is ASCII-compatible and
|
||||
only want to examine or modify the ASCII parts, you can open the file
|
||||
with the <code class="docutils literal notranslate"><span class="pre">surrogateescape</span></code> error handler:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">fname</span><span class="p">,</span> <span class="s1">'r'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"ascii"</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="s2">"surrogateescape"</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># make changes to the string 'data'</span>
|
||||
|
||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">fname</span> <span class="o">+</span> <span class="s1">'.new'</span><span class="p">,</span> <span class="s1">'w'</span><span class="p">,</span>
|
||||
<span class="n">encoding</span><span class="o">=</span><span class="s2">"ascii"</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="s2">"surrogateescape"</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
||||
<span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The <code class="docutils literal notranslate"><span class="pre">surrogateescape</span></code> error handler will decode any non-ASCII bytes
|
||||
as code points in a special range running from U+DC80 to
|
||||
U+DCFF. These code points will then turn back into the
|
||||
same bytes when the <code class="docutils literal notranslate"><span class="pre">surrogateescape</span></code> error handler is used to
|
||||
encode the data and write it back out.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="id3">
|
||||
<h3>References<a class="headerlink" href="#id3" title="Permalink to this headline">¶</a></h3>
|
||||
<p>One section of <a class="reference external" href="http://pyvideo.org/video/289/pycon-2010--mastering-python-3-i-o">Mastering Python 3 Input/Output</a>,
|
||||
a PyCon 2010 talk by David Beazley, discusses text processing and binary data handling.</p>
|
||||
<p>The <a class="reference external" href="https://downloads.egenix.com/python/LSM2005-Developing-Unicode-aware-applications-in-Python.pdf">PDF slides for Marc-André Lemburg’s presentation “Writing Unicode-aware
|
||||
Applications in Python”</a>
|
||||
discuss questions of character encodings as well as how to internationalize
|
||||
and localize an application. These slides cover Python 2.x only.</p>
|
||||
<p><a class="reference external" href="http://pyvideo.org/video/1768/the-guts-of-unicode-in-python">The Guts of Unicode in Python</a>
|
||||
is a PyCon 2013 talk by Benjamin Peterson that discusses the internal Unicode
|
||||
representation in Python 3.3.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="acknowledgements">
|
||||
<h2>Acknowledgements<a class="headerlink" href="#acknowledgements" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The initial draft of this document was written by Andrew Kuchling.
|
||||
It has since been revised further by Alexander Belopolsky, Georg Brandl,
|
||||
Andrew Kuchling, and Ezio Melotti.</p>
|
||||
<p>Thanks to the following people who have noted errors or offered
|
||||
suggestions on this article: Éric Araujo, Nicholas Bastin, Nick
|
||||
Coghlan, Marius Gedminas, Kent Johnson, Ken Krugler, Marc-André
|
||||
Lemburg, Martin von Löwis, Terry J. Reedy, Serhiy Storchaka,
|
||||
Eryk Sun, Chad Whitacre, Graham Wideman.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li><a class="reference internal" href="#">Unicode HOWTO</a><ul>
|
||||
<li><a class="reference internal" href="#introduction-to-unicode">Introduction to Unicode</a><ul>
|
||||
<li><a class="reference internal" href="#definitions">Definitions</a></li>
|
||||
<li><a class="reference internal" href="#encodings">Encodings</a></li>
|
||||
<li><a class="reference internal" href="#references">References</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#python-s-unicode-support">Python’s Unicode Support</a><ul>
|
||||
<li><a class="reference internal" href="#the-string-type">The String Type</a></li>
|
||||
<li><a class="reference internal" href="#converting-to-bytes">Converting to Bytes</a></li>
|
||||
<li><a class="reference internal" href="#unicode-literals-in-python-source-code">Unicode Literals in Python Source Code</a></li>
|
||||
<li><a class="reference internal" href="#unicode-properties">Unicode Properties</a></li>
|
||||
<li><a class="reference internal" href="#comparing-strings">Comparing Strings</a></li>
|
||||
<li><a class="reference internal" href="#unicode-regular-expressions">Unicode Regular Expressions</a></li>
|
||||
<li><a class="reference internal" href="#id2">References</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#reading-and-writing-unicode-data">Reading and Writing Unicode Data</a><ul>
|
||||
<li><a class="reference internal" href="#unicode-filenames">Unicode filenames</a></li>
|
||||
<li><a class="reference internal" href="#tips-for-writing-unicode-aware-programs">Tips for Writing Unicode-aware Programs</a><ul>
|
||||
<li><a class="reference internal" href="#converting-between-file-encodings">Converting Between File Encodings</a></li>
|
||||
<li><a class="reference internal" href="#files-in-an-unknown-encoding">Files in an Unknown Encoding</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#id3">References</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#acknowledgements">Acknowledgements</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h4>Previous topic</h4>
|
||||
<p class="topless"><a href="sorting.html"
|
||||
title="previous chapter">Sorting HOW TO</a></p>
|
||||
<h4>Next topic</h4>
|
||||
<p class="topless"><a href="urllib2.html"
|
||||
title="next chapter">HOWTO Fetch Internet Resources Using The urllib Package</a></p>
|
||||
<div role="note" aria-label="source link">
|
||||
<h3>This Page</h3>
|
||||
<ul class="this-page-menu">
|
||||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||||
<li>
|
||||
<a href="https://github.com/python/cpython/blob/3.7/Doc/howto/unicode.rst"
|
||||
rel="nofollow">Show Source
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="urllib2.html" title="HOWTO Fetch Internet Resources Using The urllib Package"
|
||||
>next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="sorting.html" title="Sorting HOW TO"
|
||||
>previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer">
|
||||
© <a href="../copyright.html">Copyright</a> 2001-2019, Python Software Foundation.
|
||||
<br />
|
||||
The Python Software Foundation is a non-profit corporation.
|
||||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||||
<br />
|
||||
Last updated on Jul 13, 2019.
|
||||
<a href="../bugs.html">Found a bug</a>?
|
||||
<br />
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 2.0.1.
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
770
python-3.7.4-docs-html/howto/urllib2.html
Normal file
770
python-3.7.4-docs-html/howto/urllib2.html
Normal file
@@ -0,0 +1,770 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>HOWTO Fetch Internet Resources Using The urllib Package — Python 3.7.4 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pydoctheme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/sidebar.js"></script>
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml"
|
||||
title="Search within Python 3.7.4 documentation"
|
||||
href="../_static/opensearch.xml"/>
|
||||
<link rel="author" title="About these documents" href="../about.html" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||||
<link rel="next" title="Argparse Tutorial" href="argparse.html" />
|
||||
<link rel="prev" title="Unicode HOWTO" href="unicode.html" />
|
||||
<link rel="shortcut icon" type="image/png" href="../_static/py.png" />
|
||||
<link rel="canonical" href="https://docs.python.org/3/howto/urllib2.html" />
|
||||
|
||||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||||
<script type="text/javascript" src="../_static/switchers.js"></script>
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
@media only screen {
|
||||
table.full-width-table {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
</head><body>
|
||||
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="argparse.html" title="Argparse Tutorial"
|
||||
accesskey="N">next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="unicode.html" title="Unicode HOWTO"
|
||||
accesskey="P">previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<div class="section" id="howto-fetch-internet-resources-using-the-urllib-package">
|
||||
<span id="urllib-howto"></span><h1>HOWTO Fetch Internet Resources Using The urllib Package<a class="headerlink" href="#howto-fetch-internet-resources-using-the-urllib-package" title="Permalink to this headline">¶</a></h1>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Author</dt>
|
||||
<dd class="field-odd"><p><a class="reference external" href="http://www.voidspace.org.uk/python/index.shtml">Michael Foord</a></p>
|
||||
</dd>
|
||||
</dl>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>There is a French translation of an earlier revision of this
|
||||
HOWTO, available at <a class="reference external" href="http://www.voidspace.org.uk/python/articles/urllib2_francais.shtml">urllib2 - Le Manuel manquant</a>.</p>
|
||||
</div>
|
||||
<div class="section" id="introduction">
|
||||
<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this headline">¶</a></h2>
|
||||
<div class="sidebar">
|
||||
<p class="sidebar-title">Related Articles</p>
|
||||
<p>You may also find useful the following article on fetching web resources
|
||||
with Python:</p>
|
||||
<ul>
|
||||
<li><p><a class="reference external" href="http://www.voidspace.org.uk/python/articles/authentication.shtml">Basic Authentication</a></p>
|
||||
<blockquote>
|
||||
<div><p>A tutorial on <em>Basic Authentication</em>, with examples in Python.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<p><strong>urllib.request</strong> is a Python module for fetching URLs
|
||||
(Uniform Resource Locators). It offers a very simple interface, in the form of
|
||||
the <em>urlopen</em> function. This is capable of fetching URLs using a variety of
|
||||
different protocols. It also offers a slightly more complex interface for
|
||||
handling common situations - like basic authentication, cookies, proxies and so
|
||||
on. These are provided by objects called handlers and openers.</p>
|
||||
<p>urllib.request supports fetching URLs for many “URL schemes” (identified by the string
|
||||
before the <code class="docutils literal notranslate"><span class="pre">":"</span></code> in the URL - for example <code class="docutils literal notranslate"><span class="pre">"ftp"</span></code> is the URL scheme of
|
||||
<code class="docutils literal notranslate"><span class="pre">"ftp://python.org/"</span></code>) using their associated network protocols (e.g. FTP, HTTP).
|
||||
This tutorial focuses on the most common case, HTTP.</p>
|
||||
<p>For straightforward situations <em>urlopen</em> is very easy to use. But as soon as you
|
||||
encounter errors or non-trivial cases when opening HTTP URLs, you will need some
|
||||
understanding of the HyperText Transfer Protocol. The most comprehensive and
|
||||
authoritative reference to HTTP is <span class="target" id="index-0"></span><a class="rfc reference external" href="https://tools.ietf.org/html/rfc2616.html"><strong>RFC 2616</strong></a>. This is a technical document and
|
||||
not intended to be easy to read. This HOWTO aims to illustrate using <em>urllib</em>,
|
||||
with enough detail about HTTP to help you through. It is not intended to replace
|
||||
the <a class="reference internal" href="../library/urllib.request.html#module-urllib.request" title="urllib.request: Extensible library for opening URLs."><code class="xref py py-mod docutils literal notranslate"><span class="pre">urllib.request</span></code></a> docs, but is supplementary to them.</p>
|
||||
</div>
|
||||
<div class="section" id="fetching-urls">
|
||||
<h2>Fetching URLs<a class="headerlink" href="#fetching-urls" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The simplest way to use urllib.request is as follows:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">urllib.request</span>
|
||||
<span class="k">with</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="s1">'http://python.org/'</span><span class="p">)</span> <span class="k">as</span> <span class="n">response</span><span class="p">:</span>
|
||||
    <span class="n">html</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>If you wish to retrieve a resource via URL and store it in a temporary
|
||||
location, you can do so via the <a class="reference internal" href="../library/shutil.html#shutil.copyfileobj" title="shutil.copyfileobj"><code class="xref py py-func docutils literal notranslate"><span class="pre">shutil.copyfileobj()</span></code></a> and
|
||||
<a class="reference internal" href="../library/tempfile.html#tempfile.NamedTemporaryFile" title="tempfile.NamedTemporaryFile"><code class="xref py py-func docutils literal notranslate"><span class="pre">tempfile.NamedTemporaryFile()</span></code></a> functions:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">shutil</span>
|
||||
<span class="kn">import</span> <span class="nn">tempfile</span>
|
||||
<span class="kn">import</span> <span class="nn">urllib.request</span>
|
||||
|
||||
<span class="k">with</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="s1">'http://python.org/'</span><span class="p">)</span> <span class="k">as</span> <span class="n">response</span><span class="p">:</span>
|
||||
    <span class="k">with</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">NamedTemporaryFile</span><span class="p">(</span><span class="n">delete</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="k">as</span> <span class="n">tmp_file</span><span class="p">:</span>
|
||||
        <span class="n">shutil</span><span class="o">.</span><span class="n">copyfileobj</span><span class="p">(</span><span class="n">response</span><span class="p">,</span> <span class="n">tmp_file</span><span class="p">)</span>
|
||||
|
||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">tmp_file</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> <span class="k">as</span> <span class="n">html</span><span class="p">:</span>
|
||||
    <span class="k">pass</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Many uses of urllib will be that simple (note that instead of an ‘http:’ URL we
|
||||
could have used a URL starting with ‘ftp:’, ‘file:’, etc.). However, it’s the
|
||||
purpose of this tutorial to explain the more complicated cases, concentrating on
|
||||
HTTP.</p>
|
||||
<p>HTTP is based on requests and responses - the client makes requests and servers
|
||||
send responses. urllib.request mirrors this with a <code class="docutils literal notranslate"><span class="pre">Request</span></code> object which represents
|
||||
the HTTP request you are making. In its simplest form you create a Request
|
||||
object that specifies the URL you want to fetch. Calling <code class="docutils literal notranslate"><span class="pre">urlopen</span></code> with this
|
||||
Request object returns a response object for the URL requested. This response is
|
||||
a file-like object, which means you can for example call <code class="docutils literal notranslate"><span class="pre">.read()</span></code> on the
|
||||
response:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">urllib.request</span>
|
||||
|
||||
<span class="n">req</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">Request</span><span class="p">(</span><span class="s1">'http://www.voidspace.org.uk'</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="n">req</span><span class="p">)</span> <span class="k">as</span> <span class="n">response</span><span class="p">:</span>
|
||||
    <span class="n">the_page</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Note that urllib.request makes use of the same Request interface to handle all URL
|
||||
schemes. For example, you can make an FTP request like so:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">req</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">Request</span><span class="p">(</span><span class="s1">'ftp://example.com/'</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>In the case of HTTP, there are two extra things that Request objects allow you
|
||||
to do: First, you can pass data to be sent to the server. Second, you can pass
|
||||
extra information (“metadata”) <em>about</em> the data or about the request itself, to
|
||||
the server - this information is sent as HTTP “headers”. Let’s look at each of
|
||||
these in turn.</p>
|
||||
<div class="section" id="data">
|
||||
<h3>Data<a class="headerlink" href="#data" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Sometimes you want to send data to a URL (often the URL will refer to a CGI
|
||||
(Common Gateway Interface) script or other web application). With HTTP,
|
||||
this is often done using what’s known as a <strong>POST</strong> request. This is often what
|
||||
your browser does when you submit an HTML form that you filled in on the web. Not
|
||||
all POSTs have to come from forms: you can use a POST to transmit arbitrary data
|
||||
to your own application. In the common case of HTML forms, the data needs to be
|
||||
encoded in a standard way, and then passed to the Request object as the <code class="docutils literal notranslate"><span class="pre">data</span></code>
|
||||
argument. The encoding is done using a function from the <a class="reference internal" href="../library/urllib.parse.html#module-urllib.parse" title="urllib.parse: Parse URLs into or assemble them from components."><code class="xref py py-mod docutils literal notranslate"><span class="pre">urllib.parse</span></code></a>
|
||||
library.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">urllib.parse</span>
|
||||
<span class="kn">import</span> <span class="nn">urllib.request</span>
|
||||
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="s1">'http://www.someserver.com/cgi-bin/register.cgi'</span>
|
||||
<span class="n">values</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'name'</span> <span class="p">:</span> <span class="s1">'Michael Foord'</span><span class="p">,</span>
|
||||
<span class="s1">'location'</span> <span class="p">:</span> <span class="s1">'Northampton'</span><span class="p">,</span>
|
||||
<span class="s1">'language'</span> <span class="p">:</span> <span class="s1">'Python'</span> <span class="p">}</span>
|
||||
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">urlencode</span><span class="p">(</span><span class="n">values</span><span class="p">)</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'ascii'</span><span class="p">)</span> <span class="c1"># data should be bytes</span>
|
||||
<span class="n">req</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">Request</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="n">req</span><span class="p">)</span> <span class="k">as</span> <span class="n">response</span><span class="p">:</span>
|
||||
    <span class="n">the_page</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Note that other encodings are sometimes required (e.g. for file upload from HTML
|
||||
forms - see <a class="reference external" href="https://www.w3.org/TR/REC-html40/interact/forms.html#h-17.13">HTML Specification, Form Submission</a> for more
|
||||
details).</p>
|
||||
<p>If you do not pass the <code class="docutils literal notranslate"><span class="pre">data</span></code> argument, urllib uses a <strong>GET</strong> request. One
|
||||
way in which GET and POST requests differ is that POST requests often have
|
||||
“side-effects”: they change the state of the system in some way (for example by
|
||||
placing an order with the website for a hundredweight of tinned spam to be
|
||||
delivered to your door). Though the HTTP standard makes it clear that POSTs are
|
||||
intended to <em>always</em> cause side-effects, and GET requests <em>never</em> to cause
|
||||
side-effects, nothing prevents a GET request from having side-effects, nor a
|
||||
POST request from having no side-effects. Data can also be passed in an HTTP
|
||||
GET request by encoding it in the URL itself.</p>
|
||||
<p>This is done as follows:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">urllib.request</span>
|
||||
<span class="gp">>>> </span><span class="kn">import</span> <span class="nn">urllib.parse</span>
|
||||
<span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="gp">>>> </span><span class="n">data</span><span class="p">[</span><span class="s1">'name'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'Somebody Here'</span>
|
||||
<span class="gp">>>> </span><span class="n">data</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'Northampton'</span>
|
||||
<span class="gp">>>> </span><span class="n">data</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'Python'</span>
|
||||
<span class="gp">>>> </span><span class="n">url_values</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">urlencode</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">url_values</span><span class="p">)</span> <span class="c1"># The order may differ from below. </span>
|
||||
<span class="go">name=Somebody+Here&amp;language=Python&amp;location=Northampton</span>
|
||||
<span class="gp">>>> </span><span class="n">url</span> <span class="o">=</span> <span class="s1">'http://www.example.com/example.cgi'</span>
|
||||
<span class="gp">>>> </span><span class="n">full_url</span> <span class="o">=</span> <span class="n">url</span> <span class="o">+</span> <span class="s1">'?'</span> <span class="o">+</span> <span class="n">url_values</span>
|
||||
<span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="n">full_url</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Notice that the full URL is created by adding a <code class="docutils literal notranslate"><span class="pre">?</span></code> to the URL, followed by
|
||||
the encoded values.</p>
|
||||
</div>
|
||||
<div class="section" id="headers">
|
||||
<h3>Headers<a class="headerlink" href="#headers" title="Permalink to this headline">¶</a></h3>
|
||||
<p>We’ll discuss here one particular HTTP header, to illustrate how to add headers
|
||||
to your HTTP request.</p>
|
||||
<p>Some websites <a class="footnote-reference brackets" href="#id8" id="id1">1</a> dislike being browsed by programs, or send different versions
|
||||
to different browsers <a class="footnote-reference brackets" href="#id9" id="id2">2</a>. By default urllib identifies itself as
|
||||
<code class="docutils literal notranslate"><span class="pre">Python-urllib/x.y</span></code> (where <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code> are the major and minor version
|
||||
numbers of the Python release,
|
||||
e.g. <code class="docutils literal notranslate"><span class="pre">Python-urllib/2.5</span></code>), which may confuse the site, or just plain
|
||||
not work. The way a browser identifies itself is through the
|
||||
<code class="docutils literal notranslate"><span class="pre">User-Agent</span></code> header <a class="footnote-reference brackets" href="#id10" id="id3">3</a>. When you create a Request object you can
|
||||
pass a dictionary of headers in. The following example makes the same
|
||||
request as above, but identifies itself as a version of Internet
|
||||
Explorer <a class="footnote-reference brackets" href="#id11" id="id4">4</a>.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">urllib.parse</span>
|
||||
<span class="kn">import</span> <span class="nn">urllib.request</span>
|
||||
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="s1">'http://www.someserver.com/cgi-bin/register.cgi'</span>
|
||||
<span class="n">user_agent</span> <span class="o">=</span> <span class="s1">'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'</span>
|
||||
<span class="n">values</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'Michael Foord'</span><span class="p">,</span>
|
||||
<span class="s1">'location'</span><span class="p">:</span> <span class="s1">'Northampton'</span><span class="p">,</span>
|
||||
<span class="s1">'language'</span><span class="p">:</span> <span class="s1">'Python'</span> <span class="p">}</span>
|
||||
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'User-Agent'</span><span class="p">:</span> <span class="n">user_agent</span><span class="p">}</span>
|
||||
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">urlencode</span><span class="p">(</span><span class="n">values</span><span class="p">)</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'ascii'</span><span class="p">)</span>
|
||||
<span class="n">req</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">Request</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">headers</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="n">req</span><span class="p">)</span> <span class="k">as</span> <span class="n">response</span><span class="p">:</span>
|
||||
    <span class="n">the_page</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The response also has two useful methods. See the section on <a class="reference internal" href="#info-and-geturl">info and geturl</a>
|
||||
which comes after we have a look at what happens when things go wrong.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="handling-exceptions">
|
||||
<h2>Handling Exceptions<a class="headerlink" href="#handling-exceptions" title="Permalink to this headline">¶</a></h2>
|
||||
<p><em>urlopen</em> raises <code class="xref py py-exc docutils literal notranslate"><span class="pre">URLError</span></code> when it cannot handle a response (though as
|
||||
usual with Python APIs, built-in exceptions such as <a class="reference internal" href="../library/exceptions.html#ValueError" title="ValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a>,
|
||||
<a class="reference internal" href="../library/exceptions.html#TypeError" title="TypeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> etc. may also be raised).</p>
|
||||
<p><code class="xref py py-exc docutils literal notranslate"><span class="pre">HTTPError</span></code> is the subclass of <code class="xref py py-exc docutils literal notranslate"><span class="pre">URLError</span></code> raised in the specific case of
|
||||
HTTP URLs.</p>
|
||||
<p>The exception classes are exported from the <a class="reference internal" href="../library/urllib.error.html#module-urllib.error" title="urllib.error: Exception classes raised by urllib.request."><code class="xref py py-mod docutils literal notranslate"><span class="pre">urllib.error</span></code></a> module.</p>
|
||||
<div class="section" id="urlerror">
|
||||
<h3>URLError<a class="headerlink" href="#urlerror" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Often, URLError is raised because there is no network connection (no route to
|
||||
the specified server), or the specified server doesn’t exist. In this case, the
|
||||
exception raised will have a ‘reason’ attribute, which is a tuple containing an
|
||||
error code and a text error message.</p>
|
||||
<p>e.g.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">req</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">Request</span><span class="p">(</span><span class="s1">'http://www.pretend_server.org'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="k">try</span><span class="p">:</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="n">req</span><span class="p">)</span>
|
||||
<span class="gp">... </span><span class="k">except</span> <span class="n">urllib</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">URLError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
<span class="gp">... </span>    <span class="nb">print</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="n">reason</span><span class="p">)</span>
|
||||
<span class="gp">...</span>
|
||||
<span class="go">(4, 'getaddrinfo failed')</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="httperror">
|
||||
<h3>HTTPError<a class="headerlink" href="#httperror" title="Permalink to this headline">¶</a></h3>
|
||||
<p>Every HTTP response from the server contains a numeric “status code”. Sometimes
|
||||
the status code indicates that the server is unable to fulfil the request. The
|
||||
default handlers will handle some of these responses for you (for example, if
|
||||
the response is a “redirection” that requests the client fetch the document from
|
||||
a different URL, urllib will handle that for you). For those it can’t handle,
|
||||
urlopen will raise an <code class="xref py py-exc docutils literal notranslate"><span class="pre">HTTPError</span></code>. Typical errors include ‘404’ (page not
|
||||
found), ‘403’ (request forbidden), and ‘401’ (authentication required).</p>
|
||||
<p>See section 10 of <span class="target" id="index-1"></span><a class="rfc reference external" href="https://tools.ietf.org/html/rfc2616.html"><strong>RFC 2616</strong></a> for a reference on all the HTTP error codes.</p>
|
||||
<p>The <code class="xref py py-exc docutils literal notranslate"><span class="pre">HTTPError</span></code> instance raised will have an integer ‘code’ attribute, which
|
||||
corresponds to the error sent by the server.</p>
|
||||
<div class="section" id="error-codes">
|
||||
<h4>Error Codes<a class="headerlink" href="#error-codes" title="Permalink to this headline">¶</a></h4>
|
||||
<p>Because the default handlers handle redirects (codes in the 300 range), and
|
||||
codes in the 100–299 range indicate success, you will usually only see error
|
||||
codes in the 400–599 range.</p>
|
||||
<p><a class="reference internal" href="../library/http.server.html#http.server.BaseHTTPRequestHandler.responses" title="http.server.BaseHTTPRequestHandler.responses"><code class="xref py py-attr docutils literal notranslate"><span class="pre">http.server.BaseHTTPRequestHandler.responses</span></code></a> is a useful dictionary of
|
||||
response codes that shows all the response codes used by <span class="target" id="index-2"></span><a class="rfc reference external" href="https://tools.ietf.org/html/rfc2616.html"><strong>RFC 2616</strong></a>. The
|
||||
dictionary is reproduced here for convenience:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="c1"># Table mapping response codes to messages; entries have the</span>
|
||||
<span class="c1"># form {code: (shortmessage, longmessage)}.</span>
|
||||
<span class="n">responses</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="mi">100</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Continue'</span><span class="p">,</span> <span class="s1">'Request received, please continue'</span><span class="p">),</span>
|
||||
<span class="mi">101</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Switching Protocols'</span><span class="p">,</span>
|
||||
<span class="s1">'Switching to new protocol; obey Upgrade header'</span><span class="p">),</span>
|
||||
|
||||
<span class="mi">200</span><span class="p">:</span> <span class="p">(</span><span class="s1">'OK'</span><span class="p">,</span> <span class="s1">'Request fulfilled, document follows'</span><span class="p">),</span>
|
||||
<span class="mi">201</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Created'</span><span class="p">,</span> <span class="s1">'Document created, URL follows'</span><span class="p">),</span>
|
||||
<span class="mi">202</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Accepted'</span><span class="p">,</span>
|
||||
<span class="s1">'Request accepted, processing continues off-line'</span><span class="p">),</span>
|
||||
<span class="mi">203</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Non-Authoritative Information'</span><span class="p">,</span> <span class="s1">'Request fulfilled from cache'</span><span class="p">),</span>
|
||||
<span class="mi">204</span><span class="p">:</span> <span class="p">(</span><span class="s1">'No Content'</span><span class="p">,</span> <span class="s1">'Request fulfilled, nothing follows'</span><span class="p">),</span>
|
||||
<span class="mi">205</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Reset Content'</span><span class="p">,</span> <span class="s1">'Clear input form for further input.'</span><span class="p">),</span>
|
||||
<span class="mi">206</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Partial Content'</span><span class="p">,</span> <span class="s1">'Partial content follows.'</span><span class="p">),</span>
|
||||
|
||||
<span class="mi">300</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Multiple Choices'</span><span class="p">,</span>
|
||||
<span class="s1">'Object has several resources -- see URI list'</span><span class="p">),</span>
|
||||
<span class="mi">301</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Moved Permanently'</span><span class="p">,</span> <span class="s1">'Object moved permanently -- see URI list'</span><span class="p">),</span>
|
||||
<span class="mi">302</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Found'</span><span class="p">,</span> <span class="s1">'Object moved temporarily -- see URI list'</span><span class="p">),</span>
|
||||
<span class="mi">303</span><span class="p">:</span> <span class="p">(</span><span class="s1">'See Other'</span><span class="p">,</span> <span class="s1">'Object moved -- see Method and URL list'</span><span class="p">),</span>
|
||||
<span class="mi">304</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Not Modified'</span><span class="p">,</span>
|
||||
<span class="s1">'Document has not changed since given time'</span><span class="p">),</span>
|
||||
<span class="mi">305</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Use Proxy'</span><span class="p">,</span>
|
||||
<span class="s1">'You must use proxy specified in Location to access this '</span>
|
||||
<span class="s1">'resource.'</span><span class="p">),</span>
|
||||
<span class="mi">307</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Temporary Redirect'</span><span class="p">,</span>
|
||||
<span class="s1">'Object moved temporarily -- see URI list'</span><span class="p">),</span>
|
||||
|
||||
<span class="mi">400</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Bad Request'</span><span class="p">,</span>
|
||||
<span class="s1">'Bad request syntax or unsupported method'</span><span class="p">),</span>
|
||||
<span class="mi">401</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Unauthorized'</span><span class="p">,</span>
|
||||
<span class="s1">'No permission -- see authorization schemes'</span><span class="p">),</span>
|
||||
<span class="mi">402</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Payment Required'</span><span class="p">,</span>
|
||||
<span class="s1">'No payment -- see charging schemes'</span><span class="p">),</span>
|
||||
<span class="mi">403</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Forbidden'</span><span class="p">,</span>
|
||||
<span class="s1">'Request forbidden -- authorization will not help'</span><span class="p">),</span>
|
||||
<span class="mi">404</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Not Found'</span><span class="p">,</span> <span class="s1">'Nothing matches the given URI'</span><span class="p">),</span>
|
||||
<span class="mi">405</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Method Not Allowed'</span><span class="p">,</span>
|
||||
<span class="s1">'Specified method is invalid for this server.'</span><span class="p">),</span>
|
||||
<span class="mi">406</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Not Acceptable'</span><span class="p">,</span> <span class="s1">'URI not available in preferred format.'</span><span class="p">),</span>
|
||||
<span class="mi">407</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Proxy Authentication Required'</span><span class="p">,</span> <span class="s1">'You must authenticate with '</span>
|
||||
<span class="s1">'this proxy before proceeding.'</span><span class="p">),</span>
|
||||
<span class="mi">408</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Request Timeout'</span><span class="p">,</span> <span class="s1">'Request timed out; try again later.'</span><span class="p">),</span>
|
||||
<span class="mi">409</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Conflict'</span><span class="p">,</span> <span class="s1">'Request conflict.'</span><span class="p">),</span>
|
||||
<span class="mi">410</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Gone'</span><span class="p">,</span>
|
||||
<span class="s1">'URI no longer exists and has been permanently removed.'</span><span class="p">),</span>
|
||||
<span class="mi">411</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Length Required'</span><span class="p">,</span> <span class="s1">'Client must specify Content-Length.'</span><span class="p">),</span>
|
||||
<span class="mi">412</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Precondition Failed'</span><span class="p">,</span> <span class="s1">'Precondition in headers is false.'</span><span class="p">),</span>
|
||||
<span class="mi">413</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Request Entity Too Large'</span><span class="p">,</span> <span class="s1">'Entity is too large.'</span><span class="p">),</span>
|
||||
<span class="mi">414</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Request-URI Too Long'</span><span class="p">,</span> <span class="s1">'URI is too long.'</span><span class="p">),</span>
|
||||
<span class="mi">415</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Unsupported Media Type'</span><span class="p">,</span> <span class="s1">'Entity body in unsupported format.'</span><span class="p">),</span>
|
||||
<span class="mi">416</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Requested Range Not Satisfiable'</span><span class="p">,</span>
|
||||
<span class="s1">'Cannot satisfy request range.'</span><span class="p">),</span>
|
||||
<span class="mi">417</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Expectation Failed'</span><span class="p">,</span>
|
||||
<span class="s1">'Expect condition could not be satisfied.'</span><span class="p">),</span>
|
||||
|
||||
<span class="mi">500</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Internal Server Error'</span><span class="p">,</span> <span class="s1">'Server got itself in trouble'</span><span class="p">),</span>
|
||||
<span class="mi">501</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Not Implemented'</span><span class="p">,</span>
|
||||
<span class="s1">'Server does not support this operation'</span><span class="p">),</span>
|
||||
<span class="mi">502</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Bad Gateway'</span><span class="p">,</span> <span class="s1">'Invalid responses from another server/proxy.'</span><span class="p">),</span>
|
||||
<span class="mi">503</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Service Unavailable'</span><span class="p">,</span>
|
||||
<span class="s1">'The server cannot process the request due to a high load'</span><span class="p">),</span>
|
||||
<span class="mi">504</span><span class="p">:</span> <span class="p">(</span><span class="s1">'Gateway Timeout'</span><span class="p">,</span>
|
||||
<span class="s1">'The gateway server did not receive a timely response'</span><span class="p">),</span>
|
||||
<span class="mi">505</span><span class="p">:</span> <span class="p">(</span><span class="s1">'HTTP Version Not Supported'</span><span class="p">,</span> <span class="s1">'Cannot fulfill request.'</span><span class="p">),</span>
|
||||
<span class="p">}</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>When an error is raised the server responds by returning an HTTP error code
|
||||
<em>and</em> an error page. You can use the <code class="xref py py-exc docutils literal notranslate"><span class="pre">HTTPError</span></code> instance as a response on the
|
||||
page returned. This means that as well as the code attribute, it also has read,
|
||||
geturl, and info methods, as returned by the <code class="docutils literal notranslate"><span class="pre">urllib.response</span></code> module:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">req</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">Request</span><span class="p">(</span><span class="s1">'http://www.python.org/fish.html'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="k">try</span><span class="p">:</span>
|
||||
<span class="gp">... </span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="n">req</span><span class="p">)</span>
|
||||
<span class="gp">... </span><span class="k">except</span> <span class="n">urllib</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">HTTPError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
<span class="gp">... </span> <span class="nb">print</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="n">code</span><span class="p">)</span>
|
||||
<span class="gp">... </span> <span class="nb">print</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
|
||||
<span class="gp">...</span>
|
||||
<span class="go">404</span>
|
||||
<span class="go">b'&lt;!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"</span>
|
||||
<span class="go"> "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"&gt;\n\n\n&lt;html</span>
|
||||
<span class="go"> ...</span>
|
||||
<span class="go"> &lt;title&gt;Page Not Found&lt;/title&gt;\n</span>
|
||||
<span class="go"> ...</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="wrapping-it-up">
|
||||
<h3>Wrapping it Up<a class="headerlink" href="#wrapping-it-up" title="Permalink to this headline">¶</a></h3>
|
||||
<p>So if you want to be prepared for <code class="xref py py-exc docutils literal notranslate"><span class="pre">HTTPError</span></code> <em>or</em> <code class="xref py py-exc docutils literal notranslate"><span class="pre">URLError</span></code> there are two
|
||||
basic approaches. I prefer the second approach.</p>
|
||||
<div class="section" id="number-1">
|
||||
<h4>Number 1<a class="headerlink" href="#number-1" title="Permalink to this headline">¶</a></h4>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">urllib.request</span> <span class="k">import</span> <span class="n">Request</span><span class="p">,</span> <span class="n">urlopen</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.error</span> <span class="k">import</span> <span class="n">URLError</span><span class="p">,</span> <span class="n">HTTPError</span>
|
||||
<span class="n">req</span> <span class="o">=</span> <span class="n">Request</span><span class="p">(</span><span class="n">someurl</span><span class="p">)</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">response</span> <span class="o">=</span> <span class="n">urlopen</span><span class="p">(</span><span class="n">req</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="n">HTTPError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'The server couldn</span><span class="se">\'</span><span class="s1">t fulfill the request.'</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Error code: '</span><span class="p">,</span> <span class="n">e</span><span class="o">.</span><span class="n">code</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="n">URLError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'We failed to reach a server.'</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Reason: '</span><span class="p">,</span> <span class="n">e</span><span class="o">.</span><span class="n">reason</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="c1"># everything is fine</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>The <code class="docutils literal notranslate"><span class="pre">except</span> <span class="pre">HTTPError</span></code> <em>must</em> come first, otherwise <code class="docutils literal notranslate"><span class="pre">except</span> <span class="pre">URLError</span></code>
|
||||
will <em>also</em> catch an <code class="xref py py-exc docutils literal notranslate"><span class="pre">HTTPError</span></code>.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="number-2">
|
||||
<h4>Number 2<a class="headerlink" href="#number-2" title="Permalink to this headline">¶</a></h4>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">urllib.request</span> <span class="k">import</span> <span class="n">Request</span><span class="p">,</span> <span class="n">urlopen</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.error</span> <span class="k">import</span> <span class="n">URLError</span>
|
||||
<span class="n">req</span> <span class="o">=</span> <span class="n">Request</span><span class="p">(</span><span class="n">someurl</span><span class="p">)</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">response</span> <span class="o">=</span> <span class="n">urlopen</span><span class="p">(</span><span class="n">req</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="n">URLError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="s1">'reason'</span><span class="p">):</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'We failed to reach a server.'</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Reason: '</span><span class="p">,</span> <span class="n">e</span><span class="o">.</span><span class="n">reason</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="s1">'code'</span><span class="p">):</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'The server couldn</span><span class="se">\'</span><span class="s1">t fulfill the request.'</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Error code: '</span><span class="p">,</span> <span class="n">e</span><span class="o">.</span><span class="n">code</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="c1"># everything is fine</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="info-and-geturl">
|
||||
<h2>info and geturl<a class="headerlink" href="#info-and-geturl" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The response returned by urlopen (or the <code class="xref py py-exc docutils literal notranslate"><span class="pre">HTTPError</span></code> instance) has two
|
||||
useful methods <code class="xref py py-meth docutils literal notranslate"><span class="pre">info()</span></code> and <code class="xref py py-meth docutils literal notranslate"><span class="pre">geturl()</span></code> and is defined in the module
|
||||
<a class="reference internal" href="../library/urllib.request.html#module-urllib.response" title="urllib.response: Response classes used by urllib."><code class="xref py py-mod docutils literal notranslate"><span class="pre">urllib.response</span></code></a>.</p>
|
||||
<p><strong>geturl</strong> - this returns the real URL of the page fetched. This is useful
|
||||
because <code class="docutils literal notranslate"><span class="pre">urlopen</span></code> (or the opener object used) may have followed a
|
||||
redirect. The URL of the page fetched may not be the same as the URL requested.</p>
|
||||
<p><strong>info</strong> - this returns a dictionary-like object that describes the page
|
||||
fetched, particularly the headers sent by the server. It is currently an
|
||||
<code class="xref py py-class docutils literal notranslate"><span class="pre">http.client.HTTPMessage</span></code> instance.</p>
|
||||
<p>Typical headers include ‘Content-length’, ‘Content-type’, and so on. See the
|
||||
<a class="reference external" href="http://jkorpela.fi/http.html">Quick Reference to HTTP Headers</a>
|
||||
for a useful listing of HTTP headers with brief explanations of their meaning
|
||||
and use.</p>
|
||||
</div>
|
||||
<div class="section" id="openers-and-handlers">
|
||||
<h2>Openers and Handlers<a class="headerlink" href="#openers-and-handlers" title="Permalink to this headline">¶</a></h2>
|
||||
<p>When you fetch a URL you use an opener (an instance of the perhaps
|
||||
confusingly-named <a class="reference internal" href="../library/urllib.request.html#urllib.request.OpenerDirector" title="urllib.request.OpenerDirector"><code class="xref py py-class docutils literal notranslate"><span class="pre">urllib.request.OpenerDirector</span></code></a>). Normally we have been using
|
||||
the default opener - via <code class="docutils literal notranslate"><span class="pre">urlopen</span></code> - but you can create custom
|
||||
openers. Openers use handlers. All the “heavy lifting” is done by the
|
||||
handlers. Each handler knows how to open URLs for a particular URL scheme (http,
|
||||
ftp, etc.), or how to handle an aspect of URL opening, for example HTTP
|
||||
redirections or HTTP cookies.</p>
|
||||
<p>You will want to create openers if you want to fetch URLs with specific handlers
|
||||
installed, for example to get an opener that handles cookies, or to get an
|
||||
opener that does not handle redirections.</p>
|
||||
<p>To create an opener, instantiate an <code class="docutils literal notranslate"><span class="pre">OpenerDirector</span></code>, and then call
|
||||
<code class="docutils literal notranslate"><span class="pre">.add_handler(some_handler_instance)</span></code> repeatedly.</p>
|
||||
<p>Alternatively, you can use <code class="docutils literal notranslate"><span class="pre">build_opener</span></code>, which is a convenience function for
|
||||
creating opener objects with a single function call. <code class="docutils literal notranslate"><span class="pre">build_opener</span></code> adds
|
||||
several handlers by default, but provides a quick way to add more and/or
|
||||
override the default handlers.</p>
|
||||
<p>Other sorts of handlers you might want to use can handle proxies, authentication,
|
||||
and other common but slightly specialised situations.</p>
|
||||
<p><code class="docutils literal notranslate"><span class="pre">install_opener</span></code> can be used to make an <code class="docutils literal notranslate"><span class="pre">opener</span></code> object the (global) default
|
||||
opener. This means that calls to <code class="docutils literal notranslate"><span class="pre">urlopen</span></code> will use the opener you have
|
||||
installed.</p>
|
||||
<p>Opener objects have an <code class="docutils literal notranslate"><span class="pre">open</span></code> method, which can be called directly to fetch
|
||||
URLs in the same way as the <code class="docutils literal notranslate"><span class="pre">urlopen</span></code> function: there’s no need to call
|
||||
<code class="docutils literal notranslate"><span class="pre">install_opener</span></code>, except as a convenience.</p>
|
||||
</div>
|
||||
<div class="section" id="id5">
|
||||
<h2>Basic Authentication<a class="headerlink" href="#id5" title="Permalink to this headline">¶</a></h2>
|
||||
<p>To illustrate creating and installing a handler we will use the
|
||||
<code class="docutils literal notranslate"><span class="pre">HTTPBasicAuthHandler</span></code>. For a more detailed discussion of this subject –
|
||||
including an explanation of how Basic Authentication works – see the <a class="reference external" href="http://www.voidspace.org.uk/python/articles/authentication.shtml">Basic
|
||||
Authentication Tutorial</a>.</p>
|
||||
<p>When authentication is required, the server sends a header (as well as the 401
|
||||
error code) requesting authentication. This specifies the authentication scheme
|
||||
and a ‘realm’. The header looks like: <code class="docutils literal notranslate"><span class="pre">WWW-Authenticate:</span> <span class="pre">SCHEME</span>
|
||||
<span class="pre">realm="REALM"</span></code>.</p>
|
||||
<p>e.g.</p>
|
||||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>WWW-Authenticate: Basic realm="cPanel Users"
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The client should then retry the request with the appropriate name and password
|
||||
for the realm included as a header in the request. This is ‘basic
|
||||
authentication’. In order to simplify this process we can create an instance of
|
||||
<code class="docutils literal notranslate"><span class="pre">HTTPBasicAuthHandler</span></code> and an opener to use this handler.</p>
|
||||
<p>The <code class="docutils literal notranslate"><span class="pre">HTTPBasicAuthHandler</span></code> uses an object called a password manager to handle
|
||||
the mapping of URLs and realms to passwords and usernames. If you know what the
|
||||
realm is (from the authentication header sent by the server), then you can use a
|
||||
<code class="docutils literal notranslate"><span class="pre">HTTPPasswordMgr</span></code>. Frequently one doesn’t care what the realm is. In that
|
||||
case, it is convenient to use <code class="docutils literal notranslate"><span class="pre">HTTPPasswordMgrWithDefaultRealm</span></code>. This allows
|
||||
you to specify a default username and password for a URL. This will be supplied
|
||||
in the absence of you providing an alternative combination for a specific
|
||||
realm. We indicate this by providing <code class="docutils literal notranslate"><span class="pre">None</span></code> as the realm argument to the
|
||||
<code class="docutils literal notranslate"><span class="pre">add_password</span></code> method.</p>
|
||||
<p>The top-level URL is the first URL that requires authentication. URLs “deeper”
|
||||
than the URL you pass to .add_password() will also match.</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="c1"># create a password manager</span>
|
||||
<span class="n">password_mgr</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">HTTPPasswordMgrWithDefaultRealm</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># Add the username and password.</span>
|
||||
<span class="c1"># If we knew the realm, we could use it instead of None.</span>
|
||||
<span class="n">top_level_url</span> <span class="o">=</span> <span class="s2">"http://example.com/foo/"</span>
|
||||
<span class="n">password_mgr</span><span class="o">.</span><span class="n">add_password</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">top_level_url</span><span class="p">,</span> <span class="n">username</span><span class="p">,</span> <span class="n">password</span><span class="p">)</span>
|
||||
|
||||
<span class="n">handler</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">HTTPBasicAuthHandler</span><span class="p">(</span><span class="n">password_mgr</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># create "opener" (OpenerDirector instance)</span>
|
||||
<span class="n">opener</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">build_opener</span><span class="p">(</span><span class="n">handler</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># use the opener to fetch a URL</span>
|
||||
<span class="n">opener</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">a_url</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Install the opener.</span>
|
||||
<span class="c1"># Now all calls to urllib.request.urlopen use our opener.</span>
|
||||
<span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">install_opener</span><span class="p">(</span><span class="n">opener</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>In the above example we only supplied our <code class="docutils literal notranslate"><span class="pre">HTTPBasicAuthHandler</span></code> to
|
||||
<code class="docutils literal notranslate"><span class="pre">build_opener</span></code>. By default openers have the handlers for normal situations
|
||||
– <code class="docutils literal notranslate"><span class="pre">ProxyHandler</span></code> (if a proxy setting such as an <span class="target" id="index-3"></span><code class="xref std std-envvar docutils literal notranslate"><span class="pre">http_proxy</span></code>
|
||||
environment variable is set), <code class="docutils literal notranslate"><span class="pre">UnknownHandler</span></code>, <code class="docutils literal notranslate"><span class="pre">HTTPHandler</span></code>,
|
||||
<code class="docutils literal notranslate"><span class="pre">HTTPDefaultErrorHandler</span></code>, <code class="docutils literal notranslate"><span class="pre">HTTPRedirectHandler</span></code>, <code class="docutils literal notranslate"><span class="pre">FTPHandler</span></code>,
|
||||
<code class="docutils literal notranslate"><span class="pre">FileHandler</span></code>, <code class="docutils literal notranslate"><span class="pre">DataHandler</span></code>, <code class="docutils literal notranslate"><span class="pre">HTTPErrorProcessor</span></code>.</p>
|
||||
</div>
|
||||
<p><code class="docutils literal notranslate"><span class="pre">top_level_url</span></code> is in fact <em>either</em> a full URL (including the ‘http:’ scheme
|
||||
component and the hostname and optionally the port number)
|
||||
e.g. <code class="docutils literal notranslate"><span class="pre">"http://example.com/"</span></code> <em>or</em> an “authority” (i.e. the hostname,
|
||||
optionally including the port number) e.g. <code class="docutils literal notranslate"><span class="pre">"example.com"</span></code> or <code class="docutils literal notranslate"><span class="pre">"example.com:8080"</span></code>
|
||||
(the latter example includes a port number). The authority, if present, must
|
||||
NOT contain the “userinfo” component - for example <code class="docutils literal notranslate"><span class="pre">"joe:password@example.com"</span></code> is
|
||||
not correct.</p>
|
||||
</div>
|
||||
<div class="section" id="proxies">
|
||||
<h2>Proxies<a class="headerlink" href="#proxies" title="Permalink to this headline">¶</a></h2>
|
||||
<p><strong>urllib</strong> will auto-detect your proxy settings and use those. This is through
|
||||
the <code class="docutils literal notranslate"><span class="pre">ProxyHandler</span></code>, which is part of the normal handler chain when a proxy
|
||||
setting is detected. Normally that’s a good thing, but there are occasions
|
||||
when it may not be helpful <a class="footnote-reference brackets" href="#id12" id="id6">5</a>. One way to bypass the proxy is to set up our own
|
||||
<code class="docutils literal notranslate"><span class="pre">ProxyHandler</span></code>, with no proxies defined. This is done using similar steps to
|
||||
setting up a <a class="reference external" href="http://www.voidspace.org.uk/python/articles/authentication.shtml">Basic Authentication</a> handler:</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">proxy_support</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">ProxyHandler</span><span class="p">({})</span>
|
||||
<span class="gp">>>> </span><span class="n">opener</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">build_opener</span><span class="p">(</span><span class="n">proxy_support</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">install_opener</span><span class="p">(</span><span class="n">opener</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>Currently <code class="docutils literal notranslate"><span class="pre">urllib.request</span></code> <em>does not</em> support fetching of <code class="docutils literal notranslate"><span class="pre">https</span></code> locations
|
||||
through a proxy. However, this can be enabled by extending urllib.request as
|
||||
shown in the recipe <a class="footnote-reference brackets" href="#id13" id="id7">6</a>.</p>
|
||||
</div>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p><code class="docutils literal notranslate"><span class="pre">HTTP_PROXY</span></code> will be ignored if a variable <code class="docutils literal notranslate"><span class="pre">REQUEST_METHOD</span></code> is set; see
|
||||
the documentation on <a class="reference internal" href="../library/urllib.request.html#urllib.request.getproxies" title="urllib.request.getproxies"><code class="xref py py-func docutils literal notranslate"><span class="pre">getproxies()</span></code></a>.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section" id="sockets-and-layers">
|
||||
<h2>Sockets and Layers<a class="headerlink" href="#sockets-and-layers" title="Permalink to this headline">¶</a></h2>
|
||||
<p>The Python support for fetching resources from the web is layered. urllib uses
|
||||
the <a class="reference internal" href="../library/http.client.html#module-http.client" title="http.client: HTTP and HTTPS protocol client (requires sockets)."><code class="xref py py-mod docutils literal notranslate"><span class="pre">http.client</span></code></a> library, which in turn uses the socket library.</p>
|
||||
<p>As of Python 2.3 you can specify how long a socket should wait for a response
|
||||
before timing out. This can be useful in applications which have to fetch web
|
||||
pages. By default the socket module has <em>no timeout</em> and can hang. Currently,
|
||||
the socket timeout is not exposed at the http.client or urllib.request levels.
|
||||
However, you can set the default timeout globally for all sockets using</p>
|
||||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">socket</span>
|
||||
<span class="kn">import</span> <span class="nn">urllib.request</span>
|
||||
|
||||
<span class="c1"># timeout in seconds</span>
|
||||
<span class="n">timeout</span> <span class="o">=</span> <span class="mi">10</span>
|
||||
<span class="n">socket</span><span class="o">.</span><span class="n">setdefaulttimeout</span><span class="p">(</span><span class="n">timeout</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># this call to urllib.request.urlopen now uses the default timeout</span>
|
||||
<span class="c1"># we have set in the socket module</span>
|
||||
<span class="n">req</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">Request</span><span class="p">(</span><span class="s1">'http://www.voidspace.org.uk'</span><span class="p">)</span>
|
||||
<span class="n">response</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="n">req</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<hr class="docutils" />
|
||||
<div class="section" id="footnotes">
|
||||
<h2>Footnotes<a class="headerlink" href="#footnotes" title="Permalink to this headline">¶</a></h2>
|
||||
<p>This document was reviewed and revised by John Lee.</p>
|
||||
<dl class="footnote brackets">
|
||||
<dt class="label" id="id8"><span class="brackets"><a class="fn-backref" href="#id1">1</a></span></dt>
|
||||
<dd><p>Google for example.</p>
|
||||
</dd>
|
||||
<dt class="label" id="id9"><span class="brackets"><a class="fn-backref" href="#id2">2</a></span></dt>
|
||||
<dd><p>Browser sniffing is a very bad practice for website design - building
|
||||
sites using web standards is much more sensible. Unfortunately a lot of
|
||||
sites still send different versions to different browsers.</p>
|
||||
</dd>
|
||||
<dt class="label" id="id10"><span class="brackets"><a class="fn-backref" href="#id3">3</a></span></dt>
|
||||
<dd><p>The user agent for MSIE 6 is
|
||||
<em>‘Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)’</em></p>
|
||||
</dd>
|
||||
<dt class="label" id="id11"><span class="brackets"><a class="fn-backref" href="#id4">4</a></span></dt>
|
||||
<dd><p>For details of more HTTP request headers, see
|
||||
<a class="reference external" href="http://jkorpela.fi/http.html">Quick Reference to HTTP Headers</a>.</p>
|
||||
</dd>
|
||||
<dt class="label" id="id12"><span class="brackets"><a class="fn-backref" href="#id6">5</a></span></dt>
|
||||
<dd><p>In my case I have to use a proxy to access the internet at work. If you
|
||||
attempt to fetch <em>localhost</em> URLs through this proxy it blocks them. IE
|
||||
is set to use the proxy, which urllib picks up on. In order to test
|
||||
scripts with a localhost server, I have to prevent urllib from using
|
||||
the proxy.</p>
|
||||
</dd>
|
||||
<dt class="label" id="id13"><span class="brackets"><a class="fn-backref" href="#id7">6</a></span></dt>
|
||||
<dd><p>urllib opener for SSL proxy (CONNECT method): <a class="reference external" href="https://code.activestate.com/recipes/456195/">ASPN Cookbook Recipe</a>.</p>
|
||||
</dd>
|
||||
</dl>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li><a class="reference internal" href="#">HOWTO Fetch Internet Resources Using The urllib Package</a><ul>
|
||||
<li><a class="reference internal" href="#introduction">Introduction</a></li>
|
||||
<li><a class="reference internal" href="#fetching-urls">Fetching URLs</a><ul>
|
||||
<li><a class="reference internal" href="#data">Data</a></li>
|
||||
<li><a class="reference internal" href="#headers">Headers</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#handling-exceptions">Handling Exceptions</a><ul>
|
||||
<li><a class="reference internal" href="#urlerror">URLError</a></li>
|
||||
<li><a class="reference internal" href="#httperror">HTTPError</a><ul>
|
||||
<li><a class="reference internal" href="#error-codes">Error Codes</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#wrapping-it-up">Wrapping it Up</a><ul>
|
||||
<li><a class="reference internal" href="#number-1">Number 1</a></li>
|
||||
<li><a class="reference internal" href="#number-2">Number 2</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a class="reference internal" href="#info-and-geturl">info and geturl</a></li>
|
||||
<li><a class="reference internal" href="#openers-and-handlers">Openers and Handlers</a></li>
|
||||
<li><a class="reference internal" href="#id5">Basic Authentication</a></li>
|
||||
<li><a class="reference internal" href="#proxies">Proxies</a></li>
|
||||
<li><a class="reference internal" href="#sockets-and-layers">Sockets and Layers</a></li>
|
||||
<li><a class="reference internal" href="#footnotes">Footnotes</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h4>Previous topic</h4>
|
||||
<p class="topless"><a href="unicode.html"
|
||||
title="previous chapter">Unicode HOWTO</a></p>
|
||||
<h4>Next topic</h4>
|
||||
<p class="topless"><a href="argparse.html"
|
||||
title="next chapter">Argparse Tutorial</a></p>
|
||||
<div role="note" aria-label="source link">
|
||||
<h3>This Page</h3>
|
||||
<ul class="this-page-menu">
|
||||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||||
<li>
|
||||
<a href="https://github.com/python/cpython/blob/3.7/Doc/howto/urllib2.rst"
|
||||
rel="nofollow">Show Source
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="argparse.html" title="Argparse Tutorial"
|
||||
>next</a> |</li>
|
||||
<li class="right" >
|
||||
<a href="unicode.html" title="Unicode HOWTO"
|
||||
>previous</a> |</li>
|
||||
<li><img src="../_static/py.png" alt=""
|
||||
style="vertical-align: middle; margin-top: -1px"/></li>
|
||||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||||
<li>
|
||||
<span class="language_switcher_placeholder">en</span>
|
||||
<span class="version_switcher_placeholder">3.7.4</span>
|
||||
<a href="../index.html">Documentation </a> »
|
||||
</li>
|
||||
|
||||
<li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
|
||||
<li class="right">
|
||||
|
||||
|
||||
<div class="inline-search" style="display: none" role="search">
|
||||
<form class="inline-search" action="../search.html" method="get">
|
||||
<input placeholder="Quick search" type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
<script type="text/javascript">$('.inline-search').show(0);</script>
|
||||
|
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer">
|
||||
© <a href="../copyright.html">Copyright</a> 2001-2019, Python Software Foundation.
|
||||
<br />
|
||||
The Python Software Foundation is a non-profit corporation.
|
||||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||||
<br />
|
||||
Last updated on Jul 13, 2019.
|
||||
<a href="../bugs.html">Found a bug</a>?
|
||||
<br />
|
||||
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 2.0.1.
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user