<!-- %PDF- %PDF-
| Direktori : /proc/thread-self/root/usr/share/doc/python3-mechanize/html/ |
| Current File : //proc/thread-self/root/usr/share/doc/python3-mechanize/html/index.html |
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
<title>mechanize — mechanize 0.4.7 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="_static/alabaster.css" />
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
<script src="_static/jquery.js"></script>
<script src="_static/underscore.js"></script>
<script src="_static/doctools.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Frequently Asked Questions" href="faq.html" />
<link rel="stylesheet" href="_static/custom.css" type="text/css" />
<meta name="viewport" content="width=device-width, initial-scale=0.9" />
</head><body>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">
<section id="mechanize">
<h1>mechanize<a class="headerlink" href="#mechanize" title="Permalink to this headline">¶</a></h1>
<p>Stateful programmatic web browsing in Python. Browse pages programmatically
with easy HTML form filling and clicking of links.</p>
<div class="toctree-wrapper compound">
<p class="caption" role="heading"><span class="caption-text">Table of Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="faq.html">Frequently Asked Questions</a><ul>
<li class="toctree-l2"><a class="reference internal" href="faq.html#general">General</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#usage">Usage</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#cookies">Cookies</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#forms">Forms</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#miscellaneous">Miscellaneous</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="browser_api.html">Browser API</a><ul>
<li class="toctree-l2"><a class="reference internal" href="browser_api.html#the-browser">The Browser</a></li>
<li class="toctree-l2"><a class="reference internal" href="browser_api.html#the-request">The Request</a></li>
<li class="toctree-l2"><a class="reference internal" href="browser_api.html#the-response">The Response</a></li>
<li class="toctree-l2"><a class="reference internal" href="browser_api.html#miscellaneous">Miscellaneous</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="forms_api.html">HTML Forms API</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced.html">Advanced topics</a><ul>
<li class="toctree-l2"><a class="reference internal" href="advanced.html#thread-safety">Thread safety</a></li>
<li class="toctree-l2"><a class="reference internal" href="advanced.html#using-custom-ca-certificates">Using custom CA certificates</a></li>
<li class="toctree-l2"><a class="reference internal" href="advanced.html#debugging">Debugging</a></li>
</ul>
</li>
</ul>
</div>
<section id="quickstart">
<h2>Quickstart<a class="headerlink" href="#quickstart" title="Permalink to this headline">¶</a></h2>
<p>The examples below are written for a website that does not exist
(<cite>example.com</cite>), so they cannot be run as-is.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">re</span>
<span class="kn">import</span> <span class="nn">mechanize</span>
<span class="n">br</span> <span class="o">=</span> <span class="n">mechanize</span><span class="o">.</span><span class="n">Browser</span><span class="p">()</span>
<span class="n">br</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="s2">"http://www.example.com/"</span><span class="p">)</span>
<span class="c1"># follow second link with element text matching regular expression</span>
<span class="n">response1</span> <span class="o">=</span> <span class="n">br</span><span class="o">.</span><span class="n">follow_link</span><span class="p">(</span><span class="n">text_regex</span><span class="o">=</span><span class="sa">r</span><span class="s2">"cheese\s*shop"</span><span class="p">,</span> <span class="n">nr</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">br</span><span class="o">.</span><span class="n">title</span><span class="p">())</span>
<span class="nb">print</span><span class="p">(</span><span class="n">response1</span><span class="o">.</span><span class="n">geturl</span><span class="p">())</span>
<span class="nb">print</span><span class="p">(</span><span class="n">response1</span><span class="o">.</span><span class="n">info</span><span class="p">())</span> <span class="c1"># headers</span>
<span class="nb">print</span><span class="p">(</span><span class="n">response1</span><span class="o">.</span><span class="n">read</span><span class="p">())</span> <span class="c1"># body</span>
<span class="n">br</span><span class="o">.</span><span class="n">select_form</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">"order"</span><span class="p">)</span>
<span class="c1"># Browser passes through unknown attributes (including methods)</span>
<span class="c1"># to the selected HTMLForm.</span>
<span class="n">br</span><span class="p">[</span><span class="s2">"cheeses"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"mozzarella"</span><span class="p">,</span> <span class="s2">"caerphilly"</span><span class="p">]</span> <span class="c1"># (the method here is __setitem__)</span>
<span class="c1"># Submit current form. Browser calls .close() on the current response on</span>
<span class="c1"># navigation, so this closes response1</span>
<span class="n">response2</span> <span class="o">=</span> <span class="n">br</span><span class="o">.</span><span class="n">submit</span><span class="p">()</span>
<span class="c1"># print currently selected form (don't call .submit() on this, use br.submit())</span>
<span class="nb">print</span><span class="p">(</span><span class="n">br</span><span class="o">.</span><span class="n">form</span><span class="p">)</span>
<span class="n">response3</span> <span class="o">=</span> <span class="n">br</span><span class="o">.</span><span class="n">back</span><span class="p">()</span> <span class="c1"># back to cheese shop (same data as response1)</span>
<span class="c1"># the history mechanism returns cached response objects</span>
<span class="c1"># we can still use the response, even though it was .close()d</span>
<span class="n">response3</span><span class="o">.</span><span class="n">get_data</span><span class="p">()</span> <span class="c1"># like .seek(0) followed by .read()</span>
<span class="n">response4</span> <span class="o">=</span> <span class="n">br</span><span class="o">.</span><span class="n">reload</span><span class="p">()</span> <span class="c1"># fetches from server</span>
<span class="k">for</span> <span class="n">form</span> <span class="ow">in</span> <span class="n">br</span><span class="o">.</span><span class="n">forms</span><span class="p">():</span>
    <span class="nb">print</span><span class="p">(</span><span class="n">form</span><span class="p">)</span>
<span class="c1"># .links() optionally accepts the keyword args of .follow_/.find_link()</span>
<span class="k">for</span> <span class="n">link</span> <span class="ow">in</span> <span class="n">br</span><span class="o">.</span><span class="n">links</span><span class="p">(</span><span class="n">url_regex</span><span class="o">=</span><span class="s2">"python.org"</span><span class="p">):</span>
    <span class="nb">print</span><span class="p">(</span><span class="n">link</span><span class="p">)</span>
    <span class="n">br</span><span class="o">.</span><span class="n">follow_link</span><span class="p">(</span><span class="n">link</span><span class="p">)</span> <span class="c1"># takes EITHER Link instance OR keyword args</span>
    <span class="n">br</span><span class="o">.</span><span class="n">back</span><span class="p">()</span>
</pre></div>
</div>
<p>You may control the browser’s policy by using the methods of
<cite>mechanize.Browser</cite>’s base class, <cite>mechanize.UserAgent</cite>. For example:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">br</span> <span class="o">=</span> <span class="n">mechanize</span><span class="o">.</span><span class="n">Browser</span><span class="p">()</span>
<span class="c1"># Explicitly configure proxies (Browser will attempt to set good defaults).</span>
<span class="c1"># Note the userinfo ("joe:password@") and port number (":3128") are optional.</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_proxies</span><span class="p">({</span><span class="s2">"http"</span><span class="p">:</span> <span class="s2">"joe:password@myproxy.example.com:3128"</span><span class="p">,</span>
<span class="s2">"ftp"</span><span class="p">:</span> <span class="s2">"proxy.example.com"</span><span class="p">,</span>
<span class="p">})</span>
<span class="c1"># Add HTTP Basic/Digest auth username and password for HTTP proxy access.</span>
<span class="c1"># (equivalent to using "joe:password@..." form above)</span>
<span class="n">br</span><span class="o">.</span><span class="n">add_proxy_password</span><span class="p">(</span><span class="s2">"joe"</span><span class="p">,</span> <span class="s2">"password"</span><span class="p">)</span>
<span class="c1"># Add HTTP Basic/Digest auth username and password for website access.</span>
<span class="n">br</span><span class="o">.</span><span class="n">add_password</span><span class="p">(</span><span class="s2">"http://example.com/protected/"</span><span class="p">,</span> <span class="s2">"joe"</span><span class="p">,</span> <span class="s2">"password"</span><span class="p">)</span>
<span class="c1"># Add an extra header to all outgoing requests, you can also</span>
<span class="c1"># re-order or remove headers in this function.</span>
<span class="n">br</span><span class="o">.</span><span class="n">finalize_request_headers</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">request</span><span class="p">,</span> <span class="n">headers</span><span class="p">:</span> <span class="n">headers</span><span class="o">.</span><span class="fm">__setitem__</span><span class="p">(</span>
<span class="s1">'My-Custom-Header'</span><span class="p">,</span> <span class="s1">'Something'</span><span class="p">)</span>
<span class="c1"># Don't handle HTTP-EQUIV headers (HTTP headers embedded in HTML).</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_handle_equiv</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="c1"># Ignore robots.txt. Do not do this without thought and consideration.</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_handle_robots</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="c1"># Don't add Referer (sic) header</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_handle_referer</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="c1"># Don't handle Refresh redirections</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_handle_refresh</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="c1"># Don't handle cookies</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_cookiejar</span><span class="p">()</span>
<span class="c1"># Supply your own mechanize.CookieJar (NOTE: cookie handling is ON by</span>
<span class="c1"># default: no need to do this unless you have some reason to use a</span>
<span class="c1"># particular cookiejar)</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_cookiejar</span><span class="p">(</span><span class="n">cj</span><span class="p">)</span>
<span class="c1"># Tell the browser to send the Accept-Encoding: gzip header to the server</span>
<span class="c1"># to indicate it supports gzip Content-Encoding</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_request_gzip</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># Do not verify SSL certificates</span>
<span class="kn">import</span> <span class="nn">ssl</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_ca_data</span><span class="p">(</span><span class="n">context</span><span class="o">=</span><span class="n">ssl</span><span class="o">.</span><span class="n">_create_unverified_context</span><span class="p">(</span><span class="n">cert_reqs</span><span class="o">=</span><span class="n">ssl</span><span class="o">.</span><span class="n">CERT_NONE</span><span class="p">))</span>
<span class="c1"># Log information about HTTP redirects and Refreshes.</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_debug_redirects</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># Log HTTP response bodies (i.e. the HTML, most of the time).</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_debug_responses</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># Print HTTP headers.</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_debug_http</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># To make sure you're seeing all debug output:</span>
<span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s2">"mechanize"</span><span class="p">)</span>
<span class="n">logger</span><span class="o">.</span><span class="n">addHandler</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">StreamHandler</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">))</span>
<span class="n">logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">INFO</span><span class="p">)</span>
<span class="c1"># Sometimes it's useful to process bad headers or bad HTML:</span>
<span class="n">response</span> <span class="o">=</span> <span class="n">br</span><span class="o">.</span><span class="n">response</span><span class="p">()</span> <span class="c1"># this is a copy of response</span>
<span class="n">headers</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">info</span><span class="p">()</span> <span class="c1"># this is a HTTPMessage</span>
<span class="n">headers</span><span class="p">[</span><span class="s2">"Content-type"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"text/html; charset=utf-8"</span>
<span class="n">response</span><span class="o">.</span><span class="n">set_data</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">get_data</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"&lt;!---"</span><span class="p">,</span> <span class="s2">"&lt;!--"</span><span class="p">))</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_response</span><span class="p">(</span><span class="n">response</span><span class="p">)</span>
</pre></div>
</div>
<p>mechanize exports the complete interface of <cite>urllib2</cite>:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">mechanize</span>
<span class="n">response</span> <span class="o">=</span> <span class="n">mechanize</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="s2">"http://www.example.com/"</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
</pre></div>
</div>
<p>When using mechanize, anything you would normally import from <cite>urllib2</cite> should
be imported from mechanize instead.</p>
</section>
</section>
<section id="indices-and-tables">
<h1>Indices and tables<a class="headerlink" href="#indices-and-tables" title="Permalink to this headline">¶</a></h1>
<ul class="simple">
<li><p><a class="reference internal" href="genindex.html"><span class="std std-ref">Index</span></a></p></li>
<li><p><a class="reference internal" href="py-modindex.html"><span class="std std-ref">Module Index</span></a></p></li>
<li><p><a class="reference internal" href="search.html"><span class="std std-ref">Search Page</span></a></p></li>
</ul>
</section>
</div>
</div>
</div>
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<h1 class="logo"><a href="#">mechanize</a></h1>
<h3>Navigation</h3>
<p class="caption" role="heading"><span class="caption-text">Table of Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="faq.html">Frequently Asked Questions</a></li>
<li class="toctree-l1"><a class="reference internal" href="browser_api.html">Browser API</a></li>
<li class="toctree-l1"><a class="reference internal" href="forms_api.html">HTML Forms API</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced.html">Advanced topics</a></li>
</ul>
<div class="relations">
<h3>Related Topics</h3>
<ul>
<li><a href="#">Documentation overview</a><ul>
<li>Next: <a href="faq.html" title="next chapter">Frequently Asked Questions</a></li>
</ul></li>
</ul>
</div>
<div id="searchbox" style="display: none" role="search">
<h3 id="searchlabel">Quick search</h3>
<div class="searchformwrapper">
<form class="search" action="search.html" method="get">
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
<input type="submit" value="Go" />
</form>
</div>
</div>
<script>$('#searchbox').show(0);</script>
</div>
</div>
<div class="clearer"></div>
</div>
<div class="footer">
©2021, Kovid Goyal.
|
Powered by <a href="http://sphinx-doc.org/">Sphinx 4.3.2</a>
&amp; <a href="https://github.com/bitprophet/alabaster">Alabaster 0.7.12</a>
|
<a href="_sources/index.rst.txt"
rel="nofollow">Page source</a>
</div>
</body>
</html>