%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /usr/share/doc/python3-mechanize/html/
Upload File :
Create Path :
Current File : //usr/share/doc/python3-mechanize/html/index.html

<!DOCTYPE html>

<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />

    <title>mechanize &#8212; mechanize 0.4.7 documentation</title>
    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
    <link rel="stylesheet" type="text/css" href="_static/alabaster.css" />
    <script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
    <script src="_static/jquery.js"></script>
    <script src="_static/underscore.js"></script>
    <script src="_static/doctools.js"></script>
    <link rel="index" title="Index" href="genindex.html" />
    <link rel="search" title="Search" href="search.html" />
    <link rel="next" title="Frequently Asked Questions" href="faq.html" />
   
  <link rel="stylesheet" href="_static/custom.css" type="text/css" />
  
  
  <meta name="viewport" content="width=device-width, initial-scale=0.9" />

  </head><body>
  

    <div class="document">
      <div class="documentwrapper">
        <div class="bodywrapper">
          

          <div class="body" role="main">
            
  <section id="mechanize">
<h1>mechanize<a class="headerlink" href="#mechanize" title="Permalink to this headline">¶</a></h1>
<p>Stateful programmatic web browsing in Python. Browse pages programmatically
with easy HTML form filling and clicking of links.</p>
<div class="toctree-wrapper compound">
<p class="caption" role="heading"><span class="caption-text">Table of Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="faq.html">Frequently Asked Questions</a><ul>
<li class="toctree-l2"><a class="reference internal" href="faq.html#general">General</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#usage">Usage</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#cookies">Cookies</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#forms">Forms</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#miscellaneous">Miscellaneous</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="browser_api.html">Browser API</a><ul>
<li class="toctree-l2"><a class="reference internal" href="browser_api.html#the-browser">The Browser</a></li>
<li class="toctree-l2"><a class="reference internal" href="browser_api.html#the-request">The Request</a></li>
<li class="toctree-l2"><a class="reference internal" href="browser_api.html#the-response">The Response</a></li>
<li class="toctree-l2"><a class="reference internal" href="browser_api.html#miscellaneous">Miscellaneous</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="forms_api.html">HTML Forms API</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced.html">Advanced topics</a><ul>
<li class="toctree-l2"><a class="reference internal" href="advanced.html#thread-safety">Thread safety</a></li>
<li class="toctree-l2"><a class="reference internal" href="advanced.html#using-custom-ca-certificates">Using custom CA certificates</a></li>
<li class="toctree-l2"><a class="reference internal" href="advanced.html#debugging">Debugging</a></li>
</ul>
</li>
</ul>
</div>
<section id="quickstart">
<h2>Quickstart<a class="headerlink" href="#quickstart" title="Permalink to this headline">¶</a></h2>
<p>The examples below are written for a website that does not exist
(<cite>example.com</cite>), so they cannot be run as-is.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">re</span>
<span class="kn">import</span> <span class="nn">mechanize</span>

<span class="n">br</span> <span class="o">=</span> <span class="n">mechanize</span><span class="o">.</span><span class="n">Browser</span><span class="p">()</span>
<span class="n">br</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="s2">&quot;http://www.example.com/&quot;</span><span class="p">)</span>
<span class="c1"># follow second link with element text matching regular expression</span>
<span class="n">response1</span> <span class="o">=</span> <span class="n">br</span><span class="o">.</span><span class="n">follow_link</span><span class="p">(</span><span class="n">text_regex</span><span class="o">=</span><span class="sa">r</span><span class="s2">&quot;cheese\s*shop&quot;</span><span class="p">,</span> <span class="n">nr</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">br</span><span class="o">.</span><span class="n">title</span><span class="p">())</span>
<span class="nb">print</span><span class="p">(</span><span class="n">response1</span><span class="o">.</span><span class="n">geturl</span><span class="p">())</span>
<span class="nb">print</span><span class="p">(</span><span class="n">response1</span><span class="o">.</span><span class="n">info</span><span class="p">())</span>  <span class="c1"># headers</span>
<span class="nb">print</span><span class="p">(</span><span class="n">response1</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>  <span class="c1"># body</span>

<span class="n">br</span><span class="o">.</span><span class="n">select_form</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">&quot;order&quot;</span><span class="p">)</span>
<span class="c1"># Browser passes through unknown attributes (including methods)</span>
<span class="c1"># to the selected HTMLForm.</span>
<span class="n">br</span><span class="p">[</span><span class="s2">&quot;cheeses&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;mozzarella&quot;</span><span class="p">,</span> <span class="s2">&quot;caerphilly&quot;</span><span class="p">]</span>  <span class="c1"># (the method here is __setitem__)</span>
<span class="c1"># Submit current form.  Browser calls .close() on the current response on</span>
<span class="c1"># navigation, so this closes response1</span>
<span class="n">response2</span> <span class="o">=</span> <span class="n">br</span><span class="o">.</span><span class="n">submit</span><span class="p">()</span>

<span class="c1"># print currently selected form (don&#39;t call .submit() on this, use br.submit())</span>
<span class="nb">print</span><span class="p">(</span><span class="n">br</span><span class="o">.</span><span class="n">form</span><span class="p">)</span>

<span class="n">response3</span> <span class="o">=</span> <span class="n">br</span><span class="o">.</span><span class="n">back</span><span class="p">()</span>  <span class="c1"># back to cheese shop (same data as response1)</span>
<span class="c1"># the history mechanism returns cached response objects</span>
<span class="c1"># we can still use the response, even though it was .close()d</span>
<span class="n">response3</span><span class="o">.</span><span class="n">get_data</span><span class="p">()</span>  <span class="c1"># like .seek(0) followed by .read()</span>
<span class="n">response4</span> <span class="o">=</span> <span class="n">br</span><span class="o">.</span><span class="n">reload</span><span class="p">()</span>  <span class="c1"># fetches from server</span>

<span class="k">for</span> <span class="n">form</span> <span class="ow">in</span> <span class="n">br</span><span class="o">.</span><span class="n">forms</span><span class="p">():</span>
    <span class="nb">print</span><span class="p">(</span><span class="n">form</span><span class="p">)</span>
<span class="c1"># .links() optionally accepts the keyword args of .follow_/.find_link()</span>
<span class="k">for</span> <span class="n">link</span> <span class="ow">in</span> <span class="n">br</span><span class="o">.</span><span class="n">links</span><span class="p">(</span><span class="n">url_regex</span><span class="o">=</span><span class="s2">&quot;python.org&quot;</span><span class="p">):</span>
    <span class="nb">print</span><span class="p">(</span><span class="n">link</span><span class="p">)</span>
    <span class="n">br</span><span class="o">.</span><span class="n">follow_link</span><span class="p">(</span><span class="n">link</span><span class="p">)</span>  <span class="c1"># takes EITHER Link instance OR keyword args</span>
    <span class="n">br</span><span class="o">.</span><span class="n">back</span><span class="p">()</span>
</pre></div>
</div>
<p>You may control the browser’s policy by using the methods of
<cite>mechanize.Browser</cite>’s base class, <cite>mechanize.UserAgent</cite>.  For example:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">br</span> <span class="o">=</span> <span class="n">mechanize</span><span class="o">.</span><span class="n">Browser</span><span class="p">()</span>
<span class="c1"># Explicitly configure proxies (Browser will attempt to set good defaults).</span>
<span class="c1"># Note the userinfo (&quot;joe:password@&quot;) and port number (&quot;:3128&quot;) are optional.</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_proxies</span><span class="p">({</span><span class="s2">&quot;http&quot;</span><span class="p">:</span> <span class="s2">&quot;joe:password@myproxy.example.com:3128&quot;</span><span class="p">,</span>
                <span class="s2">&quot;ftp&quot;</span><span class="p">:</span> <span class="s2">&quot;proxy.example.com&quot;</span><span class="p">,</span>
                <span class="p">})</span>
<span class="c1"># Add HTTP Basic/Digest auth username and password for HTTP proxy access.</span>
<span class="c1"># (equivalent to using &quot;joe:password@...&quot; form above)</span>
<span class="n">br</span><span class="o">.</span><span class="n">add_proxy_password</span><span class="p">(</span><span class="s2">&quot;joe&quot;</span><span class="p">,</span> <span class="s2">&quot;password&quot;</span><span class="p">)</span>
<span class="c1"># Add HTTP Basic/Digest auth username and password for website access.</span>
<span class="n">br</span><span class="o">.</span><span class="n">add_password</span><span class="p">(</span><span class="s2">&quot;http://example.com/protected/&quot;</span><span class="p">,</span> <span class="s2">&quot;joe&quot;</span><span class="p">,</span> <span class="s2">&quot;password&quot;</span><span class="p">)</span>
<span class="c1"># Add an extra header to all outgoing requests, you can also</span>
<span class="c1"># re-order or remove headers in this function.</span>
<span class="n">br</span><span class="o">.</span><span class="n">finalize_request_headers</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">request</span><span class="p">,</span> <span class="n">headers</span><span class="p">:</span> <span class="n">headers</span><span class="o">.</span><span class="fm">__setitem__</span><span class="p">(</span>
  <span class="s1">&#39;My-Custom-Header&#39;</span><span class="p">,</span> <span class="s1">&#39;Something&#39;</span><span class="p">)</span>
<span class="c1"># Don&#39;t handle HTTP-EQUIV headers (HTTP headers embedded in HTML).</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_handle_equiv</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="c1"># Ignore robots.txt.  Do not do this without thought and consideration.</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_handle_robots</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="c1"># Don&#39;t add Referer (sic) header</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_handle_referer</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="c1"># Don&#39;t handle Refresh redirections</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_handle_refresh</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="c1"># Don&#39;t handle cookies</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_cookiejar</span><span class="p">()</span>
<span class="c1"># Supply your own mechanize.CookieJar (NOTE: cookie handling is ON by</span>
<span class="c1"># default: no need to do this unless you have some reason to use a</span>
<span class="c1"># particular cookiejar)</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_cookiejar</span><span class="p">(</span><span class="n">cj</span><span class="p">)</span>
<span class="c1"># Tell the browser to send the Accept-Encoding: gzip header to the server</span>
<span class="c1"># to indicate it supports gzip Content-Encoding</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_request_gzip</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># Do not verify SSL certificates</span>
<span class="kn">import</span> <span class="nn">ssl</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_ca_data</span><span class="p">(</span><span class="n">context</span><span class="o">=</span><span class="n">ssl</span><span class="o">.</span><span class="n">_create_unverified_context</span><span class="p">(</span><span class="n">cert_reqs</span><span class="o">=</span><span class="n">ssl</span><span class="o">.</span><span class="n">CERT_NONE</span><span class="p">))</span>
<span class="c1"># Log information about HTTP redirects and Refreshes.</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_debug_redirects</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># Log HTTP response bodies (i.e. the HTML, most of the time).</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_debug_responses</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># Print HTTP headers.</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_debug_http</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>

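<span class="c1"># To make sure you&#39;re seeing all debug output:</span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">import</span> <span class="nn">sys</span>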
<span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s2">&quot;mechanize&quot;</span><span class="p">)</span>
<span class="n">logger</span><span class="o">.</span><span class="n">addHandler</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">StreamHandler</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">))</span>
<span class="n">logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">INFO</span><span class="p">)</span>

<span class="c1"># Sometimes it&#39;s useful to process bad headers or bad HTML:</span>
<span class="n">response</span> <span class="o">=</span> <span class="n">br</span><span class="o">.</span><span class="n">response</span><span class="p">()</span>  <span class="c1"># this is a copy of response</span>
<span class="n">headers</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">info</span><span class="p">()</span>  <span class="c1"># this is a HTTPMessage</span>
<span class="n">headers</span><span class="p">[</span><span class="s2">&quot;Content-type&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;text/html; charset=utf-8&quot;</span>
<span class="n">response</span><span class="o">.</span><span class="n">set_data</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">get_data</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&lt;!---&quot;</span><span class="p">,</span> <span class="s2">&quot;&lt;!--&quot;</span><span class="p">))</span>
<span class="n">br</span><span class="o">.</span><span class="n">set_response</span><span class="p">(</span><span class="n">response</span><span class="p">)</span>
</pre></div>
</div>
<p>mechanize exports the complete interface of <cite>urllib2</cite>:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">mechanize</span>
<span class="n">response</span> <span class="o">=</span> <span class="n">mechanize</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="s2">&quot;http://www.example.com/&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
</pre></div>
</div>
<p>When using mechanize, anything you would normally import from <cite>urllib2</cite> should
be imported from mechanize instead.</p>
</section>
</section>
<section id="indices-and-tables">
<h1>Indices and tables<a class="headerlink" href="#indices-and-tables" title="Permalink to this headline">¶</a></h1>
<ul class="simple">
<li><p><a class="reference internal" href="genindex.html"><span class="std std-ref">Index</span></a></p></li>
<li><p><a class="reference internal" href="py-modindex.html"><span class="std std-ref">Module Index</span></a></p></li>
<li><p><a class="reference internal" href="search.html"><span class="std std-ref">Search Page</span></a></p></li>
</ul>
</section>


          </div>
          
        </div>
      </div>
      <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
        <div class="sphinxsidebarwrapper">
<h1 class="logo"><a href="#">mechanize</a></h1>








<h3>Navigation</h3>
<p class="caption" role="heading"><span class="caption-text">Table of Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="faq.html">Frequently Asked Questions</a></li>
<li class="toctree-l1"><a class="reference internal" href="browser_api.html">Browser API</a></li>
<li class="toctree-l1"><a class="reference internal" href="forms_api.html">HTML Forms API</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced.html">Advanced topics</a></li>
</ul>

<div class="relations">
<h3>Related Topics</h3>
<ul>
  <li><a href="#">Documentation overview</a><ul>
      <li>Next: <a href="faq.html" title="next chapter">Frequently Asked Questions</a></li>
  </ul></li>
</ul>
</div>
<div id="searchbox" style="display: none" role="search">
  <h3 id="searchlabel">Quick search</h3>
    <div class="searchformwrapper">
    <form class="search" action="search.html" method="get">
      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
      <input type="submit" value="Go" />
    </form>
    </div>
</div>
<script>$('#searchbox').show(0);</script>








        </div>
      </div>
      <div class="clearer"></div>
    </div>
    <div class="footer">
      &copy;2021, Kovid Goyal.
      
      |
      Powered by <a href="https://www.sphinx-doc.org/">Sphinx 4.3.2</a>
      &amp; <a href="https://github.com/bitprophet/alabaster">Alabaster 0.7.12</a>
      
      |
      <a href="_sources/index.rst.txt"
          rel="nofollow">Page source</a>
    </div>

    

    
  </body>
</html>

Zerion Mini Shell 1.0