import requests
from bs4 import BeautifulSoup
from captcha import GTK_captcha as Captcha
from urlparse import urljoin
 
# Upper bound on how many times request_get solves and submits a CAPTCHA
# form for a single URL before giving up with Captcha_Limit_Exhausted.
CAPTCHA_LIMIT = 5
 
class Captcha_Limit_Exhausted(Exception):
    """Raised by request_get when CAPTCHA_LIMIT attempts did not clear the CAPTCHA."""
 
def captcha_is_required(soup):
    """Tell whether the parsed page *soup* asks for a CAPTCHA.

    The check is simply whether ``soup.form`` is truthy: any form found on
    the page is treated as a CAPTCHA challenge.

    Returns True or False.
    """
    first_form = soup.form
    if first_form:
        return True
    return False
 
def bypass_captcha(form, s=requests.Session()):
    """Solve the CAPTCHA referenced by *form* and return the answer fields.

    The page named by the ``src`` of the form's iframe is downloaded, the
    first image on it is fetched, and that image response is handed to
    ``Captcha.from_request(...).join()`` (presumably yielding the recognised
    text -- confirm against the captcha module).

    form -- parsed form tag that contains the CAPTCHA iframe
    s    -- session used for both GET requests; the default is a single
            Session object created when this function is defined and
            therefore shared by every call that omits the argument

    Returns a dict with two keys: ``recaptcha_challenge_field`` (the value
    of the same-named input on the challenge page) and
    ``recaptcha_response_field`` (the solver's output).
    """
    challenge_page = s.get(form.iframe['src'])
    challenge_soup = BeautifulSoup(challenge_page.text)

    # The image address may be relative; resolve it against the URL the
    # challenge page was actually served from.
    image_url = urljoin(challenge_page.url, challenge_soup.img['src'])
    image_response = s.get(image_url)
    solved_text = Captcha.from_request(image_response).join()

    challenge_input = challenge_soup.form.find(
        'input', attrs={'name': 'recaptcha_challenge_field'})

    answer = {}
    answer['recaptcha_challenge_field'] = challenge_input['value']
    answer['recaptcha_response_field'] = solved_text
    return answer
def request_get(url, s=requests.Session()):
    """GET *url*, answering CAPTCHA forms until a CAPTCHA-free page arrives.

    Every response is parsed and checked with captcha_is_required.  While a
    CAPTCHA is demanded, the form's input fields are collected, merged with
    the answer from bypass_captcha (the answer wins on a name clash) and
    POSTed to the form's action URL; the response to that POST is then
    checked in turn.

    url -- address to fetch
    s   -- session used for every request; the default is a single Session
           object created when this function is defined and therefore
           shared by every call that omits the argument

    Returns the first response that does not require a CAPTCHA.
    Raises Captcha_Limit_Exhausted if a CAPTCHA is still required after
    CAPTCHA_LIMIT submissions.
    """
    r = s.get(url)
    attempts = 0
    while True:
        soup = BeautifulSoup(r.text)
        if not captcha_is_required(soup):
            return r
        # Test the budget only after inspecting the response, so that a
        # successful final submission is returned instead of discarded.
        if attempts >= CAPTCHA_LIMIT:
            raise Captcha_Limit_Exhausted

        form = soup.form
        bypass = bypass_captcha(form, s)

        data = {}
        for field in form.findAll('input'):
            name = field.get('name')
            if not name:
                # Unnamed controls (e.g. a bare submit button) carry no
                # form data, so there is nothing to send for them.
                continue
            data[name] = field.get('value', '')
        data.update(bypass)

        # A form without an action attribute submits to the page's own URL;
        # urljoin with an empty reference yields exactly that.
        form_action = urljoin(r.url, form.get('action') or '')
        r = s.post(form_action, data=data)
        attempts += 1

~~DISCUSSION~~

it/bypass_recaptcha.txt · ostatnio zmienione: 2013/08/04 09:20 przez naczelnik
Public Domain
www.chimeric.de Valid CSS Driven by DokuWiki do yourself a favour and use a real browser - get firefox!! Recent changes RSS feed Valid XHTML 1.0