"""Fetch pages over HTTP, answering recaptcha challenge forms on the way."""
import requests
from bs4 import BeautifulSoup
from captcha import GTK_captcha as Captcha

try:
    from urlparse import urljoin  # Python 2
except ImportError:
    from urllib.parse import urljoin  # Python 3

# Maximum number of captcha answers request_get submits before giving up.
CAPTCHA_LIMIT = 5


class Captcha_Limit_Exhausted(Exception):
    """Raised by request_get when the page still shows a form after
    CAPTCHA_LIMIT answers have been submitted."""


def captcha_is_required(soup):
    """Return True when the parsed page contains at least one <form>.

    Any form at all is treated as a captcha challenge; the form's content
    is not inspected here.
    """
    return bool(soup.form)


def _form_fields(form):
    """Collect the name -> value pairs of the <input> elements in *form*.

    Inputs without a name are skipped and a missing value becomes '',
    so an unnamed submit button no longer raises KeyError.
    """
    fields = {}
    for field in form.find_all('input'):
        name = field.get('name')
        if not name:
            continue
        fields[name] = field.get('value', '')
    return fields


# NOTE: the default session below is created once, when the function is
# defined, and is shared by every call that omits `s`. It is kept as-is so
# cookies keep persisting between calls exactly as before.
def bypass_captcha(form, s=requests.Session()):
    """Obtain the two recaptcha fields needed to submit *form*.

    Downloads the page referenced by the form's <iframe>, fetches the first
    image on that page and hands the image response to ``Captcha`` to get
    the answer text.

    Args:
        form: the parsed <form> element; it must contain an <iframe>.
        s: the requests session used for both downloads.

    Returns:
        A dict with ``recaptcha_challenge_field`` (read from the iframe
        page) and ``recaptcha_response_field`` (the answer text).
    """
    frame_response = s.get(form.iframe['src'])
    frame_soup = BeautifulSoup(frame_response.text)

    # The image src may be relative to the iframe page, not to the form page.
    image_url = urljoin(frame_response.url, frame_soup.img['src'])
    image_response = s.get(image_url)

    # NOTE(review): join() presumably blocks until the answer is available
    # and returns it -- confirm against the captcha module.
    answer = Captcha.from_request(image_response).join()

    challenge = frame_soup.form.find(
        'input', attrs={'name': 'recaptcha_challenge_field'})['value']
    return {
        'recaptcha_challenge_field': challenge,
        'recaptcha_response_field': answer,
    }


# NOTE: same shared default session as bypass_captcha (see the note there is
# not required reading: the default is evaluated once and reused by every
# call that omits `s`).
def request_get(url, s=requests.Session()):
    """GET *url*, answering captcha forms until a form-free page arrives.

    Args:
        url: the address to fetch.
        s: the requests session used for every request.

    Returns:
        The first response whose page contains no <form>.

    Raises:
        Captcha_Limit_Exhausted: the page still contains a form after
            CAPTCHA_LIMIT answers have been submitted.
    """
    response = s.get(url)
    attempts = 0
    while True:
        soup = BeautifulSoup(response.text)
        if not captcha_is_required(soup):
            return response
        # Checking the budget only after inspecting the page means the
        # response to the last permitted answer is examined as well,
        # instead of being discarded unread.
        if attempts >= CAPTCHA_LIMIT:
            raise Captcha_Limit_Exhausted

        data = _form_fields(soup.form)
        # The freshly solved captcha fields override same-named form inputs.
        data.update(bypass_captcha(soup.form, s))

        # A form without an action attribute submits to the page's own URL.
        form_action = urljoin(response.url, soup.form.get('action', ''))
        response = s.post(form_action, data=data)
        attempts += 1
~~DISCUSSION~~