How to Bypass Google Recaptcha V2 Using Python and HTTPX

Feri Lukmansyah
3 min readJan 25, 2023

--

Photo by Niranjan _ Photographs on Unsplash

This article shows how to bypass Google reCAPTCHA v2 using Python and HTTPX. I ran into this problem while trying to scrape websites that protect their product search with Google reCAPTCHA v2.

The Problem

We can’t get the data until we pass the captcha. One alternative is to drive a real browser with Playwright or Selenium, but that is too heavy to run on our application server. So I try as much as possible to avoid GUI browsers, drivers and the like, and instead use httpx in place of the requests module I normally use.

The Solution

One of the safest solutions is to use a third-party service to solve the captcha. I use the Anti-Captcha service for this; here is the module I use:

By combining these two modules we can solve the captcha problem. Let's walk through the code below.

Execute the solution

First, install the modules using pip:

pip install anticaptchaofficial httpx selectolax

The next step is to create an account at anti-captcha.com, then open the dashboard menu and copy the API key from there.

anti captcha dashboard

Here is example code from the official documentation:

# code example
# Minimal Anti-Captcha flow: configure the solver, request a token, print the result.
from anticaptchaofficial.recaptchav2proxyless import recaptchaV2Proxyless

solver = recaptchaV2Proxyless()
solver.set_verbose(1)
solver.set_key("YOUR_API_KEY")
solver.set_website_url("https://website.com")
solver.set_website_key("SITE_KEY")

# Specify softId to earn 10% commission with your app.
# Get your softId here: https://anti-captcha.com/clients/tools/devcenter
solver.set_soft_id(0)

# A result of 0 signals failure; anything else is the g-recaptcha-response token.
g_response = solver.solve_and_return_solution()
if g_response != 0:
    print(f"g-response: {g_response}")
else:
    print(f"task finished with error {solver.error_code}")

After reading the code above, let’s combine it with httpx:

from httpx import Client
from selectolax.parser import HTMLParser
from typing import Optional, Any
from anticaptchaofficial.recaptchav2proxyless import recaptchaV2Proxyless

def get_payload(self, keyword: str) -> dict[str, Any]:
    """Build the search form payload, including a solved reCAPTCHA token.

    Fetches the search page, saves the raw HTML to ``temp/res.html`` under
    ``bc.BASE_DIR``, reads the hidden form fields and the reCAPTCHA site key
    from ``form#SLSearchForm``, asks the Anti-Captcha solver for a token and
    returns everything as a dict ready to be POSTed.

    Relies on ``os``, ``join`` and ``bc`` being available at module scope and
    on ``self`` providing ``client``, ``url``, ``params``, ``headers``,
    ``solver`` and ``api_key``.

    Args:
        keyword: Search text sent as the ``txtAddress_name`` form field.

    Returns:
        The hidden form fields plus ``txtAddress_name`` and
        ``g-recaptcha-response``.

    Raises:
        RuntimeError: If the search form is not present in the page, or the
            solver does not return a token.
    """
    # Fetch the page fresh every time so the hidden form values are current.
    response = self.client.get(url=self.url, params=self.params, headers=self.headers)
    print(f"Excecute URL {response.url} with status code: {response.status_code}")

    # Keep a copy of the fetched page in the temporary area.
    temp_dir = join(bc.BASE_DIR, 'temp')
    os.makedirs(temp_dir, exist_ok=True)
    # Explicit UTF-8 so the dump does not depend on the platform's default encoding.
    with open(join(temp_dir, "res.html"), "w", encoding="utf-8") as file:
        file.write(response.text)

    soup = HTMLParser(response.text)
    container = soup.css_first("form#SLSearchForm")
    if container is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError(
            f"Search form 'form#SLSearchForm' not found at {response.url}"
        )

    # Form field name -> CSS selector of the input that carries its value.
    field_selectors = {
        "DistrictGUID": "#DistrictGUID",
        "DISTRICTCODE": "input[name=DISTRICTCODE]",
        "DataStatus": "#DataStatus",
        "InitialSearch": "input[name=InitialSearch]",
        "SearchType": "#SearchType",
    }
    # Read every field before calling the solver, so a broken page fails
    # before a captcha solve is requested.
    data_dict: dict[str, Any] = {
        name: container.css_first(selector).attrs['value']
        for name, selector in field_selectors.items()
    }
    sitekey = container.css_first("#recaptcha-div").attrs["data-sitekey"]

    # Configure the captcha solver. Only the tail of the API key is shown so
    # the secret does not end up in logs.
    api_key_text = str(self.api_key)
    masked_key = "*" * max(len(api_key_text) - 4, 0) + api_key_text[-4:]
    print("Solver Setup")
    print("Using Anti capcha API KEY: {}".format(masked_key))
    print("website Sitekey: {}".format(sitekey))
    self.solver.set_website_url(str(response.url))
    self.solver.set_verbose(1)
    self.solver.set_key(self.api_key)
    self.solver.set_website_key(sitekey)

    g_recaptcha_response = self.solver.solve_and_return_solution()
    print("Recaptcha Key: {}".format(g_recaptcha_response))

    # The solver reports failure with 0; submitting that as a token is useless.
    if not g_recaptcha_response:
        raise RuntimeError(
            f"Anti-Captcha did not return a solution: {self.solver.error_code}"
        )

    data_dict["txtAddress_name"] = keyword
    data_dict["g-recaptcha-response"] = g_recaptcha_response

    print("Generating new payload")
    return data_dict


def get_data(self, keyword: str):
    """Submit the search form for *keyword* and return the HTTP response.

    Builds a fresh form payload via ``self.get_payload``, obtains a token via
    ``self.get_token`` and POSTs the form to ``self.url`` using
    ``self.client``.

    Args:
        keyword: Search text forwarded to ``get_payload``.

    Returns:
        Whatever ``self.client.post`` returns for the submitted form.
    """
    # Build the payload first; the token is requested only afterwards.
    payload: dict[str, Any] = self.get_payload(keyword=keyword)

    params: dict[str, Any] = {
        "token": self.get_token(),
        "event": "action.PublicSchoolLocatorResults",
        "DistrictCode": payload['DISTRICTCODE'],
    }

    # Only a browser-like User-Agent is sent with the POST; no other request
    # headers are set here.
    headers: dict[str, Any] = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
    }

    return self.client.post(url=self.url, params=params, headers=headers, data=payload)

# response: 200

Conclusion

In conclusion, we can bypass the captcha without browser automation tools such as Selenium and Playwright by using httpx together with Anti-Captcha. I hope this is useful, and good luck!

--

--