Appearance
模拟登录
- 使用 cookie 模拟登录
直接使用 cookies
- 进入目标网页,登录成功后,获取登录后的 cookie
- 使用 cookie 发送请求
python
import scrapy
class HubSpider(scrapy.Spider):
    """Fetch a GitHub page as a logged-in user by replaying browser cookies.

    The cookie string is expected to be copied from a browser session that
    has already logged in; it is parsed into a dict and attached to the
    first request.
    """

    name = "hub"
    allowed_domains = ["github.com"]
    # Left empty on purpose: the initial request is built in start().
    start_urls = []

    @staticmethod
    def _parse_cookie_string(cookie_str):
        """Convert a ``"k1=v1; k2=v2"`` cookie string into a dict.

        Each pair is split on its first ``=`` only, so a value that itself
        contains ``=`` is kept intact. Fragments that are empty or have no
        ``=`` are skipped.
        """
        cookies = {}
        for pair in cookie_str.split(";"):
            key, sep, value = pair.strip().partition("=")
            if key and sep:
                cookies[key] = value
        return cookies

    async def start(self):
        """Yield the first request, carrying the logged-in cookies."""
        # Paste the cookie string of an already logged-in session here.
        cookie_str = """cookie str"""
        # Assemble the cookies dict expected by scrapy.Request.
        cookies = self._parse_cookie_string(cookie_str)
        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/114.0.0.0 Safari/537.36",
        }
        # Send the request.
        yield scrapy.Request(url="https://github.com/xxx", cookies=cookies, headers=headers)

    def parse(self, response):
        """Print the raw response body so the login state can be inspected."""
        # Check the login result and parse the data.
        print(response.body)

模拟登录
- 分析登录页面,获取登录请求的参数
- 发送登录请求,获取登录后的 cookie
- 使用 cookie 发送请求,进入目标页面
python
import scrapy
class LoginSpider(scrapy.Spider):
    """Log in to GitHub by submitting the login form, then check the result."""

    name = "login"
    allowed_domains = ["github.com"]
    # Login page; parse() reads the hidden form fields from its HTML.
    start_urls = ["https://github.com/login"]

    def parse(self, response):
        """Read the hidden form fields and submit the login POST request."""
        # Collect the parameters required by the login request.
        commit = response.xpath('//input[@name="commit"]/@value').get()
        authenticity_token = response.xpath('//input[@name="authenticity_token"]/@value').get()
        if commit is None or authenticity_token is None:
            # FormRequest cannot URL-encode None values, so stop with a clear
            # message instead of failing later with a TypeError.
            self.logger.error(
                "Login form fields not found on %s (commit=%r, authenticity_token=%r)",
                response.url,
                commit,
                authenticity_token,
            )
            return

        # Parameters of the login request.
        data = {
            "commit": commit,
            "authenticity_token": authenticity_token,
            "add_account": "",
            "login": "xxx",  # your own account
            "password": "xxx",  # your own password
            "webauthn-conditional": "undefined",
            "javascript-support": "true",
            "webauthn-support": "supported",
            "webauthn-iuvpaa-support": "supported",
            "return_to": "https://github.com/",
        }
        # Mask the password so it never ends up in console output or logs.
        print("请求参数", {**data, "password": "***"})
        yield scrapy.FormRequest(url="https://github.com/session", formdata=data, callback=self.check_login)

    def check_login(self, response):
        """Report whether the login succeeded, based on the dashboard heading."""
        # NOTE(review): relies on the page exposing "#dashboard > h2" with the
        # text "Home" after a successful login - confirm against current markup.
        home = response.css("#dashboard>h2::text").get()
        # Ignore surrounding whitespace; a missing heading counts as failure.
        if home is not None and home.strip() == "Home":
            print("登录成功")
        else:
            print("登录失败")