抖音推广运营/太原seo哪家好
在本篇博客中,我们将使用selenium模拟登录新浪微博,并破解其登录时的图片验证码。微博和简书、b站的登录有所不同:后两者每次登录都会出现验证码,而微博有时有验证码、有时没有,所以写法稍有不同。
我们需要使用验证码识别服务平台——超级鹰,来帮助我们破解图片验证码。下面的chaojiying.py文件用于调用超级鹰服务,帮助我们破解点触或图片验证码,直接照搬即可:
import requests
from hashlib import md5#使用超级鹰来进行图片验证码和点触验证码的识别
class Chaojiying(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    The constructor stores the account name, the MD5 hex digest of the
    password and the software ID; every request sends them as the form
    fields ``user``, ``pass2`` and ``softid``.
    """

    def __init__(self, username, password, soft_id, timeout=30):
        """Prepare the shared request parameters and headers.

        :param username: Chaojiying account name.
        :param password: Plain-text account password; only its MD5 hex
            digest is kept on the instance.
        :param soft_id: Software ID issued by Chaojiying.
        :param timeout: Seconds to wait for each HTTP request. Without a
            timeout ``requests`` may block indefinitely on a stalled server.
        """
        self.username = username
        self.password = md5(password.encode('utf-8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
        }

    def post_pic(self, im, codetype):
        """Upload a captcha image for recognition.

        :param im: Image content as bytes.
        :param codetype: Captcha type code; see
            http://www.chaojiying.com/price.html
        :return: The service response decoded from JSON.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def report_error(self, im_id):
        """Report a wrongly recognised captcha.

        :param im_id: Image ID of the captcha being reported.
        :return: The service response decoded from JSON.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()
程序主体框架:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException,NoSuchElementException
from WeiBoLogin.chaojiying import Chaojiying
import requests
from requests import RequestException
# Weibo account username and password (fill in your own).
USERNAME = ''
PASSWORD = ''

# Chaojiying username, password and software ID (registration required).
CHAOJIYING_USERNAME = ''
CHAOJIYING_PASSWORD = ''
# The value was left blank in the original, which is a syntax error; an empty
# placeholder (like the other credentials) keeps the module importable until
# a real software ID is filled in.
CHAOJIYING_SOFT_ID = ''
# Captcha type: 4-6 character alphanumeric image captcha; see the Chaojiying
# website for the full list of type codes.
CHAOJIYING_KIND = 1902

# Solve the image captcha and log in to Weibo.
class WeiboLogin():
    """Log in to weibo.com through Selenium, solving the image captcha when shown.

    This is the skeleton of the program: ``get_click_image``,
    ``login_successfully``, ``get_click_button`` and ``open`` are placeholders
    here, and ``login`` drives the overall flow.
    """

    def __init__(self):
        """Start Chrome and create the wait helper and the captcha client."""
        self.url = 'https://www.weibo.com'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 20)
        self.username = USERNAME
        # NOTE: the attribute name is misspelled ("passward"); it is kept
        # because open() reads the password under this exact name.
        self.passward = PASSWORD
        self.chaojiying = Chaojiying(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

    def __del__(self):
        """Shut the browser down when the object is collected."""
        # __init__ may have failed before `browser` was assigned.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole driver session; close() only closes the
            # current window.
            browser.quit()

    def get_click_image(self, name='captcha_weibo.png'):
        """Placeholder: fetch the captcha image."""
        pass

    def login_successfully(self):
        """Placeholder: report whether the login succeeded."""
        pass

    def get_click_button(self):
        """Placeholder: locate the login button."""
        pass

    def open(self):
        """Placeholder: open the login page and type the credentials."""
        pass

    def login(self, max_retries=3):
        """Run the login flow, solving the captcha when one is required.

        Each attempt opens the page, submits the credentials and, if that is
        not enough, asks Chaojiying to recognise the captcha and submits again.
        A wrongly recognised captcha is reported back to Chaojiying before the
        next attempt.

        :param max_retries: Maximum number of full attempts. The original
            retried through unbounded recursion.
        :return: True once the login succeeds, False when all attempts fail.
        """
        for _ in range(max_retries):
            # Open the login page and enter username and password.
            self.open()
            # Find and click the login button.
            self.get_click_button().click()
            if self.login_successfully():  # sometimes no captcha is needed
                print("登陆成功")
                return True

            # Fetch the captcha image.
            image = self.get_click_image()
            if image is None:
                # Nothing to recognise; start over with a fresh page.
                continue

            # Recognise the captcha.
            result = self.chaojiying.post_pic(image, CHAOJIYING_KIND)
            print(result)
            # Check that the recognition actually produced an answer.
            code = result.get('pic_str')
            if not code:
                continue

            # Find the captcha input box and type the answer.
            verifycode = self.wait.until(EC.presence_of_element_located((By.NAME, 'verifycode')))
            verifycode.send_keys(code)
            # Find and click the login button again.
            self.get_click_button().click()
            if self.login_successfully():
                print('登录成功')
                return True

            # The answer was rejected: report it, then retry.
            self.chaojiying.report_error(result['pic_id'])
        return False


if __name__ == '__main__':
    login = WeiboLogin()
    login.login()
打开登录界面,输入用户名和密码:
def open(self):
    """Load the login page and type the stored username and password.

    :return: None
    """
    self.browser.get(self.url)

    # Username box:
    # <input id="loginname" type="text" class="W_input" maxlength="128" autocomplete="off" action-data="text=邮箱/会员帐号/手机号" action-type="text_copy" name="username" node-type="username" tabindex="1">
    username_locator = (By.ID, 'loginname')
    # Password box:
    # <input type="password" class="W_input" maxlength="24" autocomplete="off" value="" action-type="text_copy" name="password" node-type="password" tabindex="2">
    password_locator = (By.NAME, 'password')

    # Wait for both inputs to be present in the DOM.
    username_box = self.wait.until(EC.presence_of_element_located(username_locator))
    password_box = self.wait.until(EC.presence_of_element_located(password_locator))

    # Type the credentials.
    username_box.send_keys(self.username)
    password_box.send_keys(self.passward)
找到登录按钮:
def get_click_button(self):
    """Wait for the login button to become clickable and return it.

    :return: The element matched by the ``.W_btn_a`` CSS selector.
    """
    # Login button markup:
    # <a href="javascript:void(0)" class="W_btn_a btn_32px" action-type="btn_submit" node-type="submitBtn" suda-data="key=tblog_weibologin3&value=click_sign" tabindex="6"><span node-type="submitStates">登录</span></a>
    clickable = EC.element_to_be_clickable((By.CSS_SELECTOR, '.W_btn_a'))
    return self.wait.until(clickable)
判断登录是否成功,如果成功会有消息提示:
def login_successfully(self):
    """Tell whether the login succeeded.

    Waits up to 5 seconds for the ``.ficon_mail`` element to be present.

    :return: True when the element shows up, False on timeout.
    """
    # Element looked for:
    # <em class="W_ficon ficon_mail S_ficon">I</em>
    short_wait = WebDriverWait(self.browser, 5)
    mail_icon_present = EC.presence_of_element_located((By.CSS_SELECTOR, '.ficon_mail'))
    try:
        found = short_wait.until(mail_icon_present)
    except TimeoutException:
        return False
    return bool(found)
如果登录不成功,则获取图片验证码:
def get_click_image(self, name='captcha_weibo.png'):
    """Download the captcha image, save it to disk and return its bytes.

    :param name: Path of the file the image is written to.
    :return: The image content, or None when the captcha element does not
        appear or the download yields no response.
    """
    # Captcha image markup:
    # <img width="95" height="34" action-type="btn_change_verifycode" node-type="verifycode_image" src="https://login.sina.com.cn/cgi/pin.php?r=88815771&s=0&p=gz-66c0488ef9191010d88bea8c9f3a09fdf3bf">
    try:
        element = self.wait.until(
            EC.presence_of_element_located((By.XPATH, '//img[@node-type="verifycode_image"]'))
        )
    except (TimeoutException, NoSuchElementException):
        # WebDriverWait.until raises TimeoutException when the element never
        # shows up; the original caught only NoSuchElementException, so this
        # fallback was never reached.
        print("")
        return None

    image_url = element.get_attribute('src')  # link to the image
    # NOTE(review): get_html is not defined in the visible source; presumably
    # it returns a requests.Response, or None on failure - confirm.
    response = get_html(image_url)
    if response is None:
        return None
    verifycode_image = response.content  # image content

    with open(name, 'wb') as f:
        f.write(verifycode_image)
    return verifycode_image
完整项目