Python 优惠券爬虫代码优化：提高稳定性和错误处理

本文提供一个 Python 优惠券爬虫代码示例，并针对其稳定性和错误处理机制进行优化。

原始代码:

import requests
from http.cookiejar import CookieJar
from bs4 import BeautifulSoup
import urllib.parse

LOGIN_URL = 'http://example.com/login'
COUPON_URL = 'http://example.com/coupons'
COUPON_TARGET_URL = 'http://example.com/coupons/123'


class CouponSpider:
    """Log in to the site and fetch the target coupon page.

    This is the unoptimized version: no HTTP status code is inspected and a
    missing coupon link is not guarded against.
    """

    def __init__(self) -> None:
        # A single Session is shared by login() and grab_coupon().
        self.session = requests.Session()
        # Replace the session's cookie jar with a plain stdlib CookieJar.
        self.session.cookies = CookieJar()

    def login(self, username: str, password: str) -> None:
        """POST the credentials to LOGIN_URL as form data and print the cookies.

        The response is never examined, so 'Login success' is printed
        regardless of what the server answered.
        """
        data = {'username': username, 'password': password}
        # The return value (the response) is discarded here.
        self.session.post(LOGIN_URL, data=data)
        print('Login success')

        # Show the cookies currently held by the session.
        for cookie in self.session.cookies:
            # String concatenation: raises TypeError if cookie.value is None.
            print(cookie.name + ': ' + cookie.value)

    def grab_coupon(self) -> None:
        """Fetch COUPON_URL, follow the link to COUPON_TARGET_URL and print the result.

        Raises:
            UnicodeDecodeError: If a response body is not valid UTF-8.
            TypeError: If the page contains no matching link, because
                ``find`` then returns None and ``None['href']`` fails.
        """
        response = self.session.get(COUPON_URL)
        # Strict decode of the raw bytes; the status code is not checked first.
        html = response.content.decode('UTF-8')

        soup = BeautifulSoup(html, 'html.parser')
        # First <a> whose href attribute equals COUPON_TARGET_URL exactly.
        target_element = soup.find('a', href=COUPON_TARGET_URL)
        # Resolve the href against the listing page URL.
        target_url = urllib.parse.urljoin(COUPON_URL, target_element['href'])

        response = self.session.get(target_url)
        print(response.content.decode('UTF-8'))
        # Printed unconditionally; the second response's status is not checked.
        print('Grab coupon success')


if __name__ == '__main__':
    # Script entry point: log in, then request the coupon. grab_coupon() runs
    # no matter how the login went, since login() reports nothing back.
    spider = CouponSpider()
    # NOTE(review): 'username' / 'password' look like placeholders; supply
    # real credentials before running.
    spider.login('username', 'password')
    spider.grab_coupon()

优化后的代码:

import requests
from http.cookiejar import CookieJar
from bs4 import BeautifulSoup
import urllib.parse

LOGIN_URL = 'http://example.com/login'
COUPON_URL = 'http://example.com/coupons'
COUPON_TARGET_URL = 'http://example.com/coupons/123'


class CouponSpider:
    """Log in to the site and claim the target coupon over one shared session.

    Every request is bounded by a timeout and guarded against transport
    errors, so a failure is reported through a printed message and a
    ``False`` return value instead of an uncaught exception.
    """

    def __init__(self, timeout=10):
        """Create the HTTP session.

        Args:
            timeout: Seconds to wait for each request. requests applies no
                timeout by default, which would let a stalled server hang
                the spider indefinitely.
        """
        self.session = requests.Session()
        self.session.cookies = CookieJar()
        self.timeout = timeout

    def login(self, username, password):
        """POST the credentials to LOGIN_URL as form data.

        Args:
            username: Account name sent in the ``username`` form field.
            password: Password sent in the ``password`` form field.

        Returns:
            True if the server answered with HTTP 200, False on any other
            status or on a network error.
        """
        data = {'username': username, 'password': password}
        try:
            response = self.session.post(LOGIN_URL, data=data, timeout=self.timeout)
        except requests.RequestException as exc:
            print(f'Login failed: {exc}')
            return False

        # NOTE: a 200 status only proves the request was served. Many sites
        # also answer 200 when the credentials are rejected, so a real
        # deployment should additionally check the body or a session cookie.
        succeeded = response.status_code == 200
        print('Login success' if succeeded else 'Login failed')

        # Debug aid: show the cookies held by the session. These values are
        # session credentials, so do not send this output to shared logs.
        for cookie in self.session.cookies:
            # f-string instead of '+' so a cookie whose value is None does
            # not raise TypeError.
            print(f'{cookie.name}: {cookie.value}')
        return succeeded

    def grab_coupon(self):
        """Open the coupon listing, locate the target link and request it.

        Returns:
            True if the coupon page was fetched with HTTP 200, False if the
            listing could not be loaded, the link was not found, or the
            coupon request failed.
        """
        listing_html = self._get(COUPON_URL)
        if listing_html is None:
            print('Failed to access coupon page')
            return False

        soup = BeautifulSoup(listing_html, 'html.parser')
        target_element = soup.find('a', href=self._is_target_href)
        if target_element is None:
            print('Coupon not found')
            return False

        target_url = urllib.parse.urljoin(COUPON_URL, target_element['href'])
        coupon_html = self._get(target_url)
        if coupon_html is None:
            print('Failed to grab coupon')
            return False

        print(coupon_html)
        print('Grab coupon success')
        return True

    def _get(self, url):
        """Return the decoded body of *url*, or None on a network error or non-200 status."""
        try:
            response = self.session.get(url, timeout=self.timeout)
        except requests.RequestException as exc:
            print(f'Request to {url} failed: {exc}')
            return None
        if response.status_code != 200:
            return None
        # errors='replace' keeps one malformed byte from aborting the run.
        return response.content.decode('UTF-8', errors='replace')

    @staticmethod
    def _is_target_href(href):
        """Tell whether *href*, resolved against COUPON_URL, is COUPON_TARGET_URL.

        Resolving first lets relative links such as ``/coupons/123`` match,
        not only the absolute URL. *href* is None for anchors without one.
        """
        if href is None:
            return False
        return urllib.parse.urljoin(COUPON_URL, href) == COUPON_TARGET_URL


if __name__ == '__main__':
    spider = CouponSpider()
    # Do not attempt to claim the coupon when the login step reported failure.
    if spider.login('username', 'password'):
        spider.grab_coupon()

修改说明:

在 login 方法中，根据响应状态码判断登录是否成功。需要注意：状态码 200 只说明请求被正常处理，很多网站在登录失败时同样返回 200，实际使用时还应检查响应内容或会话 Cookie。
在grab_coupon方法中，根据响应状态码判断访问页面和抢购优惠券是否成功。
对可能出现的None对象进行判断，避免程序因为找不到目标元素而出错。
修改了部分输出信息，更加直观。

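通过这些优化，代码在遇到非 200 响应或找不到目标元素时不再直接崩溃，而是给出明确的提示，稳定性有所提高。在实际使用中，还应为每个请求设置超时（timeout），并捕获 requests.RequestException 等网络异常，以免程序因连接失败或服务器无响应而挂起或崩溃。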