import requests
from bs4 import BeautifulSoup
import sqlite3

# Fetch the target page, pretending to be a desktop browser so the server
# does not reject the request as a bot.
url = 'http://49.235.120.214/sunzhiwei/index.html'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763'
}
codes = requests.get(url, headers=headers).text

# Parse the HTML and collect every <p> tag.
soup = BeautifulSoup(codes, 'html.parser')
p_tags_content = soup.find_all('p')

# Create the selfintroduction table on first run (id + text content).
conn = sqlite3.connect('my_database.db')
cursor = conn.cursor()
cursor.execute(
    'CREATE TABLE IF NOT EXISTS selfintroduction (id INTEGER PRIMARY KEY, content TEXT)'
)
conn.commit()

# Insert the text of each <p> tag, numbering rows from 1.
# NOTE: use a parameterized query instead of building SQL with an f-string —
# the original f-string had mismatched quotes (a SyntaxError) and would also
# break / be injectable whenever the scraped text contains quote characters.
rows = [(index, tag.get_text()) for index, tag in enumerate(p_tags_content, start=1)]
cursor.executemany('INSERT INTO selfintroduction VALUES (?, ?)', rows)
conn.commit()  # commit the whole batch in one transaction
print(f'插入了{len(p_tags_content)}条记录')

# Re-open the database and read back what was just written.
conn = sqlite3.connect('my_database.db')
cursor = conn.cursor()
try:
    # Fetch every row of the selfintroduction table.
    cursor.execute('SELECT * FROM selfintroduction')
    rows = cursor.fetchall()

    # Print each (id, content) tuple on its own line.
    for row in rows:
        print(row)
finally:
    # The original leaked the connection; always release it, even if the
    # query raises (e.g. the table does not exist yet).
    conn.close()

运行结果内容:

插入了3条记录
(1, 'Hello, my name is Sun Zhiwei. I am a software engineer.')
(2, 'I have experience in web development and data analysis.')
(3, 'I am proficient in Python, SQL, and various web technologies.')
Python爬虫:使用BeautifulSoup和SQLite抓取网页内容并存储

原文地址: http://www.cveoy.top/t/topic/pdl5 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录