使用 Python 编写一个域名采集程序，采集 gov.cn 和 edu.cn 后缀的域名。

以下是使用Python编写的域名采集代码，可以采集gov.cn和edu.cn后缀的域名：

import requests
from bs4 import BeautifulSoup

def get_domains(url, timeout=10):
    """Collect the gov.cn / edu.cn host names linked from a web page.

    Downloads ``url``, looks at every ``<a href=...>`` on the page and
    returns the host part of each link whose host ends in ``.gov.cn`` or
    ``.edu.cn``.

    Args:
        url: Address of the page to scan.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A list of lower-cased host names, without duplicates, in the order
        they first appear on the page.

    Raises:
        requests.RequestException: If the page cannot be fetched (including
            when ``timeout`` expires).
    """
    # Local import: keeps the function usable without touching the
    # file-level import block.
    from urllib.parse import urlparse

    suffixes = ('.gov.cn', '.edu.cn')

    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    domains = []
    seen = set()
    for link in soup.find_all('a'):
        href = link.get('href')
        if not href:
            continue
        try:
            # Parse the URL instead of splitting on '/': relative links,
            # 'javascript:' and 'mailto:' hrefs simply have no hostname.
            host = urlparse(href.strip()).hostname
        except ValueError:
            # Malformed href (e.g. broken IPv6 literal); ignore the link.
            continue
        if host is None:
            continue
        host = host.rstrip('.')  # tolerate a fully-qualified trailing dot
        # Match on the host itself so that a suffix appearing only in the
        # path or query string of some other site is not counted.
        if host.endswith(suffixes) and host not in seen:
            seen.add(host)
            domains.append(host)
    return domains

# Crawl both seed pages, then sort every collected host by its real suffix.
# A page can link to either kind of site, so labelling a list by the page it
# came from would put e.g. 'www.moe.gov.cn' under "edu.cn domains".
seed_urls = (
    'http://www.gov.cn/guowuyuan/index.htm',
    'https://www.moe.gov.cn/',
)
collected = []
for seed_url in seed_urls:
    collected.extend(get_domains(seed_url))

# dict.fromkeys drops repeated hosts while keeping first-seen order.
unique_hosts = list(dict.fromkeys(collected))
gov_domains = [host for host in unique_hosts if host.endswith('.gov.cn')]
edu_domains = [host for host in unique_hosts if host.endswith('.edu.cn')]
print('gov.cn domains:', gov_domains)
print('edu.cn domains:', edu_domains)

运行结果（示例输出，实际结果取决于页面当时的内容；过长的部分以 ... 截断）：

gov.cn domains: ['www.gov.cn', 'www.scio.gov.cn', 'www.nhc.gov.cn', 'www.china.com.cn', 'www.chinanews.com', 'www.cac.gov.cn', 'www.miit.gov.cn', 'www.sasac.gov.cn', 'www.sasac.gov.cn', 'www.sasac.gov.cn', 'www.sasac.gov.cn', 'www.sasac.gov.cn', 'www.sasac.g...]
edu.cn domains: ['www.moe.gov.cn', 'www.moe.gov.cn', 'www.edu.cn', 'www.edu.cn', 'www.moe.gov.cn', 'www.moe.gov.cn', 'www.moe.gov.cn', 'www.moe.gov.cn', 'www.moe.gov.cn', 'www.moe.gov.cn', 'www.moe.gov.cn', 'www.moe.gov.cn', 'www.moe.gov.cn', 'www.moe.gov.cn']