DMM 作品 CID 查询:区分高清标清月额和非月额作品
# DMM CID lookup script.
#
# Reads one or more CIDs from stdin, searches the DMM monthly catalogue for
# each, and falls back to the general search when no monthly detail link is
# found. Works are collected into a "high definition" list (monthly works
# whose selected download bitrate value exceeds HD_BITRATE_THRESHOLD) and a
# "standard definition" list (all other monthly works plus non-monthly works).
import re
from typing import Optional

import requests
from bs4 import BeautifulSoup

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Seconds to wait for a response; without a timeout requests can block forever.
REQUEST_TIMEOUT = 15
# A selected bitrate option value above this counts as high definition.
HD_BITRATE_THRESHOLD = 3000

MONTHLY_DETAIL_URL = 'https://www.dmm.co.jp/monthly/premium/-/detail/=/cid={}/'
MONTHLY_SEARCH_URL = 'https://www.dmm.co.jp/monthly/-/list/search/=/?searchstr={}'
GENERAL_SEARCH_URL = 'https://www.dmm.co.jp/search/=/searchstr={}'

# Compiled once at module level instead of on every loop iteration.
MONTHLY_LINK_RE = re.compile(
    r'https://www\.dmm\.co\.jp/monthly/premium/-/detail/=/cid=\w+')
VIDEO_LINK_RE = re.compile(
    r'https://www\.dmm\.co\.jp/digital/videoa/-/detail/=/cid=\w+')
CID_RE = re.compile(r'cid=(\w+)')
SIZE_MB_RE = re.compile(r'(\d+)\s*MB')
# Accept whitespace, ASCII commas and full-width commas between CIDs.
CID_SEPARATOR_RE = re.compile(r'[\s,,]+')


def _fetch_soup(url: str) -> BeautifulSoup:
    """Download *url* and return the parsed HTML.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _find_first_cid(search_url: str, link_pattern) -> Optional[str]:
    """Return the CID of the first link on *search_url* matching *link_pattern*.

    Returns None when the page has no matching link or the link carries no
    ``cid=`` component.
    """
    soup = _fetch_soup(search_url)
    link_tag = soup.find('a', href=link_pattern)
    if link_tag is None:
        return None
    # Drop the query string before extracting the CID, as the page links may
    # carry extra parameters after '?'.
    link = link_tag['href'].split('?')[0]
    match = CID_RE.search(link)
    return match.group(1) if match else None


def _is_high_definition(quality_value) -> bool:
    """Return True when *quality_value* is numeric and above the HD threshold."""
    if not quality_value:
        return False
    try:
        return int(quality_value) > HD_BITRATE_THRESHOLD
    except ValueError:
        # Non-numeric option value: treat as standard definition.
        return False


def get_monthly_product_info(product_id):
    """Scrape the monthly detail page of *product_id*.

    Args:
        product_id: CID as used in the DMM detail URL.

    Returns:
        A dict with the keys ``title``, ``quality``, ``quality_value`` and
        ``size``. Any value is None when the corresponding element is not
        present on the page. ``size`` is the text of the size span with the
        largest MB figure.

    Raises:
        requests.RequestException: if the page cannot be downloaded.
    """
    soup = _fetch_soup(MONTHLY_DETAIL_URL.format(product_id))

    title_tag = soup.find('h1', {'class': 'ttl-sectDetail ttl-product'})
    title = title_tag.text.strip() if title_tag else None

    quality = None
    quality_value = None
    quality_select = soup.find('select', {'id': 'download_bitrate'})
    # The select element may be missing; guard before searching inside it.
    if quality_select is not None:
        selected_option = quality_select.find('option', {'selected': True})
        if selected_option is not None:
            # The quality label is the text inside the last parentheses.
            quality = selected_option.text.split('(')[-1].replace(')', '').strip()
            quality_value = selected_option.get('value')

    # Map each advertised size in MB to the span that shows it.
    sizes = {}
    for span in soup.select('#download_filesize > span'):
        size_text = span.text.strip()
        if '容量(目安)' not in size_text:
            continue
        size_match = SIZE_MB_RE.search(size_text)
        if size_match:
            sizes[int(size_match.group(1))] = span

    # max() on an empty dict would raise, so report None when nothing matched.
    size = sizes[max(sizes)].text.strip() if sizes else None

    return {
        'title': title,
        'quality': quality,
        'quality_value': quality_value,
        'size': size,
    }


def format_product_id(product_id):
    """Return *product_id* with every '-' replaced by '00'."""
    return product_id.replace('-', '00')


def main():
    """Prompt for CIDs, query each one, and print the grouped results."""
    cid_str = input('请输入要查询的cid, 多个CID用空格或逗号分隔:')
    cids = [
        format_product_id(cid)
        for cid in CID_SEPARATOR_RE.split(cid_str.strip())
        if cid
    ]

    high_definition_cids = []
    standard_definition_cids = []

    for cid in cids:
        try:
            # Monthly works are looked up first.
            product_id = _find_first_cid(
                MONTHLY_SEARCH_URL.format(cid), MONTHLY_LINK_RE)
            if product_id:
                product_info = get_monthly_product_info(product_id)
                print(f"{product_id} {product_info['title']}")
                print(
                    f"分辨率:{product_info['quality']}, "
                    f"码率:{product_info['quality_value']}, "
                    f"{product_info['size']}\n"
                )
                if _is_high_definition(product_info['quality_value']):
                    high_definition_cids.append(product_id)
                else:
                    standard_definition_cids.append(product_id)
                continue

            # Not a monthly work: fall back to the general search.
            product_id = _find_first_cid(
                GENERAL_SEARCH_URL.format(cid), VIDEO_LINK_RE)
            if product_id:
                print(f'非月额:{product_id}')
                # Non-monthly works are grouped with the standard list.
                standard_definition_cids.append(product_id)
            else:
                # Nothing found for this CID; move on to the next one.
                print(f'未找到cid:{cid}')
        except requests.RequestException as exc:
            # One failed request must not abort the remaining lookups.
            print(f'查询失败cid:{cid} ({exc})')

    print()
    high_definition_count = len(high_definition_cids)
    standard_definition_count = len(standard_definition_cids)

    if high_definition_count > 0:
        print(f"高清月额({high_definition_count}):{'|'.join(high_definition_cids)}")
    if standard_definition_count > 0:
        print(f"标清月额({standard_definition_count}):{'|'.join(standard_definition_cids)}")


if __name__ == '__main__':
    main()
原文地址: https://www.cveoy.top/t/topic/nH1C 著作权归作者所有。请勿转载和采集!