import jiebadef parse_addressaddress # 分词 words = jiebacutaddress # 初始化地址要素 province = city = district = street = # 遍历分词结果 for word in words # 判断是否为省份 if w
import jieba
# Bare names of the four direct-administered municipalities (without '市').
_MUNICIPALITIES = frozenset({'北京', '上海', '天津', '重庆'})
_PROVINCE_SUFFIXES = ('省', '自治区')
_DISTRICT_SUFFIXES = ('区', '县')
_STREET_SUFFIXES = ('街', '路', '巷', '道')


def parse_address(address: str) -> tuple:
    """Split a Chinese address into province, city, district and street.

    The address is tokenised with ``jieba.cut`` and each token is classified
    by its suffix. When several tokens match the same category, the last one
    wins. Categories with no matching token are returned as empty strings.

    Args:
        address: The raw address text.

    Returns:
        A ``(province, city, district, street)`` tuple of strings.
    """
    province = city = district = street = ''

    for word in jieba.cut(address):
        # A municipality may arrive with or without the trailing '市'
        # (e.g. '北京' or '北京市'); normalise before the lookup so both forms
        # fill province *and* city instead of falling through to the plain
        # city branch with an empty province.
        base = word[:-1] if word.endswith('市') else word

        if word.endswith(_PROVINCE_SUFFIXES):
            # '自治区' must be tested before the district suffix '区'.
            province = word
        elif base in _MUNICIPALITIES:
            province = city = base + '市'
        elif word.endswith('市'):
            city = word
        elif word.endswith(_DISTRICT_SUFFIXES):
            district = word
        elif word.endswith(_STREET_SUFFIXES):
            street = word

    return province, city, district, street
# 读取文件中的地址信息
# Read one record per line from 1.txt, parse the address field and print
# its components.
with open('1.txt', 'r', encoding='utf-8') as f:
    for line in f:
        # NOTE(review): the original called split('') which always raises
        # ValueError (empty separator); the real delimiter was lost.
        # Splitting on any whitespace covers both tab- and space-separated
        # records — confirm against the actual file format.
        fields = line.split()
        if len(fields) < 2:
            # Blank or malformed line: there is no address column to parse.
            continue
        info = fields[1]

        province, city, district, street = parse_address(info)

        print('省份:', province)
        print('城市:', city)
        print('区县:', district)
        print('街道:', street)
        print('-----------------------------')
# 原文地址: https://www.cveoy.top/t/topic/fyYO 著作权归作者所有。请勿转载和采集!