# Page excerpt (truncated): import jieba / def parse_address(address): # 分词 words = jieba.cut(address) # 初始化地址要素 province = '' city = '' district = '' street = '' road = '' houseno = '' cellno = '' floorno = '' devzon…
import jieba
def parse_address(address): # 分词 words = jieba.cut(address) # 初始化地址要素 province = '' city = '' district = '' street = '' road = '' houseno = '' cellno = '' floorno = '' devzone = '' community = '' intersection = '' subpoi= '' assist = '' poi=''
# 初始化标签
tag_list = []
# 遍历分词结果
for word in words:
# 判断是否为省份
if word.endswith('省') or word.endswith('自治区')or word in['河北', '山西', '辽宁', '吉林', '黑龙江', '江苏', '浙江', '安徽', '福建', '江西', '山东', '河南', '湖北', '湖南', '广东', '广西', '海南', '四川', '贵州', '云南', '西藏', '陕西', '甘肃', '青海', '宁夏', '新疆', '台湾']:
for char in word:
if char == word[0]:
tag_list.append('B-prov')
elif char == word[-1]:
tag_list.append('E-prov')
else:
tag_list.append('I-prov')
province = word
# 判断是否为直辖市
elif word in ['北京', '上海'
# 原文地址: https://www.cveoy.top/t/topic/hsZr 著作权归作者所有。请勿转载和采集!