Python从CoNLL文件中提取地名信息
# Rebuild sentence text from a CoNLL-style file (one "word tag" pair per
# line, sentences separated by blank lines) and append it to an output file.

# Tags that continue the token opened by the preceding line.  Every other
# tag (the matching B-* tags as well as any unlisted tag) starts a new token.
INSIDE_TAGS = frozenset({
    'I-prov', 'I-district', 'I-town', 'I-community',
    'I-village_group', 'I-road', 'I-subpoi',
})


def parse_sentences(lines):
    """Group CoNLL-style lines into sentences of merged tokens.

    Args:
        lines: Iterable of text lines, each either blank (sentence
            separator) or made of exactly two whitespace-separated fields,
            ``word tag``.

    Returns:
        A list with one entry per sentence; each entry is the list of that
        sentence's tokens, where a word tagged with one of ``INSIDE_TAGS``
        has been glued onto the token before it.

    Raises:
        ValueError: If a non-blank line does not have exactly two fields.
    """
    sentences = []
    tokens = []
    for lineno, line in enumerate(lines, start=1):
        fields = line.split()
        if not fields:
            # Blank or whitespace-only line: the current sentence is done.
            sentences.append(tokens)
            tokens = []
            continue
        if len(fields) != 2:
            raise ValueError(
                f'line {lineno}: expected "word tag", got {line!r}'
            )
        word, tag = fields
        if tag in INSIDE_TAGS and tokens:
            tokens[-1] += word
        else:
            # Also reached by an I-* tag with nothing before it in the
            # sentence, which would otherwise index an empty list.
            tokens.append(word)
    if tokens:
        # Input did not end with a blank line; keep the last sentence.
        sentences.append(tokens)
    return sentences


def read_first_section(path):
    """Return the stripped lines of *path* up to its first blank line."""
    section = []
    with open(path, 'r', encoding='utf-8') as f:
        for raw in f:
            text = raw.strip()
            if not text:
                break
            section.append(text)
    return section


def main(src_path='train.conll', dst_path='ci.txt'):
    """Append the sentences of *src_path* to *dst_path*.

    First appends one line per sentence (its tokens joined with no
    separator).  Then re-reads *dst_path* from the start and appends a blank
    line, a copy of every line up to the first blank line, and another blank
    line.

    Args:
        src_path: CoNLL-style input file, read as UTF-8.
        dst_path: Output file, opened in append mode as UTF-8.
    """
    with open(src_path, 'r', encoding='utf-8') as f:
        sentences = parse_sentences(f)

    # Open the output once instead of once per sentence.
    with open(dst_path, 'a', encoding='utf-8') as f:
        f.writelines(''.join(tokens) + '\n' for tokens in sentences)

    section = read_first_section(dst_path)

    with open(dst_path, 'a', encoding='utf-8') as f:
        f.write('\n')
        f.writelines(text + '\n' for text in section)
        f.write('\n')


if __name__ == '__main__':
    main()
原文地址: https://www.cveoy.top/t/topic/f2J5 著作权归作者所有。请勿转载和采集!