# Load every line of the CoNLL training file (trailing newlines are kept).
with open('train.conll', mode='r', encoding='utf-8') as f:
    lines = list(f)

# Merge the tokens of each record and append one line per record to ci.txt.
#
# `lines` holds the raw lines of the input file: one "token tag" pair per
# line, with a blank line closing each record.

# Tags marking a token that continues the entry started by the previous token.
CONTINUATION_TAGS = frozenset([
    'I-prov', 'I-district', 'I-town', 'I-community',
    'I-village_group', 'I-road', 'I-subpoi',
])

words = []
# Open the output once rather than once per record; append mode is kept so
# any existing content of ci.txt is preserved.
with open('ci.txt', 'a', encoding='utf-8') as f:
    for lineno, line in enumerate(lines, start=1):
        fields = line.split()
        if not fields:
            # Blank or whitespace-only line: the current record is complete.
            # Empty records are skipped so they do not produce blank lines
            # in ci.txt.
            if words:
                f.write(''.join(words) + '\n')
            # Reset the word list for the next record.
            words = []
            continue
        if len(fields) != 2:
            # Fail with the offending line instead of an opaque unpack error.
            raise ValueError(
                "line {}: expected 'token tag', got {!r}".format(lineno, line))
        word, tag = fields
        if tag in CONTINUATION_TAGS and words:
            # Glue the token onto the entry currently being built.
            words[-1] += word
        else:
            # B-* tags, every other tag, and a continuation tag with nothing
            # before it all start a new entry.
            words.append(word)
    # Flush the last record when the input does not end with a blank line.
    if words:
        f.write(''.join(words) + '\n')
        words = []

# Collect the stripped lines of ci.txt up to (not including) the first
# blank line.
loc = []
# Use a context manager so the file handle is closed deterministically.
with open('ci.txt', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            break
        loc.append(line)

# Append the collected entries to ci.txt: first a blank separator line, then
# each entry on its own line followed by a blank line.
with open('ci.txt', 'a', encoding='utf-8') as f:
    f.write('\n')
    for entry in loc:
        f.write(entry + '\n')
        f.write('\n')
# Python从Conll文件中提取地名信息
#
# 原文地址: https://www.cveoy.top/t/topic/f2J5 著作权归作者所有。请勿转载和采集!
#
# 免费AI点我,无需注册和登录