# Read the train.conll file

# Load the whole CoNLL file into memory as a list of raw lines
# (line terminators are kept; later steps strip them).
with open("train.conll", "r", encoding="utf-8") as conll_file:
    data = list(conll_file)

# Extract the words

# Collect the token of every line whose tag starts with "B-" or "I-".
#
# Each non-blank line of `data` must contain exactly two whitespace-separated
# fields (token, tag); any other field count raises ValueError on unpacking.
words = []
for line in data:
    line = line.strip()
    if not line:
        # Blank lines carry no token; skip them.
        continue
    word, tag = line.split()
    # str.startswith accepts a tuple, so one call covers both prefixes.
    if tag.startswith(("B-", "I-")):
        words.append(word)

# Concatenate the words and print the result

# Join the collected tokens with no separator and print the result.
output = "".join(words)
print(output)

# Extract the place names and output them

# Assemble place names from the per-token tags in `data`.
#
# Tags have the form "<prefix>-<type>". For the types listed below:
#   B-<type>  starts a new name with the token (discarding any unfinished one),
#   I-<type>  appends the token to the name being built,
#   E-<type>  appends the token, stores the finished name and resets the buffer.
# Any other tag (other types, other prefixes, or no "-") is ignored and leaves
# the buffer untouched.
#
# Each non-blank line must contain exactly two whitespace-separated fields
# (token, tag); any other field count raises ValueError on unpacking.
_PLACE_TYPES = frozenset(
    (
        "prov",
        "district",
        "town",
        "community",
        "village_group",
        "road",
        "subpoi",
    )
)

places = []
current_place = ""
for line in data:
    line = line.strip()
    if not line:
        continue
    word, tag = line.split()
    # Split on the first "-" only; a tag without "-" yields an empty type.
    prefix, _, entity_type = tag.partition("-")
    if entity_type not in _PLACE_TYPES:
        continue
    if prefix == "B":
        current_place = word
    elif prefix == "I":
        # Handled uniformly for every type, so "I-village_group" tokens are
        # no longer dropped from the middle of a village-group name.
        current_place += word
    elif prefix == "E":
        current_place += word
        places.append(current_place)
        current_place = ""

# Write the place names to a file

# Write the collected place names to ci.txt, one name per line.
# Opening with "w" truncates any existing ci.txt.
with open("ci.txt", "w", encoding="utf-8") as f:
    f.writelines(place + "\n" for place in places)


原文地址: https://www.cveoy.top/t/topic/f2J6 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录