# Python代码实现基于标签的文本信息提取
# 读取train.conll文件
# Load every line of the tagged corpus into memory (line endings are kept).
with open("train.conll", "r", encoding="utf-8") as corpus:
    data = list(corpus)
# 提取词
# Collect the token of every line whose tag starts with "B-" or "I-".
# Each non-blank line is expected to hold exactly two whitespace-separated
# fields (token, tag); any other shape raises ValueError on unpacking.
words = []
for raw_line in data:
    entry = raw_line.strip()
    if not entry:
        # Blank lines carry no token.
        continue
    word, tag = entry.split()
    if tag.startswith(("B-", "I-")):
        words.append(word)
# 拼接词并输出
# Concatenate the collected tokens without a separator and print the result.
output = "".join(words)
print(output)
# 提取地名并输出
# Assemble place names from the B-/I-/E- tagged lines in ``data``.
#
# Each non-blank line is expected to hold exactly two whitespace-separated
# fields (token, tag); any other shape raises ValueError on unpacking.
# A tag has the form "<position>-<label>":
#   B-<label>  starts a new name with this token,
#   I-<label>  appends this token to the name being built,
#   E-<label>  appends this token, stores the finished name in ``places``
#              and resets the accumulator.
# Only the labels listed in ``_PLACE_LABELS`` are processed; lines with any
# other tag are skipped and leave a partially built name untouched.
_PLACE_LABELS = frozenset(
    (
        "prov",
        "district",
        "town",
        "community",
        "village_group",
        "road",
        "subpoi",
    )
)

places = []
current_place = ""
for line in data:
    line = line.strip()
    if not line:
        continue
    word, tag = line.split()
    position, _sep, label = tag.partition("-")
    if label not in _PLACE_LABELS:
        continue
    if position == "B":
        current_place = word
    elif position == "I":
        # Handled uniformly for every label. The previous hand-written chain
        # had no "I-village_group" branch, so interior tokens of a
        # village-group name were silently dropped.
        current_place += word
    elif position == "E":
        current_place += word
        places.append(current_place)
        current_place = ""
# 输出地名
# Write the extracted place names to ci.txt, one name per line.
with open("ci.txt", "w", encoding="utf-8") as sink:
    sink.writelines(name + "\n" for name in places)
# 原文地址: https://www.cveoy.top/t/topic/f2J6 著作权归作者所有。请勿转载和采集!