gushiGPT / data /gushi /getdata.py
xRunda
首次提交
5dcfa45
raw
history blame contribute delete
531 Bytes
# _*_ coding:utf-8 _*_
import glob
import json
def is_target_poem(paragraphs):
    """Return True when *paragraphs* has exactly two entries of 16 characters each.

    NOTE(review): presumably this selects seven-character quatrains, where
    each paragraph holds two 7-character lines plus two punctuation marks
    (7 + 1 + 7 + 1 = 16) -- confirm against the chinese-poetry data.
    """
    return len(paragraphs) == 2 and all(len(p) == 16 for p in paragraphs)


# glob returns matches in arbitrary order; sorting keeps the line order of
# the output file reproducible from run to run.
datas = sorted(glob.glob("chinese-poetry/全唐诗/poet.*json"))
for data in datas:
    # Each source file is parsed as a JSON array of poem objects.
    with open(data, "r", encoding='utf-8') as fp:
        tangshi = json.load(fp)

    # One output line per selected poem: its paragraphs joined with no separator.
    lines = [
        "".join(each_shi["paragraphs"]) + "\n"
        for each_shi in tangshi
        if is_target_poem(each_shi["paragraphs"])
    ]

    # Append once per source file instead of reopening the output for every
    # poem; skip the open entirely when nothing matched so no empty file is
    # created. Append mode means re-running the script adds the poems again.
    if lines:
        with open('data/gushi/gushi.txt', "a", encoding='utf-8') as f:
            f.writelines(lines)