gpt4 book ai didi

Python 下一行如果 <! - 正则表达式不匹配

转载 作者:行者123 更新时间:2023-12-01 04:07:26 25 4
gpt4 key购买 nike

我正在编写一个脚本来解析 csv 文件。

如果文件以 <!-- 开头,我希望能够跳到下一个文本行并继续解析。

问题出在我的正则表达式上,我无法匹配。

if re.match(r'^.\<\!', line):
line.next()

示例文本

<!-- Copyright Notice: © 2010 Racing NSW (and other parties working with it). NSW racing information,including fields, form and results, is subject to copyright which is owned by Racing NSW and other parties working with it. -->

Meeting,17/02/16,CANT,Canterbury Park,Weights,TAB,+6m Entire Circuit, ,
Race,1,BENCHMARK 77 HANDICAP,BM77,BM77,1550,BM77 ,3U ,~ ,HCP ,54,0,0,17/02/2016,, , , , ,BenchMark 77, Handicap, For Three-Years-Old and Upwards, No sex restriction,Of $40000. First $23025, second $7925, third $3960, fourth $1885, fifth $955, sixth $450, seventh $450, eighth $450, ninth $450, tenth $450
Horse,1,Balboa Park (NZ),0,"Gai Waterhouse",Randwick,,0,54.5,3-1-1-0 $30000.00,,0,0,0,,65.00,G,
Horse,2,Baylie Louise,0,"Matthew Dale",Canberra,,0,55,16-6-2-4 $112545.00,,0,0,0,,69.00,M,
Horse,3,Beretta,0,"Kris Lees",Broadmeadow,,0,55.5,8-2-1-1 $38305.00,,0,0,0,,66.00,G,
Horse,4,Elle Lou,0,"Chris Waller",Rosehill,,0,57.5,14-2-4-0 $141625.00,,0,0,0,,74.00,M,
Horse,5,Got Unders,0,"Ken Lantry",Broadmeadow,,0,60,33-4-9-9 $140735.00,,0,0,0,,75.00,G,
Horse,6,Lord de Air,0,"Bede Murray",Kembla Grange,,0,57,16-4-2-3 $89050.00,,0,0,0,,69.00,G,
Horse,7,Lucky Liaison,0,"Kristen Buchanan",Wyong,,0,61,49-8-6-8 $257865.00,,0,0,0,,77.00,G,
Horse,8,Makeadane,0,"John P Thompson",Randwick,,0,55,15-2-2-2 $65002.00,,0,0,0,,65.00,G,
Horse,9,Miss Denni (NZ),0,"Chris Waller",Rosehill,,0,57.5,12-2-5-1 $102075.00,,0,0,0,,74.00,M,
Horse,10,Multifacets (NZ),0,"Chris Waller",Rosehill,,0,54,6-1-0-0 $19845.00,,0,0,0,,62.00,C,
Horse,11,Mydream,0,"Melissa Harrison",Kembla Grange,,0,56.5,34-8-2-3 $142520.00,,0,0,0,,72.00,M,
Horse,12,Never Back Down,0,"Jim & Greg Lee",Randwick,,0,58,33-4-3-8 $151090.00,,0,0,0,,71.00,G,
Horse,13,Orcym Sam,0,"Gwenda Markwell",Kembla Grange,,0,59,6-3-2-0 $44350.00,,0,0,0,,73.00,G,
Horse,14,Recife Beach,0,"Kim Waugh",Wyong,,0,57,21-3-5-2 $77175.00,,0,0,0,,69.00,G,
Horse,15,Soros,0,"Joseph Pride",Warwick Farm,,0,60,36-6-2-4 $249975.00,,0,0,0,,75.00,G,
Horse,16,Spiritos,0,"Chris Waller",Rosehill,,0,55.5,8-2-0-1 $45585.00,,0,0,0,,67.00,G,
Horse,17,Ultima Chance,0,"Scott Collings",Goulburn,,0,55,39-9-6-3 $104437.00,,0,0,0,,65.00,G,
Race,2,BENCHMARK 72 HANDICAP,BM72,BM72,1250,BM72 ,3U ,~ ,HCP ,55.5,0,0,17/02/2016,,

这是完整的文件

import csv
import re
from sys import argv
SCRIPT, FILENAME = argv


def out_file_name(file_name):
    """Return *file_name* with ``_clean`` inserted before its extension.

    Only the final extension counts as the suffix, so ``my.data.csv``
    becomes ``my.data_clean.csv`` and dots inside directory components are
    left alone.  A name without any extension simply gets ``_clean``
    appended instead of raising ``IndexError``.
    """
    # Local import keeps this helper self-contained.
    import os

    root, ext = os.path.splitext(file_name)
    return root + '_clean' + ext


def race_table(text_file):
    """Flatten Meeting/Race/Horse rows into one tuple per horse.

    ``text_file`` is an iterable of already-split CSV rows (for example a
    ``csv.reader``).  It is consumed in a single pass: ``Meeting`` and
    ``Race`` rows update the current context, and every ``Horse`` row emits
    one tuple combining that context with the horse's own fields.  Blank
    rows and rows whose first cell starts with ``<!`` (comment banners) are
    skipped.

    Returns a list of 22-tuples ordered as (meeting, date, rail, weather,
    track, distance, benchmark, race, number, name, sex, b_rating, weight,
    barrier, starts, wins, seconds, thirds, prizemoney, trainer, location,
    jockey).  ``prizemoney`` is ``0`` when the results cell carries no fifth
    component.

    Raises ``IndexError`` if a Meeting, Race or Horse row has fewer columns
    than the positions read below.
    """
    output_table = []
    # Context carried over from the most recent Meeting / Race rows.  Start
    # with empty strings so a Horse row that appears first does not fail on
    # an unbound name.
    meeting = rail = weather = track = ''
    date = race = benchmark = distance = ''

    for record in text_file:
        # csv.reader yields [] for blank lines; a comment line is split on
        # its commas, so only its first cell carries the '<!' marker.
        if not record or record[0].lstrip().startswith('<!'):
            continue

        row_type = record[0]
        if row_type == 'Meeting':
            meeting = record[3]
            rail = record[6]
            weather = record[7]
            track = record[8]
        elif row_type == 'Race':
            date = record[13]
            race = record[1]
            benchmark = record[4]
            distance = record[5]
        elif row_type == 'Horse':
            number = record[1]
            name = record[2]
            jockey = record[6]
            barrier = record[7]
            weight = record[8]
            # Results look like "3-1-1-0 $30000.00":
            # starts-wins-seconds-thirds, then an optional prizemoney part.
            res_split = re.split('[- ]', record[9])
            # Pad so a short or empty results cell yields '' placeholders
            # rather than an IndexError.
            starts, wins, seconds, thirds = (res_split + [''] * 4)[:4]
            prizemoney = res_split[4] if len(res_split) > 4 else 0
            trainer = record[4]
            location = record[5]
            b_rating = record[15]
            sex = record[16]
            print(name, wins, seconds)
            output_table.append((
                meeting, date, rail, weather, track, distance,
                benchmark, race, number, name, sex, b_rating,
                weight, barrier, starts, wins, seconds,
                thirds, prizemoney, trainer, location, jockey,
            ))
    return output_table

# Output path: the input name with "_clean" added.
MY_FILE = out_file_name(FILENAME)

# newline='' hands line-ending handling to the csv module, as its
# documentation requires; without it the writer emits extra blank lines on
# Windows and newlines embedded in quoted fields are misread.
with open(FILENAME, 'r', newline='') as f_in, \
        open(MY_FILE, 'w', newline='') as f_out:
    CONTENT = csv.reader(f_in)
    FILE_CONTENTS = race_table(CONTENT)
    # Column order matches the tuples produced by race_table().
    headers = ['MEETING', 'DATE', 'RAIL', 'WEATHER', 'TRACK', 'DISTANCE',
               'BENCHMARK', 'RACE', 'NUMBER', 'NAME', 'SEX', 'B_RATING',
               'WEIGHT', 'BARRIER', 'STARTS', 'WINS', 'SECONDS', 'THIRDS',
               'PRIZEMONEY', 'TRAINER', 'LOCATION', 'JOCKEY']

    f_csv = csv.writer(f_out)
    f_csv.writerow(headers)
    f_csv.writerows(FILE_CONTENTS)


# No-op guard: the conversion above already runs at module level.
if __name__ == '__main__':
    pass

最佳答案

删除表达式开头的点:

>>> s = "<!-- Copyright Notice: © 2010 Racing NSW (and other parties working with it). NSW racing information,including fields, form and results, is subject to copyright which is owned by Racing NSW and other parties working with it. -->"
>>>
>>> re.match(r'^.\<\!', s)
>>> re.match(r'^\<\!', s)
<_sre.SRE_Match object at 0x10da7fed0>

或者,您可以在初始化 csv.reader 时提前过滤注释:

with open(FILENAME, 'r') as f_in, open(MY_FILE, 'w') as f_out:
CONTENT = csv.reader(row for row in f_in if not row.startswith('<!--'))

关于Python 下一行如果 <! - 正则表达式不匹配,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/35388434/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com