import datetime
from decimal import Decimal

from pyparsing import (
    Group,
    OneOrMore,
    Regex,
    StringEnd,
    Suppress,
    Word,
    nums,
    oneOf,
)
def get_as_decimal(s, lok, tokens):
    """Parse action: build a ``Decimal`` from a three-token amount.

    Args:
        s: Unused here; first argument of the three-argument parse action
            form.
        lok: Unused here; second argument of the parse action form
            (presumably a typo for ``loc`` -- name kept so the signature
            is unchanged).
        tokens: Exactly three items: integer part, separator, decimal part.

    Returns:
        ``Decimal`` built from ``"<integer part>.<decimal part>"``.

    Raises:
        ValueError: If ``tokens`` does not unpack into exactly three items.
    """
    int_part, _sep, dec_part = tokens
    # The separator token is discarded and '.' is always used, so the
    # result does not depend on which separator appeared in the input.
    return Decimal('{}.{}'.format(int_part, dec_part))
def get_as_date(s, loc, tokens):
    """Parse action: build a ``datetime.date`` from three numeric tokens.

    Two token orders are accepted, told apart by the length of the first
    token: a four-character first token is read as year, month, day;
    anything else is read as day, month, year.

    Args:
        s: Unused here; first argument of the three-argument parse action
            form.
        loc: Unused here; second argument of the parse action form.
        tokens: Exactly three digit strings.

    Returns:
        The corresponding ``datetime.date``.

    Raises:
        ValueError: If ``tokens`` does not unpack into three items, a token
            is not an integer, or the values do not form a valid date.
    """
    first, middle, last = tokens
    if len(first) == 4:
        # Year-first order: year, month, day.
        return datetime.date(int(first), int(middle), int(last))
    # Day-first order: day, month, year.
    return datetime.date(int(last), int(middle), int(first))
def get_as_bool(s, loc, tokens):
    """Parse action: map the first token to a ``bool``.

    Args:
        s: Unused here; first argument of the three-argument parse action
            form.
        loc: Unused here; second argument of the parse action form.
        tokens: Sequence whose first item is the matched flag text.

    Returns:
        ``True`` when the first token is one of ``'1'``, ``'S'``, ``'Y'``,
        ``'T'`` or ``'True'`` (case-sensitive); ``False`` for anything else.
    """
    truthy_markers = ('1', 'S', 'Y', 'T', 'True')
    return tokens[0] in truthy_markers
# Grammar for records of the form: text ; boolean ; date ; cost
# Regex patterns are raw strings so that backslash sequences such as \d
# are not treated as (invalid) Python string escapes.

# Field separator and quote character; both are matched but dropped from
# the parse results.
sep = Suppress(';')
quote = Suppress('"')

# Text field: a run of characters other than '"' and ';', either wrapped
# in quotes or bare. '+' binds tighter than '^'; the parentheses only make
# that grouping explicit.
content = Regex(r'[^";]+')
text = (quote + content + quote) ^ content
text.setParseAction(lambda tokens: tokens[0].strip())

# Boolean field: one of the listed markers, converted by get_as_bool.
boolean = oneOf('1 S Y T True 0 N F False')
boolean.setParseAction(get_as_bool)

# Date field: year-month-day separated by '-' or day/month/year separated
# by '/'. The separators are suppressed, so get_as_date receives exactly
# three tokens.
dash = Suppress('-')
slash = Suppress('/')
year = Regex(r'\d{4}')
month = Regex(r'\d{1,2}')
day = Regex(r'\d{1,2}')
date = (year + dash + month + dash + day) ^ (day + slash + month + slash + year)
date.setParseAction(get_as_date)

# Cost field: digits, a '.' or ',' separator, then exactly two digits;
# converted by get_as_decimal.
cost = Word(nums) + oneOf('. ,') + Regex(r'\d\d')
cost.setParseAction(get_as_decimal)

# One record is grouped so each parsed line keeps its four values together.
line = Group(text + sep + boolean + sep + date + sep + cost)
lines = OneOrMore(line)
lines.setParseAction(lambda tokens: list(tokens))

# '.' does not match a newline, so this consumes (and discards) the rest
# of the first line of input.
header = Suppress(Regex(r'.+'))

# Complete input: header line, one or more records, then end of input.
parser = header + lines + StringEnd()
# Add a comment