在 Python 中从文本文件创建字典

3回答

慕桂英4014372

def process_file(filename):
    """Parse a two-level code listing into a nested dictionary.

    The file is read line by line.  A line whose second and third characters
    are digits (for example ``001 Cholera`` or ``E001 Activities ...``) is an
    entry: its first whitespace-separated token is the sub-key and the rest
    of the line is the value.  Any other non-blank line starts a new group
    and becomes the root key for the entries that follow.  Blank lines are
    ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict mapping each group line (trailing whitespace removed) to a
        dict of ``{sub-key: description}`` for that group.

    Raises:
        ValueError: If an entry line appears before any group line, or if an
            entry line has a code but no description.
        OSError: If the file cannot be opened.
    """
    my_dict = {}
    root_key = None
    # The context manager closes the file even when a malformed line raises.
    with open(filename, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            text = line.rstrip()  # drop the newline and trailing whitespace
            if not text:
                # A blank line is neither a group nor an entry; without this
                # guard it would register an empty-string group.
                continue
            # Looking at characters 2-3 covers both plain codes ("001") and
            # codes with a one-letter prefix ("E001").
            if line[1:3].isdigit():
                if root_key is None:
                    raise ValueError(
                        "%s:%d: entry %r appears before any group line"
                        % (filename, line_number, text)
                    )
                # Split on any run of whitespace so tabs or double spaces
                # between code and description are accepted as well.
                parts = text.split(None, 1)
                if len(parts) != 2:
                    raise ValueError(
                        "%s:%d: entry %r has no description"
                        % (filename, line_number, text)
                    )
                subkey, data = parts
                my_dict[root_key][subkey] = data
            else:
                root_key = text
                my_dict[root_key] = {}  # start an empty group for this key
    return my_dict


# Tested in the Python console (transcript as originally posted):
#
# >>> d = process_file('/tmp/file.txt')
# >>>
# >>> d['Intestinal infectious diseases (001-003)']
# {'003': 'Salmonella', '002': 'Fever', '001': 'Cholera'}
# >>> d['Intestinal infectious diseases (001-003)']['002']
# 'Fever'
# >>> d['Activity (E001-E002)']
# {'E001': 'Activities involving x and y', 'E002': 'Other activities'}
# >>> d['Activity (E001-E002)']['E001']
# 'Activities involving x and y'
# >>>
# >>> d
# {'Activity (E001-E002)': {'E001': 'Activities involving x and y',
#                           'E002': 'Other activities'},
#  'External Cause Status (E000)': {'E000': 'External cause status'},
#  'Intestinal infectious diseases (001-003)': {'003': 'Salmonella',
#                                               '002': 'Fever',
#                                               '001': 'Cholera'},
#  'Zoonotic bacterial diseases (020-022)': {'021': 'Tularemia',
#                                            '020': 'Plague',
#                                            '022': 'Anthrax'}}
#
# WARNING: the first non-blank line of the file must be a group line (a
# "root key"), not a sub-key/data line; otherwise a ValueError is raised.
#
# NOTE: perhaps the leading "E" should be stripped from the sub-keys. Or is
# that not possible -- does the "E" need to be kept somewhere?

陪伴而非守候

# One solution is to use regular expressions to characterise and parse the
# two kinds of line that can occur in this file.
import re

# Header line, e.g. "Intestinal infectious diseases (001-009)":
#   header_re.match(line).groups() -> ('Intestinal infectious diseases', '001-009')
header_re = re.compile(r'([\w\s]+) \(([\w\s\-]+)\)')
# Entry line, e.g. "001 Cholera":
#   entry_re.match(line).groups() -> ('001', 'Cholera')
entry_re = re.compile(r'([EV]?\d{3}) (.+)')

# With these two patterns it is easy to check which kind of line was
# encountered and to split it up as needed: a line that is not an entry ought
# to be a header, and anything else is unexpected input.  Using that to rework
# the function gives the following.


def process_file(filename):
    """Parse a grouped code listing into ``{header: {code: name}}``.

    Each non-blank line is classified with ``entry_re`` and ``header_re``.
    Entries are tested first: an entry always starts with a code, whereas the
    header pattern would also match an entry whose description contains a
    parenthesised word (e.g. ``002 Fever (typhoid)``).

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict keyed by the matched header text (``header.group(0)``); each
        value is a dict mapping entry codes to entry names.

    Raises:
        ValueError: If an entry appears before any header, or a non-blank
            line is neither an entry nor a header.
        OSError: If the file cannot be opened.
    """
    all_groups = {}
    current_group = None
    with open(filename, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            text = line.strip()
            if not text:
                continue  # blank separator lines carry no data

            entry = entry_re.match(text)
            if entry:
                if current_group is None:
                    raise ValueError(
                        "%s:%d: entry %r appears before any header"
                        % (filename, line_number, text)
                    )
                entry_number, entry_name = entry.groups()  # e.g. ('001', 'Cholera')
                current_group[entry_number] = entry_name
                continue

            header = header_re.match(text)
            if header is None:
                # Raise explicitly instead of asserting: assertions are
                # stripped when Python runs with -O.
                raise ValueError(
                    "%s:%d: unrecognised line %r"
                    % (filename, line_number, text)
                )
            current_group = {}
            all_groups[header.group(0)] = current_group
    return all_groups

守着一只汪

# Use a regular expression to decide whether a line is a header or a disease
# entry.  `filename` is expected to be defined by the surrounding code.
import re

# Compiled once, outside the loop.  An entry is an optional "E", three
# digits, one space, then the description.
disease_re = re.compile(r"(E?\d\d\d) (.*)")

mydict = {}
with open(filename, "r") as f:
    header = None
    for line in f:
        text = line.rstrip()  # keep the newline out of keys and values
        if not text:
            continue  # a blank line is neither a header nor an entry
        match_disease = disease_re.match(text)
        if not match_disease:
            header = text
            # Create the group before any entry is stored under it;
            # otherwise the first entry fails with KeyError.
            mydict[header] = {}
        else:
            if header is None:
                raise ValueError(
                    "entry %r appears before any header line" % text
                )
            code = match_disease.group(1)
            disease = match_disease.group(2)
            mydict[header][code] = disease