慕斯王
使用OP中的代码创建数据框的方法不正确,即方法awsc为空。如果没有看到文件,就不可能知道是否pd.read_json(file, lines=True)是正确的使用方法。pd.json_normalize(awsc['userIdentity'])将在 的专栏上工作dicts。不过该列很可能是字符串。如果dicts是str类型,则使用ast.literal_eval将它们转换为dict类型。import pandas as pdfrom ast import literal_eval# crate a list to add dataframes toawsc_list = list()# iterate through the list of and append them to awsc_listfor file in file_list: awsc_list.append(pd.read_json(file, lines=True)) # concat the files into a single dataframeawsc = pd.concat(awsc_list).reset_index(drop=True)# convert the userIdentity column to dict type, if it contains str typeawsc.userIdentity = awsc.userIdentity.apply(literal_eval)# normalize userIdentitynormalized = pd.json_normalize(awsc['userIdentity'], sep='_')# join awsc with normalized and drop the userIdentity columnawsc = awsc.join(normalized).drop('userIdentity', 1)# rename the columnsawsc.rename(columns={'type':'userIdentity_type', 'principalId':'userIdentity_principalId', 'arn':'userIdentity_arn', 'accountId':'userIdentity_accountId', 'accessKeyId':'userIdentity_accessKeyId', 'userName':'userIdentity_userName',}, inplace=True)带有示例数据的新代码密钥已经具有正确的名称,因此无需重命名使用.json_normalize读取日志,标准化'userIdentity',因此不需要第二步。另请参阅将 Pandas 列中的字典/列表拆分为单独的列import jsonimport pandas as pd# crate a list to add dataframes toawsc_list = list()# list of filesfiles_list = ['test.json', 'test2.json']# read the filessfor file in files_list: with open(file, 'r', encoding='utf-8') as f: data = json.loads(f.read()) # normalize the file and append it to the list of dataframe awsc_list.append(pd.json_normalize(data, 'Records', sep='_')) # concat the files into a single dataframeawsc = pd.concat(awsc_list).reset_index(drop=True)# display(awsc) eventVersion eventTime eventSource eventName awsRegion sourceIPAddress userAgent userIdentity_type userIdentity_principalId userIdentity_arn userIdentity_accessKeyId userIdentity_accountId userIdentity_userName requestParameters_instancesSet_items responseElements_instancesSet_items requestParameters_force userIdentity_sessionContext_attributes_mfaAuthenticated userIdentity_sessionContext_attributes_creationDate requestParameters_keyName responseElements_keyName responseElements_keyFingerprint responseElements_keyMaterial0 1.0 2014-03-06T21:22:54Z ec2.amazonaws.com StartInstances us-east-2 205.251.233.176 ec2-api-tools 1.6.12.2 IAMUser EX_PRINCIPAL_ID arn:aws:iam::123456789012:user/Alice EXAMPLE_KEY_ID 123456789012 Alice [{'instanceId': 'i-ebeaf9e2'}] [{'instanceId': 'i-ebeaf9e2', 'currentState': {'code': 0, 'name': 'pending'}, 'previousState': {'code': 80, 'name': 'stopped'}}] NaN NaN NaN NaN NaN NaN NaN1 1.0 2014-03-06T21:01:59Z ec2.amazonaws.com StopInstances us-east-2 205.251.233.176 ec2-api-tools 1.6.12.2 IAMUser EX_PRINCIPAL_ID arn:aws:iam::123456789012:user/Alice EXAMPLE_KEY_ID 123456789012 Alice [{'instanceId': 'i-ebeaf9e2'}] [{'instanceId': 'i-ebeaf9e2', 'currentState': {'code': 64, 'name': 'stopping'}, 'previousState': {'code': 16, 'name': 'running'}}] False NaN NaN NaN NaN NaN NaN2 1.0 2014-03-06T17:10:34Z ec2.amazonaws.com CreateKeyPair us-east-2 72.21.198.64 EC2ConsoleBackend, aws-sdk-java/Linux/x.xx.fleetxen Java_HotSpot(TM)_64-Bit_Server_VM/xx IAMUser EX_PRINCIPAL_ID arn:aws:iam::123456789012:user/Alice EXAMPLE_KEY_ID 123456789012 Alice NaN NaN NaN false 2014-03-06T15:15:06Z mykeypair mykeypair 30:1d:46:d0:5b:ad:7e:1b:b6:70:62:8b:ff:38:b5:e9:ab:5d:b8:21 <sensitiveDataRemoved>3 1.0 2014-03-06T21:22:54Z ec2.amazonaws.com StartInstances us-east-2 205.251.233.176 ec2-api-tools 1.6.12.2 IAMUser EX_PRINCIPAL_ID arn:aws:iam::123456789012:user/Alice EXAMPLE_KEY_ID 123456789012 Alice [{'instanceId': 'i-ebeaf9e2'}] [{'instanceId': 'i-ebeaf9e2', 'currentState': {'code': 0, 'name': 'pending'}, 'previousState': {'code': 80, 'name': 'stopped'}}] NaN NaN NaN NaN NaN NaN NaN样本数据来自 AWS 的示例数据:CloudTrail 日志文件示例test.jsonJSON 列表[{ "Records": [{ "eventVersion": "1.0", "userIdentity": { "type": "IAMUser", "principalId": "EX_PRINCIPAL_ID", "arn": "arn:aws:iam::123456789012:user/Alice", "accessKeyId": "EXAMPLE_KEY_ID", "accountId": "123456789012", "userName": "Alice" }, "eventTime": "2014-03-06T21:22:54Z", "eventSource": "ec2.amazonaws.com", "eventName": "StartInstances", "awsRegion": "us-east-2", "sourceIPAddress": "205.251.233.176", "userAgent": "ec2-api-tools 1.6.12.2", "requestParameters": { "instancesSet": { "items": [{ "instanceId": "i-ebeaf9e2" } ] } }, "responseElements": { "instancesSet": { "items": [{ "instanceId": "i-ebeaf9e2", "currentState": { "code": 0, "name": "pending" }, "previousState": { "code": 80, "name": "stopped" } } ] } } } ] }, { "Records": [{ "eventVersion": "1.0", "userIdentity": { "type": "IAMUser", "principalId": "EX_PRINCIPAL_ID", "arn": "arn:aws:iam::123456789012:user/Alice", "accountId": "123456789012", "accessKeyId": "EXAMPLE_KEY_ID", "userName": "Alice" }, "eventTime": "2014-03-06T21:01:59Z", "eventSource": "ec2.amazonaws.com", "eventName": "StopInstances", "awsRegion": "us-east-2", "sourceIPAddress": "205.251.233.176", "userAgent": "ec2-api-tools 1.6.12.2", "requestParameters": { "instancesSet": { "items": [{ "instanceId": "i-ebeaf9e2" } ] }, "force": false }, "responseElements": { "instancesSet": { "items": [{ "instanceId": "i-ebeaf9e2", "currentState": { "code": 64, "name": "stopping" }, "previousState": { "code": 16, "name": "running" } } ] } } } ] }, { "Records": [{ "eventVersion": "1.0", "userIdentity": { "type": "IAMUser", "principalId": "EX_PRINCIPAL_ID", "arn": "arn:aws:iam::123456789012:user/Alice", "accountId": "123456789012", "accessKeyId": "EXAMPLE_KEY_ID", "userName": "Alice", "sessionContext": { "attributes": { "mfaAuthenticated": "false", "creationDate": "2014-03-06T15:15:06Z" } } }, "eventTime": "2014-03-06T17:10:34Z", "eventSource": "ec2.amazonaws.com", "eventName": "CreateKeyPair", "awsRegion": "us-east-2", "sourceIPAddress": "72.21.198.64", "userAgent": "EC2ConsoleBackend, aws-sdk-java/Linux/x.xx.fleetxen Java_HotSpot(TM)_64-Bit_Server_VM/xx", "requestParameters": { "keyName": "mykeypair" }, "responseElements": { "keyName": "mykeypair", "keyFingerprint": "30:1d:46:d0:5b:ad:7e:1b:b6:70:62:8b:ff:38:b5:e9:ab:5d:b8:21", "keyMaterial": "\u003csensitiveDataRemoved\u003e" } } ] }]test2.json一个 JSON{ "Records": [{ "eventVersion": "1.0", "userIdentity": { "type": "IAMUser", "principalId": "EX_PRINCIPAL_ID", "arn": "arn:aws:iam::123456789012:user/Alice", "accessKeyId": "EXAMPLE_KEY_ID", "accountId": "123456789012", "userName": "Alice" }, "eventTime": "2014-03-06T21:22:54Z", "eventSource": "ec2.amazonaws.com", "eventName": "StartInstances", "awsRegion": "us-east-2", "sourceIPAddress": "205.251.233.176", "userAgent": "ec2-api-tools 1.6.12.2", "requestParameters": { "instancesSet": { "items": [{ "instanceId": "i-ebeaf9e2" } ] } }, "responseElements": { "instancesSet": { "items": [{ "instanceId": "i-ebeaf9e2", "currentState": { "code": 0, "name": "pending" }, "previousState": { "code": 80, "name": "stopped" } } ] } } } ]}