我的代码使用 pandas 合并多个 csv 文件并删除重复项。是否可以在合并后的单个文件中添加一个带有值的附加列（表头）？
附加列应命名为 Host Alias，其值应与 Host Name 相对应。
例如：Host Name 是 dpc01n1 时，对应的 Host Alias 应该是 dev_dom1；Host Name 是 dpc02n1 时，对应的 Host Alias 应该是 dev_dom2，依此类推。
这是我的代码
from glob import glob
import pandas as pd
class bcolors:
    """ANSI escape sequences used to style text printed to a terminal.

    Prefix a message with one of the style codes and finish it with
    ``ENDC``; otherwise the style stays active for everything printed
    afterwards.
    """

    HEADER = '\033[95m'     # bright magenta
    OKBLUE = '\033[94m'     # bright blue
    OKGREEN = '\033[92m'    # bright green
    WARNING = '\033[93m'    # bright yellow
    FAIL = '\033[91m'       # bright red
    ENDC = '\033[0m'        # reset all attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
# Merge every matching .log file (semicolon-separated) into one CSV file,
# keeping only the columns listed in final_headers and dropping rows that
# are exact duplicates of an earlier row.
input_path = r'C:\Users\urale\Desktop\logs'
output_path = r'C:\Users\urale\Desktop\logs' + '\\'
output_name = 'output.csv'

# Columns read from each input file; all other columns are ignored.
final_headers = [
    'Start Time',
    'epoch',
    'Host Name',
    'Db Alias',
    'Database',
    'Db Host',
    'Db Host IP',
    'IP Port',
    'Latency (us)',
]

print(f'{bcolors.OKBLUE}Getting .log files from {input_path}{bcolors.ENDC}')
# Raw string: '\p' is not a valid escape sequence and triggers a warning
# in a normal string literal.
stock_files = sorted(glob(input_path + r'\pc_dblatmonstat_*_*.log'))
if not stock_files:
    # pd.concat() fails with an unhelpful ValueError on an empty list, so
    # stop early with a clear message instead.
    raise SystemExit(
        f'{bcolors.FAIL}No matching .log files found in {input_path}'
        f'{bcolors.ENDC}'
    )

# Read each file into its own DataFrame. A regex separator requires the
# python parsing engine.
print(f'{bcolors.OKBLUE}Reading files{bcolors.ENDC}')
content = [
    pd.read_csv(f, usecols=final_headers, sep='[;]', engine='python')
    for f in stock_files
]

# Stack all frames into one, renumbering the index from 0.
print(f'{bcolors.OKBLUE}Combining files{bcolors.ENDC}')
combo = pd.concat(content, ignore_index=True)

# Keep the first occurrence of each fully identical row.
print(f'{bcolors.OKBLUE}Dropping duplicates{bcolors.ENDC}')
combo = combo.drop_duplicates()

# Write the merged result without the DataFrame index column.
combo.to_csv(output_path + output_name, index=False)
print(
    f'{bcolors.OKGREEN}Merged file output to {output_path} as {output_name}'
    f'{bcolors.ENDC}'
)
交互式爱情
jeck猫
相关分类