import os
import re
from collections import Counter
from collections import OrderedDict
# ---------------------------------------------------------------------------
# Access-log summary script.
#
# Reads access_1.log from a fixed directory, extracts the client IP, date,
# request action and status code from every line that matches `regexp`,
# prints each matched record, and finally reports the N most frequent
# client IPs, where N is typed in by the user.
# ---------------------------------------------------------------------------
fileNames = []
textInfo = []
# Client IP -> status code of the last matching line seen for that client.
d = {}

currentDirectoryPath = os.getcwd()
print(currentDirectoryPath)

# Named groups captured from one log line (matching is anchored at the start
# of the line because `match` is used):
#   clientIP   - dotted quad at the beginning of the line
#   timestamp  - dd/Mon/yyyy date following a '['
#   action     - 3-4 uppercase letters following a '"'
#   statuscode - three digits (100-599) following the closing '"'
# Every fragment is a raw string so that \d and \s reach the regex engine
# untouched instead of being treated as (invalid) string escapes.
# NOTE(review): {3,4} truncates longer verbs such as DELETE or OPTIONS to
# their first four letters - confirm whether that is intended.
regexp = re.compile(
    r'(?P<clientIP>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).+\['
    r'(?P<timestamp>\d{2}/[A-Z][a-z]{2}/\d\d\d\d).+"'
    r'(?P<action>[A-Z]{3,4}).+"'
    r'\s*(?P<statuscode>[1-5][0-9][0-9])'
)

# The relative file name opened below is resolved against this directory.
os.chdir("/content/drive/log")
currentDirectoryPath = os.getcwd()
listOfFileNames = os.listdir(currentDirectoryPath)

matched = 0
failed = 0
cnt_clientIPs = Counter()
cnt_clientAction = Counter()
cnt_clientTimeStamp = Counter()
cnt_clientStatusCode = Counter()

# Per-record report; print() appends one more newline, leaving a blank line
# between consecutive records.
_RECORD_TEMPLATE = (
    'client .........: %s\n'
    'timestamp ......: %s\n'
    'action .........: %s\n'
    'statuscode.........: %s\n'
)

# Single pass over the log. A file object can only be iterated once, so the
# counters and the IP -> status mapping are all filled in the same loop, and
# the `with` block guarantees the handle is closed afterwards.
with open('access_1.log', 'r') as f:
    for line in f:
        m = regexp.match(line)
        if m is None:
            failed += 1
            continue

        matched += 1
        client_ip = m.group('clientIP')
        timestamp = m.group('timestamp')
        action = m.group('action')
        status_code = m.group('statuscode')

        cnt_clientIPs[client_ip] += 1
        cnt_clientAction[action] += 1
        cnt_clientTimeStamp[timestamp] += 1
        cnt_clientStatusCode[status_code] += 1
        d[client_ip] = status_code

        print(_RECORD_TEMPLATE % (client_ip, timestamp, action, status_code))

print(d)

userInputIP = input("Enter how many of the top clients you want to see. ")
# Validate the answer before printing anything so that a typo does not
# abort the script in the middle of the report.
try:
    top_client_count = int(userInputIP)
except ValueError:
    raise SystemExit(
        '[!] Expected a whole number of clients, got %r' % userInputIP
    ) from None

print('[*] %d lines matched the regular expression' % (matched))
print('[*] %d lines failed to match the regular expression' % (failed), end='\n\n')
print('[*] ============================================')
print('[*] '+ userInputIP +' Most Frequently Occurring Clients Queried')
print('[*] ============================================')
for clientIP, count in cnt_clientIPs.most_common(top_client_count):
    print('[*] %30s: %d' % (clientIP, count))
print('[*] ============================================')
上面的这些行是一些测试行，用于说明我在文本文件中处理的内容。
翻翻过去那场雪
相关分类