今天有个老项目出现了一个问题:这个项目以前网元之间的交互使用的是 Redis 中的 list,但测试发现当 list 中的数据量很大(10W+)时,Python 项目处理这些数据的效率非常低下。经过分析,发现 list 中有很多重复数据,于是临时写了个脚本,在不影响原有顺序的前提下删除 list 中的重复数据,代码参考如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Deduplicate the Redis list ``prophecy_task`` in place.

Keeps the first occurrence of each entry in its original position.
The previous version issued one ZRANK + ZADD network round trip per
element (O(n) round trips for 100k+ items); the deduplication is now
done locally with a set and written back through a single pipeline.
"""

import logging
import traceback

import redis
from tornado.log import LogFormatter as _LogFormatter

# Connection settings for the target Redis instance.
REDIS_DICT = {
    "host": "192.168.10.102",
    "port": 6679,
    "db": 0,
    "password": "123456"
}


class LogFormatter(_LogFormatter, object):
    """Init tornado.log.LogFormatter from logging.config.fileConfig."""

    def __init__(self, **kwargs):
        # Supply defaults only when the logging config did not set them.
        if kwargs.get('fmt') is None:
            kwargs['fmt'] = ('%(color)s[%(levelname)s %(asctime)s '
                             '%(filename)s:%(lineno)d]%(end_color)s %(message)s')
        if kwargs.get('datefmt') is None:
            kwargs['datefmt'] = '%Y-%m-%d %H:%M:%S'
        super(LogFormatter, self).__init__(**kwargs)


runtime = 0
channel = logging.StreamHandler()
channel.setFormatter(LogFormatter())
logging.getLogger().addHandler(channel)
logger = logging.getLogger("cobbler")


class RedisUtils(object):
    """Singleton wrapper around a Redis connection pool.

    Every public method of the underlying ``StrictRedis`` client is
    re-exported as an attribute of the instance, so callers can write
    ``rds.lrange(...)`` directly.
    """

    def __new__(cls, connection_kwargs=None):
        if not hasattr(cls, '_instance'):
            if connection_kwargs is None:
                # BUG FIX: the original returned None implicitly when the
                # first call omitted the kwargs; fail loudly instead of
                # handing callers a None "instance".
                raise ValueError(
                    "connection_kwargs is required on first instantiation")
            cls._instance = super(RedisUtils, cls).__new__(cls)
            cls.pool = redis.ConnectionPool(**connection_kwargs)
        return cls._instance

    def __init__(self, kwargs=None):
        try:
            self.conn = redis.StrictRedis(connection_pool=self.pool)
            # Re-export the client's public API onto this wrapper.
            for func_name in dir(self.conn):
                if not func_name.startswith("_") and func_name[0].islower():
                    setattr(self, func_name, getattr(self.conn, func_name))
        except redis.RedisError:
            # Narrowed from a bare except: only connection/client errors
            # are expected here; anything else should propagate.
            logger.error(traceback.format_exc())


def cobblerEx():
    """Remove duplicates from ``prophecy_task`` while preserving order.

    Fetches the whole list once, deduplicates in memory (first-seen
    order wins), then atomically replaces the list via a pipeline so
    concurrent readers never observe a half-rebuilt list.
    """
    rds = RedisUtils(REDIS_DICT)
    # 获取list中所有数据 (fetch every element of the list)
    prophecy_task_list = rds.lrange("prophecy_task", 0, -1)
    print("prophecy_task中的总量为", len(prophecy_task_list))

    print("开始将重复的数据剔除")
    # Local set-based dedup: O(1) membership test per element instead of
    # one ZRANK round trip per element, and no temporary "tempzset" key.
    seen = set()
    deduped = []
    for task in prophecy_task_list:
        if task not in seen:
            seen.add(task)
            deduped.append(task)
    print("去重后的数据量大小为", len(deduped))

    print("开始将去重后的数据写回prophecy_task")
    # DELETE + RPUSH in one MULTI/EXEC transaction; RPUSH preserves the
    # order of *deduped*. Chunk the pushes to bound single-command size.
    pipe = rds.conn.pipeline()
    pipe.delete("prophecy_task")
    for i in range(0, len(deduped), 1000):
        pipe.rpush("prophecy_task", *deduped[i:i + 1000])
    pipe.execute()
    print("处理完成!")


if __name__ == '__main__':
    cobblerEx()