I have the following code that inserts documents into MongoDB. The problem is that it is very slow: I can't multiprocess it, and since I have to check whether each document already exists before inserting it, I believe a bulk insert is impossible. I would like to know if there is a faster way to approach this. After profiling the code below, I found that check_record() and update_upstream() are the two most time-consuming functions, so optimizing them would improve the overall speed. Any input on how to optimize the following would be highly appreciated. Thank you! (A bulk-upsert sketch follows the code below.)
import multiprocessing  # was missing: __init__ calls multiprocessing.cpu_count()
import os

import pymongo
from pymongo import ASCENDING, DESCENDING, MongoClient
from tqdm import tqdm

from directory import Directory
from storage_config import StorageConfig

dir = Directory()  # note: shadows the built-in dir()


def DB_collections(collection_type):
    """Maps a short collection code to its full collection name."""
    types = {'p': 'player_stats',
             't': 'team_standings',
             'f': 'fixture_stats',
             'l': 'league_standings',
             'pf': 'fixture_players_stats'}
    return types.get(collection_type)


class DB():
    def __init__(self, league, season, func=None):
        # Credentials come from the environment, never from source.
        self.db_user = os.environ.get('DB_user')
        self.db_pass = os.environ.get('DB_pass')
        self.MONGODB_URL = (f'mongodb+srv://{self.db_user}:{self.db_pass}'
                            f'@cluster0-mbqxj.mongodb.net/<dbname>'
                            f'?retryWrites=true&w=majority')
        self.league = league
        self.season = str(season)
        self.client = MongoClient(self.MONGODB_URL)
        # One database per league-season, e.g. "premier_league2019".
        self.DATABASE = self.client[self.league + self.season]
        self.pool = multiprocessing.cpu_count()
        self.playerfile = f'{self.league}_{self.season}_playerstats.json'
        self.teamfile = f'{self.league}_{self.season}_team_standings.json'
        self.fixturefile = f'{self.league}_{self.season}_fixturestats.json'
        self.leaguefile = f'{self.league}_{self.season}_league_standings.json'
        self.player_fixture = f'{self.league}_{self.season}_player_fixture.json'
        self.func = func

    def execute(self):
        # Runs the callback passed at construction time with this instance.
        if self.func is not None:
            return self.func(self)


def import_json(file):
    """Imports a json file in read mode

    Args:
        file(str): Name of file
    """
    return dir.load_json(file, StorageConfig.DB_DIR)
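Since check_record() and update_upstream() are not shown in the question, assume check_record() queries the collection for an existing document by some identifying field and update_upstream() then updates it. Under that assumption, the per-document round trips can usually be folded into a single bulk_write() of upserts, which makes a separate existence check unnecessary: the server decides insert-vs-update itself. A minimal sketch follows; the function name bulk_upsert and the field name 'f_id' are hypothetical placeholders, so substitute whatever field check_record() currently matches on.

from pymongo import ASCENDING, UpdateOne


def bulk_upsert(collection, documents, key='f_id'):
    """Insert-or-update a batch of documents in one round trip.

    `key` is the field that uniquely identifies a document;
    'f_id' is a placeholder for the field the existence check
    currently uses.
    """
    ops = [UpdateOne({key: doc[key]},   # match an existing record, if any
                     {'$set': doc},     # update it in place, or ...
                     upsert=True)       # ... insert it when there is no match
           for doc in documents]
    if not ops:
        return 0, 0
    # ordered=False lets the server apply the operations in any order
    # and continue past individual failures instead of stopping.
    result = collection.bulk_write(ops, ordered=False)
    return result.upserted_count, result.modified_count

A unique index on the key field, e.g. collection.create_index([(key, ASCENDING)], unique=True), keeps each upsert's filter lookup from scanning the whole collection, which is typically where most of the time in a per-document check_record() goes.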