multiprocessing.map 的替代方案，不会存储函数的返回值

import objgraph我已经使用和打印跟踪了内存使用情况objgraph.show_most_common_types(limit=20)。我注意到元组和列表的数量在子进程的持续时间内不断增加。为了解决这个问题，我更改了maxtasksperchild在Pool一段时间后强制关闭进程并因此释放内存。from functools import partialimport itertoolsimport multiprocessingimport randomimport time# Tracing memory leaksimport objgraphdef svm(input_data, params):    # Copy the data to avoid changing the original data    # as input_data is a reference to a pandas dataframe.    dataset = input_data.copy()    # Use svm here to analyse data    score = sum(dataset) + sum(params)  # simulate score of svm    # Simulate a process that takes a bit of time    time.sleep(0.5)    return (score, params)if __name__ == "__main__":    # iterable settings    total_combinations = 2    total_features = 12    # Keep track of best score    best_score = -1000    best_param = [0 for _ in range(total_features)]    # Simulate a dataframe with random data    input_data = [random.random() for _ in range(100000)]    # Create a partial function with the necessary args    func = partial(svm, input_data)    params = itertools.product(range(total_combinations), repeat=total_features)    # Without this, multiprocessing gives error    multiprocessing.freeze_support()    # Set the number of worker processes    # Empty for all the cores    # Int for number of processes    with multiprocessing.Pool(maxtasksperchild=5) as pool:        # Calculate scores concurrently        # As the iterable is in the order of millions, this value        # will get continuously large until it uses all available        # memory as the map stores the results, that in this case        # it's not needed.        for score, param in pool.imap_unordered(func, iterable=params, chunksize=10):            if score > best_score:                best_score = score                best_param = param                # print(best_score)            # Count the number of objects in the memory            # If the number of objects keep increasing, it's a memory leak            print(objgraph.show_most_common_types(limit=20))    # Wait for all the processes to terminate their tasks    pool.close()    pool.join()    print(best_score)    print(best_param)

multiprocessing.map 的替代方案，不会存储函数的返回值

2回答