2 Answers
I traced memory usage with import objgraph and by printing objgraph.show_most_common_types(limit=20). I noticed that the number of tuples and lists kept increasing for the lifetime of each child process. To solve this, I set maxtasksperchild on the Pool, which force-closes each worker process after a fixed number of tasks and thereby releases its memory.
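For reference, this is roughly how the diagnosis looks in isolation (a minimal sketch, assuming objgraph is installed with pip install objgraph; the leaked list is only there to simulate growing object counts):

import objgraph

leaked = []
for step in range(3):
    # Simulate a leak: tuples accumulate and are never released
    leaked.extend(tuple(range(100)) for _ in range(1000))
    # show_growth prints only the type counts that increased since
    # the previous call; types that keep appearing are leak candidates
    objgraph.show_growth(limit=10)

The full example: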
from functools import partial
import itertools
import multiprocessing
import random
import time

# Tracing memory leaks
import objgraph


def svm(input_data, params):
    # Copy the data to avoid changing the original,
    # as input_data is a reference to a pandas dataframe.
    dataset = input_data.copy()
    # Use an SVM here to analyse the data
    score = sum(dataset) + sum(params)  # simulate the score of an svm
    # Simulate a task that takes a bit of time
    time.sleep(0.5)
    return (score, params)
if __name__ == "__main__":
    # Iterable settings
    total_combinations = 2
    total_features = 12

    # Keep track of the best score
    best_score = -1000
    best_param = [0 for _ in range(total_features)]

    # Simulate a dataframe with random data
    input_data = [random.random() for _ in range(100000)]

    # Create a partial function with the data argument fixed
    func = partial(svm, input_data)
    params = itertools.product(range(total_combinations), repeat=total_features)

    # Required on Windows when the script is frozen into an executable;
    # it is a no-op everywhere else.
    multiprocessing.freeze_support()

    # The number of worker processes defaults to the number of cores
    # when omitted; pass an int to set it explicitly. maxtasksperchild=5
    # replaces each worker after 5 tasks, releasing its memory.
    with multiprocessing.Pool(maxtasksperchild=5) as pool:
        # Compute the scores concurrently. The iterable is in the order
        # of millions, so pool.map is unsuitable: it stores every result
        # and would eventually use all available memory. imap_unordered
        # yields results one at a time, and only the best one is kept.
        for score, param in pool.imap_unordered(func, iterable=params, chunksize=10):
            if score > best_score:
                best_score = score
                best_param = param
                # print(best_score)
            # Count the objects in memory. show_most_common_types prints
            # its report itself (and returns None), so it is called
            # directly rather than wrapped in print. If the counts keep
            # increasing, there is a memory leak.
            objgraph.show_most_common_types(limit=20)

        # Wait for all the worker processes to finish their tasks
        pool.close()
        pool.join()

    print(best_score)
    print(best_param)
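Note the trade-off: replacing workers is not free (each restart pays process start-up and, with the spawn start method, re-import cost), so a very small maxtasksperchild slows the pool down. A minimal sketch of the parameter in isolation (the leak, the pool size and the numbers are illustrative, not from the script above):

import multiprocessing
import os

_leak = []  # module-level state that survives between tasks inside a worker

def leaky_task(n):
    # Simulate a per-task allocation that is never released
    _leak.append(bytearray(10**6))  # roughly 1 MB per task
    return os.getpid()

if __name__ == "__main__":
    # Each worker is replaced after 100 completed tasks, capping the
    # leak at roughly 100 MB per worker before the process is recycled
    with multiprocessing.Pool(processes=2, maxtasksperchild=100) as pool:
        pids = set(pool.imap_unordered(leaky_task, range(500)))
    # More than 2 distinct pids confirms that workers were recycled
    print(len(pids), "worker processes were used in total")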