温温酱
您可以为 OpenSSL 的 MD5() 函数编写一个接受 NumPy 数组的包装器。我们的基准将是纯 Python 实现。

使用 cffi 创建包装器:

    import cffi

    ffi = cffi.FFI()

    header = r"""
    void md5_array(uint64_t* buffer, int len, unsigned char* out);
    """

    source = r"""
    #include <stdint.h>
    #include <openssl/md5.h>

    void md5_array(uint64_t * buffer, int len, unsigned char * out) {
        int i = 0;
        for(i=0; i<len; i++) {
            MD5((const unsigned char *) &buffer[i], 8, out + i*16);
        }
    }
    """

    ffi.set_source("_md5", source, libraries=['ssl'])
    ffi.cdef(header)

    if __name__ == "__main__":
        ffi.compile()

以及:

    import numpy as np
    import _md5

    def md5_array(data):
        out = np.zeros(data.shape, dtype='|S16')

        _md5.lib.md5_array(
            _md5.ffi.from_buffer(data),
            data.size,
            _md5.ffi.cast("unsigned char *", _md5.ffi.from_buffer(out))
        )
        return out

然后比较两者:

    import numpy as np
    import hashlib

    data = np.arange(16, dtype=np.uint64)
    out = [hashlib.md5(i).digest() for i in data]

    print(data)
    # [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    print(out)
    # [b'}\xea6+?\xac\x8e\x00\x95jIR\xa3\xd4\xf4t', ... , b'w)\r\xf2^\x84\x11w\xbb\xa1\x94\xc1\x8c8XS']

    out = md5_array(data)

    print(out)
    # [b'}\xea6+?\xac\x8e\x00\x95jIR\xa3\xd4\xf4t', ... , b'w)\r\xf2^\x84\x11w\xbb\xa1\x94\xc1\x8c8XS']

对于大型数组,速度大约快 15 倍(老实说,我对此感到有些失望……)

    data = np.arange(100000, dtype=np.uint64)

    %timeit [hashlib.md5(i).digest() for i in data]
    169 ms ± 3.14 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

    %timeit md5_array(data)
    12.1 ms ± 144 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)