猿问

在 pybind11 中返回多个 py::array 而不进行复制

我正在尝试使用 pybind11 在 C++ 中构建一个 python 模块。我有以下代码:


#include <pybind11/pybind11.h>

#include <pybind11/stl.h>

#include <pybind11/numpy.h>


namespace py = pybind11;


/// One record as accepted by Container::addElement: a single byte-sized
/// integer followed by two double-precision values.
struct ContainerElement {
    uint8_t i;  ///< integer component
    double d;   ///< first floating-point component
    double d2;  ///< second floating-point component
};


class Container

{

private:

    std::vector<uint8_t> ints;

    std::vector<double> doubles;

    std::vector<double> doubles2;


public:


    std::vector<uint8_t>& getInts() { return ints; }

    std::vector<double>& getDoubles() { return doubles; }

    std::vector<double>& getDoubles2() { return doubles2; }


    void addElement(ContainerElement element)

    {

        ints.emplace_back(element.i);

        doubles.emplace_back(element.d);

        doubles2.emplace_back(element.d2);

    }

};


/// Appends one million sample records to `container`.
///
/// Record k (k = 0 .. 999999) stores k in both double fields. The uint8_t
/// field stores k reduced modulo 256, because converting to an 8-bit
/// unsigned type wraps.
///
/// @param container  Destination; whatever it already holds is kept and the
///                   new records are appended after it.
void fillContainer(Container& container)
{
    // Integral bound so the loop comparison stays in int instead of
    // converting the counter to double on every iteration.
    constexpr int kElementCount = 1000000;

    for (int i = 0; i < kElementCount; ++i)
    {
        container.addElement({static_cast<uint8_t>(i),
                              static_cast<double>(i),
                              static_cast<double>(i)});
    }
}


PYBIND11_MODULE(containerInterface, m) {

    py::class_<Container>(m, "Container")

        .def(py::init<>())

        .def("getInts", [](Container& container)

        {

            return py::array_t<uint8_t>(

                    { container.getInts().size() },

                    { sizeof(uint8_t) },

                    container.getInts().data());

        })

        .def("getDoubles", [](Container& container)

        {

            return py::array_t<double>(

                    { container.getDoubles().size() },

                    { sizeof(double) },

                    container.getDoubles().data());

        })



这可行,但是当我检查程序的内存使用情况(使用 psutil.Process(os.getpid()).memory_info().rss)时,发现在调用函数 getInts、getDoubles 和 getDoubles2 时似乎会产生一份副本。有没有办法避免这种情况?


我尝试过使用 np.array(container.getInts(), copy=False),但它仍然会复制。我还尝试按照此处的说明为 Container 类使用 py::buffer_protocol():https://pybind11.readthedocs.io/en/stable/advanced/pycpp/numpy.html 。但是,我只能对 Ints 向量或 Doubles 向量其中之一这样做,而不能同时对所有向量这样做。


这样我就可以使用 i = np.array(container, copy=False),而无需制作副本。但是,正如我所说,它目前仅适用于 Ints 向量。


茅侃侃
浏览 220回答 3
3回答

胡说叔叔

我找到了一个可行的解决方案,虽然它可能不是最优雅的。我创建了三个新类 Ints、Doubles 和 Doubles2,它们接收原始容器,并通过函数调用 getValues() 公开各自的向量。借助这三个类,我可以分别为每个类指定缓冲区协议(共三次)。

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/numpy.h>
#include <pybind11/buffer_info.h>

namespace py = pybind11;

struct ContainerElement
{
    uint8_t i;
    double d;
    double d2;
};

class Container
{
private:
    std::vector<uint8_t> ints;
    std::vector<double> doubles;
    std::vector<double> doubles2;

public:
    std::vector<uint8_t>& getInts() { return ints; }
    std::vector<double>& getDoubles() { return doubles; }
    std::vector<double>& getDoubles2() { return doubles2; }

    void addElement(ContainerElement element)
    {
        ints.emplace_back(element.i);
        doubles.emplace_back(element.d);
        doubles2.emplace_back(element.d2);
    }
};

void fillContainer(Container& container)
{
    for (int i = 0; i < 1e6; ++i)
    {
        container.addElement({ (uint8_t)i, (double)i,(double)i });
    }
}

class Ints
{
private:
    Container& cont;
public:
    Ints(Container& cont) : cont(cont) {}
    std::vector<uint8_t>& getValues() { return cont.getInts(); }
};

class Doubles
{
private:
    Container& cont;
public:
    Doubles(Container& cont) : cont(cont) {}
    std::vector<double>& getValues() { return cont.getDoubles(); }
};

class Doubles2
{
private:
    Container& cont;
public:
    Doubles2(Container& cont) : cont(cont) {}
    std::vector<double>& getValues() { return cont.getDoubles2(); }
};

PYBIND11_MODULE(newInterface, m) {
    py::class_<Container>(m, "Container")
        .def(py::init<>());

    py::class_<Ints>(m, "Ints", py::buffer_protocol())
        .def(py::init<Container&>(), py::keep_alive<1, 2>())
        .def_buffer([](Ints& ints) -> py::buffer_info {
            return py::buffer_info(
                ints.getValues().data(),
                sizeof(uint8_t),
                py::format_descriptor<uint8_t>::format(),
                ints.getValues().size()
            );
        });

    py::class_<Doubles>(m, "Doubles", py::buffer_protocol())
        .def(py::init<Container&>(), py::keep_alive<1, 2>())
        .def_buffer([](Doubles& doubles) -> py::buffer_info {
        return py::buffer_info(
            doubles.getValues().data(),
            sizeof(double),
            py::format_descriptor<double>::format(),
            doubles.getValues().size()
            );
        });

    py::class_<Doubles2>(m, "Doubles2", py::buffer_protocol())
        .def(py::init<Container&>(), py::keep_alive<1, 2>())
        .def_buffer([](Doubles2& doubles2) -> py::buffer_info {
        return py::buffer_info(
            doubles2.getValues().data(),
            sizeof(double),
            py::format_descriptor<double>::format(),
            doubles2.getValues().size()
            );
        });

    m.def("fillContainer", &fillContainer);
}

这样我就可以在 Python 中按以下方式使用代码:

import newInterface as ci
import numpy as np

container = ci.Container()
ci.fillContainer(container)

i = np.array(ci.Ints(container), copy=False)
d = np.array(ci.Doubles(container), copy=False)
d2 = np.array(ci.Doubles2(container), copy=False)

一旦 fillContainer 填充了容器,numpy 数组的构造就不会从这个容器中复制值。

慕的地8271018

我猜您必须指定访问函数返回引用而不是副本(返回副本可能是默认行为)。我不知道在 pybind 中具体如何做到这一点,但我已经用 boost::python 和 Ponder 做到过。也就是说,您需要指定返回值策略(return policy)。

月关宝盒

这并不能直接解决问题,但仍然允许在不进行复制的情况下返回数组缓冲区。灵感来自这个讨论帖:https://github.com/pybind/pybind11/issues/1042

基本上,只需向 py::array() 构造函数提供一个 py::capsule。这样,py::array() 构造函数就不会另行分配缓冲区并复制数据。例如:

// Use this if the C++ buffer should NOT be deallocated
// once Python no longer has a reference to it
py::capsule buffer_handle([](){});

// Use this if the C++ buffer SHOULD be deallocated
// once the Python no longer has a reference to it
// py::capsule buffer_handle(data_buffer, [](void* p){ free(p); });

return py::array(py::buffer_info(
        data_buffer,
        element_size,
        data_type,
        dims_length,
        dims,
        strides), buffer_handle);
随时随地看视频慕课网APP

相关分类

Python
我要回答