gpt4 book ai didi

python - 如何将 C++ 函数周围的 R 包装器转换为 Python/Numpy

转载 作者:行者123 更新时间:2023-12-03 17:40:10 28 4
gpt4 key购买 nike

R包Ckmeans.1d.dp依赖 C++ code完成其 99% 的工作。

我想在 Python 中使用这个功能而不必依赖 RPy2。因此,我想将 R 包装器“转换”为一个类似的 Python 包装器,让它在 Numpy 数组上运行,就像 R 代码在 R 向量上运行一样。这可能吗?看起来应该可行,因为 C++ 代码本身(在我这个外行看来)似乎可以独立工作。

但是,Cython 的文档并没有真正涵盖这个用例,即用 Python 包装现有的 C++ 代码。文档中只在两处(here 和 here)简单提到过,但由于我以前从未使用过 C++,所以我很困惑。

这是我的尝试,它失败了,并出现了一系列 “Cannot assign type 'double' to 'double *'” 错误:

目录结构

.
├── Ckmeans.1d.dp # clone of https://github.com/cran/Ckmeans.1d.dp
├── ckmeans
│   ├── __init__.py
│   └── _ckmeans.pyx
├── setup.py
└── src
   └── Ckmeans.1d.dp_pymain.cpp

src/Ckmeans.1d.dp_pymain.cpp
#include "../Ckmeans.1d.dp/src/Ckmeans.1d.dp.h"
/*
 * Pointer-based entry point around kmeans_1d_dp().
 *
 * Every argument, including the scalar lengths and the k bounds, is
 * received through a pointer.  The weight vector y is only forwarded when
 * it has one entry per data point; otherwise a null pointer is passed on.
 * On return the labels in cluster[] are 1-based.
 */
static void Ckmeans_1d_dp(double *x, int* length, double *y, int * ylength,
                          int* minK, int *maxK, int* cluster,
                          double* centers, double* withinss, int* size)
{
    // Convert the length once; the call and the loop bound below both
    // use this same unsigned value.
    const size_t n = (size_t)(*length);

    // A weight vector whose length differs from the data is ignored.
    if (*ylength != *length) {
        y = 0;
    }

    // Run the C++ one-dimensional clustering algorithm.
    kmeans_1d_dp(x, n, y, (size_t)(*minK), (size_t)(*maxK),
                 cluster, centers, withinss, size);

    // Shift the cluster numbering from 0-based to 1-based.
    for (size_t idx = 0; idx < n; ++idx) {
        cluster[idx] += 1;
    }
}

ckmeans/__init__.py
from ._ckmeans import ckmeans

ckmeans/_ckmeans.pyx
cimport numpy as np
import numpy as np
from .ckmeans import ClusterResult

# Declare the C++ wrapper so it can be called from Cython. Every parameter,
# including the lengths and the k bounds, is a pointer.
cdef extern from "../src/Ckmeans.1d.dp_pymain.cpp":
void Ckmeans_1d_dp(double *x, int* length,
double *y, int * ylength,
int* minK, int *maxK,
int* cluster, double* centers, double* withinss, int* size)

# NOTE: this is the failing first attempt, kept exactly as posted.
# The locals below are declared as C scalars and handed to Ckmeans_1d_dp by
# value, while every parameter of that function is a pointer.
def ckmeans(np.ndarray[np.double_t, ndim=1] x, int* min_k, int* max_k):
cdef int n_x = len(x)
# NOTE(review): N is not defined anywhere in this snippet.
cdef double y = np.repeat(1, N)
cdef int n_y = len(y)
# Output variables, declared as single scalars rather than arrays.
cdef double cluster
cdef double centers
cdef double within_ss
cdef int sizes
# Arguments are passed by value here; no addresses are taken.
Ckmeans_1d_dp(x, n_x, y, n_y, min_k, max_k, cluster, centers, within_ss, sizes)
return (np.array(cluster), np.array(centers), np.array(within_ss), np.array(sizes))

最佳答案

cdef extern 部分是正确的。问题(正如 Mihai Todor 在 2016 年的评论中指出的)在于我没有将指针传递给 Ckmeans_1d_dp 函数。
Cython 使用与 C 相同的“取地址”运算符 & 来获取指针,例如 &x 是指向 x 的指针。
为了获得指向 Numpy 数组的指针,应该获取数组第一个元素的地址,例如对数组 x 使用 &x[0]。必须确保数组在内存中是连续的(相邻元素具有相邻地址),因为 C 和 C++ 中的数组就是这样布局的;遍历数组相当于递增指针。下面是 _ckmeans.pyx 中 ckmeans() 的一个可用定义:

def ckmeans(
    np.ndarray[np.float64_t] x,
    int min_k,
    int max_k,
    np.ndarray[np.float64_t] weights
):
    """Cluster the 1-D array ``x`` by calling the wrapped ``Ckmeans_1d_dp``.

    Parameters
    ----------
    x : 1-D float64 array
        Values to cluster.
    min_k, max_k : int
        Bounds on the number of clusters, forwarded to the C++ routine.
    weights : 1-D float64 array
        Per-element weights. They are only used when there is exactly one
        weight per element of ``x``; the C++ wrapper drops a weight vector
        of any other length. Pass an empty array for an unweighted run.

    Returns
    -------
    tuple
        ``(clustering, k, centers, sizes, within_ss, total_ss, between_ss)``
        where ``clustering`` holds 0-based cluster labels and the
        per-cluster arrays are truncated to the ``k`` clusters in use.
    """
    # Ensure input arrays are contiguous; if the input data is not
    # already contiguous and in C order, this might make a copy!
    # The result is bound back to the typed arguments so that the
    # pointers taken below refer to the contiguous buffers.
    x = np.ascontiguousarray(x, dtype=np.float64)
    weights = np.ascontiguousarray(weights, dtype=np.float64)

    cdef int n_x = len(x)
    cdef int n_weights = len(weights)

    # Output: cluster membership for each element.
    # np.intc is the NumPy dtype that matches a C ``int``.
    cdef np.ndarray[int, ndim=1] clustering = np.empty((n_x,), dtype=np.intc)

    # Outputs: results for each cluster
    # Pre-allocate these for max k, then truncate later
    cdef np.ndarray[np.double_t, ndim=1] centers = np.empty((max_k,), dtype=np.float64)
    cdef np.ndarray[np.double_t, ndim=1] within_ss = np.zeros((max_k,), dtype=np.float64)
    cdef np.ndarray[int, ndim=1] sizes = np.zeros((max_k,), dtype=np.intc)

    # Outputs: overall clustering stats
    cdef double total_ss = 0
    cdef double between_ss = 0

    # An empty weight array has no first element to take the address of.
    # Hand over NULL instead; the C++ wrapper ignores the weights anyway
    # whenever their length differs from that of x.
    cdef double* weights_ptr = NULL
    if n_weights > 0:
        weights_ptr = &weights[0]

    # Call the 'cdef extern' function
    # NOTE(review): the `_ckmeans.` qualifier is kept from the original;
    # it presumably relies on the declaration being cimported under that
    # name - confirm against the module layout.
    _ckmeans.Ckmeans_1d_dp(
        &x[0],
        &n_x,
        weights_ptr,
        &n_weights,
        &min_k,
        &max_k,
        &clustering[0],
        &centers[0],
        &within_ss[0],
        &sizes[0],
    )

    # Calculate overall clustering stats
    if n_x == n_weights and weights.sum() != 0:
        total_ss = np.sum(
            weights * (x - np.sum(x * weights) / weights.sum()) ** 2
        )
    else:
        total_ss = np.sum((x - x.sum() / n_x) ** 2)
    between_ss = total_ss - within_ss.sum()

    # Extract the final number of clusters from the results.
    # We initialized sizes as a vector of 0's, and cluster size can never be
    # zero, so we know that any 0 size element is an empty/unused cluster.
    cdef int k = np.sum(sizes > 0)

    # Truncate output arrays to remove unused clusters
    centers = centers[:k]
    within_ss = within_ss[:k]
    sizes = sizes[:k]

    # Change the clustering back to 0-indexed, because
    # the R wrapper changes it to 1-indexed.
    return (
        clustering - 1,
        k,
        centers,
        sizes,
        within_ss,
        total_ss,
        between_ss
    )
请注意,这个特定的 R 包现在有一个 Python 包装器: https://github.com/djdt/ckwrap .

关于python - 如何将 C++ 函数周围的 R 包装器转换为 Python/Numpy,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37978519/

28 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com