r - 基于节点权重构建图的优化算法-6ren

r - 基于节点权重构建图的优化算法

转载作者：塔克拉玛干更新时间：2023-11-03 02:38:16

我正在尝试改进一个函数，该函数根据由某些节点属性计算出的分数来构建网络。它试图在图中找到使节点属性乘积最大化的最佳子网。

该函数从一个随机节点开始，并开始在第一个邻居中搜索，如果有一些邻居的节点分数满足阈值，则将邻居添加到第一个节点并继续该过程，直到不再添加(添加邻居不会产生所需的分数增量)。如果第一个邻居中没有节点产生分数的增量，则该函数将查找第二个邻居。在这种情况下，很可能有几条路径连接节点(2 度邻居)，在这种特定情况下，选择的路径将是最短且权重最高的路径(节点属性之一)。

我可以对代码进行一些并行处理，尽管我不知道如何在此类函数中实现它。

函数如下:

# Greedily grow a high-scoring connected subnetwork around a seed.
#
# Starting from `seed`, the vertices within `rad` steps of the current
# subnetwork are scored one at a time (rad = 1, then 2, ... up to `d`). The
# best candidate is accepted when it raises the score by more than 0.01; the
# vertices returned by node2treePath() for that candidate are merged in and the
# search restarts from radius 1. Growth stops once a pass adds no vertex.
#
# Relies on two objects defined outside this function:
#   * genesets.length.null.stat - list keyed by subnetwork size (as a string);
#     each element holds the median and sd of Za for networks of that size.
#   * node2treePath() - retrieves the shortest path with the highest node
#     weights between a candidate and the current subnetwork.
#
# Arguments:
#   G    igraph graph whose vertices carry `name`, `weight` and
#        `RWRNodeweight` attributes.
#   seed vertex (or vertices) to start from; the subgraph they induce must be
#        connected, otherwise an error is raised.
#   d    largest neighbourhood order to search (default 2).
#
# Returns the induced subgraph of `G` on the selected vertices.
build_network <- function(G, seed, d = 2) {

  # Standardised score of a subnetwork: Za compared with the null statistics
  # stored for subnetworks of the same size.
  score.fun <- function(g) {
    vs <- V(g)
    Za <- sum(vs$weight * vs$RWRNodeweight) / sqrt(sum(vs$RWRNodeweight^2))
    k <- vcount(g)
    null_stat <- genesets.length.null.stat[[as.character(k)]]
    if (is.null(null_stat)) {
      # Fail with a clear message instead of a zero-length `if` condition.
      stop("No null statistics available for a subnetwork of size ", k)
    }
    (Za - null_stat[1]) / null_stat[2]
  }

  # Try every candidate in `out.nodes` on top of `in.nodes` and keep the one
  # giving the highest score. Each candidate is scored exactly once.
  best.fun <- function(in.nodes, out.nodes) {
    score <- -Inf
    best <- character()
    for (node in out.nodes) {
      candidate_score <- score.fun(induced.subgraph(G, c(in.nodes, node)))
      if (candidate_score > score) {
        score <- candidate_score
        best <- node
      }
    }
    list(node = best, score = score)
  }

  subG <- induced.subgraph(G, seed)
  if (!is.connected(subG)) {          # the seed must be connected
    stop("Input seeds are disjoint")
  }

  all_vertices <- V(G)  # invariant across iterations, so look it up once

  while (TRUE) {
    in.nodes <- V(subG)$name
    node_num <- vcount(subG)
    subsum <- score.fun(subG)

    # seq_len() so that d = 0 performs no search (1:0 would iterate 1, 0)
    for (rad in seq_len(d)) {
      tmp.neigh <- unlist(neighborhood(G, order = rad, nodes = in.nodes))
      pot.nodes <- all_vertices[tmp.neigh]$name
      out.nodes <- setdiff(pot.nodes, in.nodes)
      if (length(out.nodes) == 0) break

      best_node <- best.fun(in.nodes, out.nodes)

      if (best_node$score > subsum + 0.01) {
        # Pull in the connecting path so the subnetwork stays connected even
        # when the accepted candidate is more than one step away.
        path_nodes <- unlist(lapply(
          best_node$node,
          function(x) node2treePath(G, in.nodes, x)
        ))
        subG <- induced.subgraph(G, c(path_nodes, in.nodes))
        break
      }
    }

    # No vertex was added during this pass: the subnetwork has converged.
    if (node_num == vcount(subG)) break
  }
  subG
}

我正在尝试将此函数应用于约 10,000 个节点的图。下面是运行该函数的近似示例代码

### generate some example data
library(igraph)
my_graph <- erdos.renyi.game(10000, 0.0003)
V(my_graph)$name <- seq_len(vcount(my_graph))
V(my_graph)$weight <- rnorm(10000)
V(my_graph)$RWRNodeweight <- runif(10000, min = 0, max = 0.05)

### Run the function
# Grow one subnetwork per vertex of the example graph. The graph built above
# is the search space, and `d` is the largest neighbourhood order to explore.
d <- 2
seeds <- V(my_graph)$name
# Preallocate: vertex names are 1..n, so they double as list positions.
sublist <- vector("list", length(seeds))
for (node in seeds) {
  sublist[[node]] <- build_network(my_graph, node, d)
}

编辑:下面是 head(genesets.length.null.stat) 的 dput 输出

# dput() output of head(genesets.length.null.stat): a list keyed by subnetwork
# size (as a string). Per the comment in build_network, each element holds the
# median of Za followed by the sd of Za for networks of that size.
structure(list(`1` = c(1.01397367504035, 1.18858228819048), `2` = c(1.61970348041337, 1.30189433386605), `3` = c(2.11767222957028, 1.36222065695878), `4` = c(2.47710421934929, 1.36968129959296), `5` = c(2.776011866622, 1.36318885187196), `6` = c(3.16885126246671, 1.42577861995897)), .Names = c("1", "2", "3", "4", "5", "6"))

这是 node2treePath 函数:

# Select the path(s) linking `node` to the vertex set `Tnodes` that are
# shortest and, among those, carry the largest summed vertex `weight`.
#
# Arguments:
#   G      igraph graph whose vertices have `weight` and `name` attributes.
#   Tnodes target vertices (the current subnetwork).
#   node   source vertex.
#
# Returns the vertex names along the selected path(s), flattened into a single
# vector; ties are all kept.
node2treePath <- function (G, Tnodes, node){
  all_paths <- get.all.shortest.paths(G, node, Tnodes)$res

  # keep only the paths of minimal length
  n_vertices <- unlist(lapply(all_paths, length))
  shortest <- all_paths[which(n_vertices == min(n_vertices))]

  # among those, keep the path(s) with the largest total vertex weight
  total_weight <- unlist(lapply(shortest, function(p) sum(V(G)[p]$weight)))
  heaviest <- shortest[which(total_weight == max(total_weight))]

  # flatten the winning path(s) into one vector of vertex names
  unlist(lapply(heaviest, function(p) V(G)[p]$name))
}

最佳答案

对于您想要执行的逻辑(我想您可能希望以与上述答案不兼容的方式进行更改)，以下代码大约快 ~~10 倍~~ 30%。我使用 Rprof 和 profr 进行了性能分析，并用简单的方式重写了一些较慢的部分，例如让您的某个函数返回未命名的二元列表，而不是带名称的列表。以数字字符串为名称、存放值对的列表 genesets.length.null.stat 效率非常低，我用两个数值向量替换了它。您还多次调用 V 函数，这非常耗时:如您所见，可以只调用一次，然后根据需要查询其结果。

# node2treePath is a function to retrieve the shortest path with the highest node weights
#
# Faster variant: V(G) is looked up once and reused for every path.
#
# Arguments:
#   G      igraph graph whose vertices have `weight` and `name` attributes.
#   Tnodes target vertices (the current subnetwork).
#   node   source vertex.
#
# Returns a flat vector with the vertex names along the selected path(s); ties
# are all kept. Returns NULL when no path is found.
node2treePath_jw <- function(G, Tnodes, node){

  tmp.path <- get.all.shortest.paths(G, node, Tnodes)$res
  # Nothing to choose from: avoid min()/max() on empty input.
  if (length(tmp.path) == 0) {
    return(NULL)
  }

  # keep only the paths of minimal length
  tmp.l <- vapply(tmp.path, length, integer(1))
  tmp.path <- tmp.path[which(tmp.l == min(tmp.l))]

  # among those, keep the path(s) with the largest total vertex weight
  Vg <- V(G)
  tmp.sum <- vapply(tmp.path, function(x) sum(Vg[x]$weight), numeric(1))
  selected.path <- tmp.path[which(tmp.sum == max(tmp.sum))]

  # unlist(lapply()) always yields a flat vector. sapply() would return a
  # matrix here, because all selected paths have the same length.
  unlist(lapply(selected.path, function(x) Vg[x]$name))
}

# Greedily grow a high-scoring connected subnetwork around a seed (optimised
# variant of build_network).
#
# Vertices within `rad` steps of the current subnetwork are scored one at a
# time (rad = 1, then 2, ... up to `d`). The best candidate is accepted when it
# raises the score by more than 0.01; the vertices returned by
# node2treePath_jw() for it are merged in and the search restarts from
# radius 1. Growth stops once a pass adds no vertex.
#
# Relies on objects defined outside this function: the numeric vectors
# `genesets_jack_a` and `genesets_jack_b`, indexed by subnetwork size and used
# to centre and scale Za, and node2treePath_jw().
#
# Arguments:
#   net  igraph graph whose vertices carry `name`, `weight` and
#        `RWRNodeweight` attributes.
#   seed vertex (or vertices) to start from; the subgraph they induce must be
#        connected, otherwise an error is raised.
#   d    largest neighbourhood order to search (default 2).
#
# Returns the induced subgraph of `net` on the selected vertices.
build_network_jw <- function(net, seed, d = 2){

  # Standardised score for a vertex sequence `Vg` of size `k`.
  score.fun <- function(Vg, k){
    Za <- sum(Vg$weight * Vg$RWRNodeweight) / sqrt(sum(Vg$RWRNodeweight^2))
    centre <- genesets_jack_a[k]
    spread <- genesets_jack_b[k]
    if (is.na(centre) || is.na(spread)) {
      # An out-of-range size would otherwise surface as an NA `if` condition.
      stop("No null statistics available for a subnetwork of size ", k)
    }
    (Za - centre) / spread
  }

  # Try every candidate in `out.nodes` on top of `in.nodes`; returns an
  # unnamed pair: list(best candidate, its score).
  best.fun_jw <- function(in.nodes, out.nodes)  {
    score <- (-Inf)
    best <- character()
    for (node in out.nodes) {
      subG.update <- induced.subgraph(net, c(in.nodes, node))
      sf <- score.fun(V(subG.update), vcount(subG.update))
      if (sf > score) {
        score <- sf
        best <- node
      }
    }
    list(best, score)
  }

  subG <- induced.subgraph(net, seed)
  if (!is.connected(subG)) {          # the seed must be connected
    stop("Input seeds are disjoint")
  }

  Vnet <- V(net)  # `net` never changes, so look its vertices up once

  while (TRUE) {
    VsubG <- V(subG)
    in.nodes <- VsubG$name
    node_num <- vcount(subG)
    subsum <- score.fun(VsubG, node_num)

    # seq_len() so that d = 0 performs no search (1:0 would iterate 1, 0)
    for (rad in seq_len(d)) {
      tmp.neigh <- unlist(neighborhood(net, order = rad, nodes = in.nodes))
      pot.nodes <- Vnet[tmp.neigh]$name
      out.nodes <- setdiff(pot.nodes, in.nodes)
      if (length(out.nodes) == 0) break

      best_node <- best.fun_jw(in.nodes, out.nodes)
      new_score <- best_node[[2]]
      best_node <- best_node[[1]]

      if (new_score > subsum + 0.01) {
        # unlist(lapply()) gives a flat vector whatever shape the helper
        # returns; sapply() could hand back a matrix or a list.
        tmp <- unlist(lapply(
          best_node,
          function(x) node2treePath_jw(net, in.nodes, x)
        ))
        subG <- induced.subgraph(net, c(tmp, in.nodes))
        break
      }
    }

    # No vertex was added during this pass: the subnetwork has converged.
    if (node_num == vcount(subG)) break
  }
  subG
}

# Baseline helper: among all shortest paths from `node` to the vertices in
# `Tnodes`, keep the minimal-length ones, then the one(s) with the highest
# summed vertex `weight`, and return their vertex names as one flat vector.
#
# Arguments:
#   G      igraph graph whose vertices have `weight` and `name` attributes.
#   Tnodes target vertices (the current subnetwork).
#   node   source vertex.
node2treePath <- function (G, Tnodes, node){
  paths <- get.all.shortest.paths(G, node, Tnodes)$res

  # step 1: restrict to the shortest candidates
  hops <- unlist(lapply(paths, length))
  is_min <- which(hops == min(hops))
  paths <- paths[is_min]

  # step 2: restrict to the candidates with the largest weight sum
  weight_sum <- unlist(lapply(paths, function(p) sum(V(G)[p]$weight)))
  is_max <- which(weight_sum == max(weight_sum))
  chosen <- paths[is_max]

  # step 3: vertex names of the chosen path(s), flattened
  unlist(lapply(chosen, function(p) V(G)[p]$name))
}


# Baseline (unoptimised) subnetwork builder, kept as the reference point for
# the benchmark against build_network_jw.
#
# Starting from `seed`, candidates within `rad` steps of the current
# subnetwork (rad = 1..d) are scored; the best one is accepted when it raises
# the score by more than 0.01, the vertices returned by node2treePath() are
# merged in, and the search restarts. Stops when a pass adds no vertex.
#
# Arguments:
#   net  igraph graph whose vertices carry `name`, `weight` and
#        `RWRNodeweight` attributes.
#   seed vertex (or vertices) to start from; must induce a connected subgraph.
#   d    largest neighbourhood order to search (default 2).
#
# Returns the induced subgraph of `net` on the selected vertices.
build_network <- function (net, seed, d= 2){

  # NOTE: the null statistics are regenerated with fresh random draws on every
  # call (sizes 1..500), replacing the real table shown in the commented line.
  #genesets.length.null.stat <- structure(list(`1` = c(1.01397367504035, 1.18858228819048), `2` = c(1.61970348041337, 1.30189433386605), `3` = c(2.11767222957028, 1.36222065695878), `4` = c(2.47710421934929, 1.36968129959296), `5` = c(2.776011866622, 1.36318885187196), `6` = c(3.16885126246671, 1.42577861995897)), .Names = c("1", "2", "3", "4", "5", "6"))
  genesets.length.null.stat <- lapply(1:500, function(x) c(runif(1)+x, runif(1)+x))
  names(genesets.length.null.stat) <- 1:500

  # Standardised score of subnetwork `g`: Za centred and scaled by the pair
  # stored under the subnetwork's size.
  score.fun<-function(g){
    Za <- sum(V(g)$weight*V(g)$RWRNodeweight)/sqrt(sum(V(g)$RWRNodeweight^2))
    k <- vcount(g)
    tmp <- genesets.length.null.stat[[as.character(k)]] # genesets.length.null.stat is a list with the median of Za and sd of Za calculated for 1000 replicates of networks of size k
    Sa <- (Za-tmp[1])/tmp[2]
  }

  # Score every candidate in `out.nodes` added to `in.nodes` and return the
  # best one with its score. Each improving candidate is scored twice here.
  best.fun <- function(in.nodes,out.nodes)  {
    score<-(-Inf); best<-character()
    for (node in out.nodes){
      subG.update<-induced.subgraph(net, c(in.nodes,node))
      if (score.fun(subG.update) > score) {
        score<-score.fun(subG.update)
        best<-node
      }
    }
    list("node"=best,"score"=score)
  }

  subG <- induced.subgraph(net, seed)
  if (!is.connected(subG)) {          #the seed must be connected
    stop("Input seeds are disjoint")
  }

  # Grow until a full pass over the radii leaves the vertex count unchanged.
  while (TRUE) {
    in.nodes <- V(subG)$name
    node_num <- vcount(subG)
    subsum <- score.fun(subG)
    #subx <- V(subG)$name
    for (rad in 1:d) {
      # candidates: vertices within `rad` steps that are not yet included
      tmp.neigh <- unlist(neighborhood(net, order = rad, nodes = V(subG)$name))
      pot.nodes <- V(net)[tmp.neigh]$name
      out.nodes <- setdiff(pot.nodes, in.nodes)
      if (length(out.nodes) == 0) break

      #message("length in.nodes = ", length(in.nodes))
      #message("length out.nodes = ", length(out.nodes))

      best_node<-best.fun(in.nodes, out.nodes)
      new_score<-best_node$score
      best_node<-best_node$node

      # accept only a gain larger than 0.01, then restart from radius 1
      if (new_score > subsum + 0.01) {
        tmp <- unlist(lapply(best_node, function(x) node2treePath(net,V(subG)$name, x))) # node2treePath is a function to retrieve the shortest path with the highest node weights
        in.nodes <- c(tmp, V(subG)$name)
        subG <- induced.subgraph(net, in.nodes)
        break
      }
    }
    if (node_num == vcount(subG)) break
  }
  subG
}

library(igraph)
library(profr)



library(igraph)
library(profr)

#genesets.length.null.stat <- lapply(1:500, function(x) c(runif(1)+x, runif(1)+x))
#names(genesets.length.null.stat) <- 1:500

# Synthetic null statistics for subnetwork sizes 1..500, held as two plain
# numeric vectors indexed by size. Seeded so every run draws the same values.
set.seed(1)
genesets_jack_a <- runif(500) + seq_len(500)
genesets_jack_b <- runif(500) + seq_len(500)

# Benchmark driver for build_network_jw(): builds a random graph with `n`
# vertices and random vertex attributes, then grows one subnetwork per vertex
# with radius 2. The subnetworks are collected locally but not returned.
do_it_jw <- function(n = 1000){
  g <- erdos.renyi.game(n, 0.0003)
  V(g)$name <- 1:vcount(g)
  V(g)$weight <- rnorm(n)
  V(g)$RWRNodeweight <- runif(n, min = 0, max = 0.05)

  # one run of the builder per vertex, each vertex acting as the seed
  grown <- list()
  for (seed_vertex in V(g)$name) {
    grown[[seed_vertex]] <- build_network_jw(g, seed_vertex, 2)
  }
}

# Benchmark driver for the baseline build_network(): builds a random graph
# with `n` vertices and random vertex attributes, then grows one subnetwork per
# vertex with radius 2. The subnetworks are collected locally but not returned.
do_it <- function(n = 1000){
  g <- erdos.renyi.game(n, 0.0003)
  V(g)$name <- 1:vcount(g)
  V(g)$weight <- rnorm(n)
  V(g)$RWRNodeweight <- runif(n, min = 0, max = 0.05)

  # one run of the builder per vertex, each vertex acting as the seed
  grown <- list()
  for (seed_vertex in V(g)$name) {
    grown[[seed_vertex]] <- build_network(g, seed_vertex, 2)
  }
}

# Time the baseline driver against the optimised one on 1000-vertex graphs,
# running each expression 5 times, and print the timing summary.
library(microbenchmark)
mb <- microbenchmark(do_it(1000), do_it_jw(1000), times = 5)
print(mb)

关于r - 基于节点权重构建图的优化算法，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/33240240/

文章推荐： javascript - 使用 jQuery 更改 schema.org 微数据？

文章推荐： .htaccess - Seo 301 绝对 URL 重定向，Prestashop

文章推荐： algorithm - apache spark 上的不相交集

c++ - 为什么 MSVC 不为 char 或 const char* 优化 cout 而为 int 优化？
比较代码: const char x = 'a'; std::cout > (0C310B0h) 00C3100B add esp,4 和 const i
Matlab 优化
您好，我正在使用 Matlab 优化求解器，但程序有问题。我收到此消息 fmincon 已停止，因为目标函数值小于目标函数限制的默认值，并且约束满足在约束容差的默认值范围内。我也收到以下消息。警告:矩
Eclipse 优化
处理Visual Studio optimizations的问题为我节省了大量启动和使用它的时间当我必须进行 J2EE 开发时，我很难回到 Eclipse。因此，我还想知道人们是否有任何提示或技巧可
Excel 优化
情况如下:在我的 Excel 工作表中，有一列包含 1-name 形式的条目。考虑到数字也可以是两位数，我想删除这些数字。这本身不是问题，我让它工作了，只是性能太糟糕了。现在我的程序每个单元格输入大约
jQuery 优化
这样做有什么区别吗: $(".topHorzNavLink").click(function() { var theHoverContainer = $("#hoverContainer");
jQuery $(this) 优化
这个问题已经有答案了: 已关闭11 年前。 Possible Duplicate: What is the cost of '$(this)'? 我经常在一些开发人员代码中看到$(this)引用同一个
jQuery 优化
我刚刚结束了一个大型开发项目。我们的时间紧迫，因此很多优化被“推迟”。既然我们已经达到了最后期限，我们将回去尝试优化事情。我的问题是:优化 jQuery 网站时您要寻找的最重要的东西是什么。或者，我
JavaScript 优化
所以我一直在用 JavaScript 编写游戏(不是网络游戏，而是使用 JavaScript 恰好是脚本语言的游戏引擎)。不幸的是，游戏引擎的 JavaScript 引擎是 SpiderMonkey
MYSQL查询、优化
这是我在正在构建的页面中使用的 SQL 查询。它目前运行大约 8 秒并返回 12000 条记录，这是正确的，但我想知道您是否可以就如何使其更快提出可能的建议？ SELECT DISTINCT Adve
SQL 优化
如何优化这个？ SELECT e.attr_id, e.sku, a.value FROM product_attr AS e, product_attr_text AS a WHERE e.attr
python - 优化 `in`
我正在使用这样的结构来测试是否按下了所需的键: def eventFilter(self, tableView, event): if event.type() == QtCore.QEven
JavaScript 优化
我正在使用 JavaScript 从给定的球员列表中计算出羽毛球 double 比赛的所有组合。每个玩家都与其他人组队。 EG。如果我有以下球员a、b、c、d。它们的组合可以是: a & b V c
Javascript 优化
我似乎无法弄清楚如何让这个 JS 工作。 scroll function 起作用但不能隐藏。还有没有办法用更少的代码行来做到这一点？我希望 .down-arrow 在 50px 之后 fade out
CSS高级最小化(优化)
我的问题是关于用于生产的高级优化级联样式表 (CSS) 文件。多么最新和最完整(准备在实时元素中使用)的 css 优化器/最小化器，它们不仅提供删除空格和换行符，还提供高级功能，如删除过多的属性、合
HTML 优化
我读过这个: 浏览器检索在中请求的所有资源开始呈现之前的 HTML 部分.如果您将请求放在中section 而不是，那么页面呈现和下载资源可以并行发生。您应该从移动尽可能多的资源请求。
C++ 优化
我正在处理一些现有的 C++ 代码，这些代码看起来写得不好，而且调用频率很高。我想知道我是否应该花时间更改它，或者编译器是否已经在优化问题。我正在使用 Visual Studio 2008。这是一
c++ - 优化
我正在尝试使用 OpenGL 渲染 3 个四边形(1 个背景图，2 个 Sprite )。我有以下代码: void GLRenderer::onDrawObjects(long p_dt) {
Java If 优化
我确实有以下声明: isEnabled = false; if(foo(arg) && isEnabled) { .... } public boolean foo(arg) { some re
SQL 优化
(一)深入浅出理解索引结构实际上，您可以把索引理解为一种特殊的目录。微软的SQL SERVER提供了两种索引：聚集索引(clustered index，也称聚类索引、簇集索引)和非聚集索引(no
CSS 优化、提高性能的方法有哪些？
一、写在前面 css的优化方案，之前没有提及，所以接下来进行总结一下。二、具体优化方案 2.1、加载性能 1、css压缩：将写好的css进行打包，可以减少很多的体积。 2、css单一样式：在需要下边

塔克拉玛干

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

r - 基于节点权重构建图的优化算法