r - 优化R中的空间查询 - 森林中冠位分布的生物量-6ren

r - 优化R中的空间查询 - 森林中冠位分布的生物量

转载作者：行者123 更新时间：2023-12-01 00:34:07

我正在尝试计算森林图中与平方网格单元格重叠的树冠面积。此后，一个可重现的示例:

# A. Define objects
    require(sp)
    require(raster)
    require(rgdal)
    require(rgeos)
    require(dismo)
    radius=25   # max search radius around 10 x 10 m cells
    res <- vector() # where to store results

    # Create a fake set of trees with x,y coordinates and trunk diameter (=dbh)
    set.seed(0) 
    survey <- data.frame(x=sample(99,1000,replace=T),y=sample(99,1000,replace=T),dbh=sample(100,1000,replace=T))  
    coordinates(survey) <- ~x+y

    # Define 10 x 10 subplots
    grid10 <- SpatialGrid(GridTopology(c(5,5),c(10,10),c(10,10))) 
    survey$subplot <- over(survey,grid10)   

# B. Now find fraction of tree crown overlapping each subplot
    for (i in 1:100) {
        # Extract centroïd of each the ith cell
        centro <- expand.grid(x=seq(5,95,10),y=seq(5,95,10))[i,]
        corner <- data.frame(x=c(centro$x-5,centro$x+5,centro$x+5,centro$x-5),y=c(centro$y-5,centro$y-5,centro$y+5,centro$y+5))


        # Find trees in a max radius (define above)
        tem <- survey[which((centro$x-survey$x)^2+(centro$y-survey$y)^2<=radius^2),]


        # Define tree crown based on tree diameter
        tem$crownr <- exp(-.438+.658*log(tem$dbh/10)) # crown radius in meter

        # Compute the distance from each tree to cell's borders
        pDist <- vector()
        for (k in 1:nrow(tem))  {
            pDist[k] <- gDistance(tem[k,],SpatialPolygons(list(Polygons(list(Polygon(corner)),1))))
        }

        # Keeps only trees whose crown is lower than the above distance (=overlap)
        overlap.trees <- tem[which(pDist<=tem$crownr),]
        overlap.trees$crowna <-overlap.trees$crownr^2*pi  # compute crown area

        # Creat polygons from overlapping crowns
        c1 <- circles(coordinates(overlap.trees),overlap.trees$crownr, lonlat=F, dissolve=F)
        crown <- polygons(c1)
        Crown <-    SpatialPolygonsDataFrame(polygons(c1),data=data.frame(dbh=overlap.trees$dbh,crown.area=overlap.trees$crowna))

        # Create a fine grid points to retrieve the fraction of overlapping crowns
        max.dist <- ceiling(sqrt(which.max((centro$x - overlap.trees$x)^2 + (centro$y - overlap.trees$y)^2))) # max distance to narrow search

        finegrid <- as.data.frame(expand.grid(x=seq(centro$x-max.dist,centro$x+max.dist,1),y=seq(centro$y-max.dist,centro$y+max.dist,1)))
        coordinates(finegrid) <- ~ x+y
        A <- extract(Crown,finegrid)
        Crown@data$ID <- seq(1,length(crown),1)
        B <- as.data.frame(table(A$poly.ID))
        if (nrow(B)>0) {
        B <- merge(B,Crown@data,by.x="Var1",by.y="ID",all.x=T)
        B$overlap <- B$Freq/B$crown.area
        B$overlap[B$overlap>1] <- 1
        res[i] <- sum(B$overlap) } else {
        res[i] <- 0 }
    }

# C. Check the result
    res  # sum of crown fraction overlapping each cell (works fine)

该算法运行 100 个单元大约需要 3 分钟。我有一个包含 35000 个单元格的大数据集，所以 150*7=1050 分钟或 17.5 小时。
任何有关固定和/或优化此算法的提示 ??

最佳答案

使用 profvis 进行快速分析后包，似乎可以通过更改几行来进行一些改进。这不是一个详尽的优化，我相信更多的改进仍然是可能的。

我变了

pDist <- vector()
for (k in 1:nrow(tem))  {
    pDist[k] <- gDistance(tem[k,],SpatialPolygons(list(Polygons(list(Polygon(corner)),1))))
}

到

pDist <- rep(NA, nrow(tem))
my.poly <- SpatialPolygons(list(Polygons(list(Polygon(corner)),1)))
for (k in 1:nrow(tem))  {
  pDist[k] <- gDistance(tem[k,], my.poly)
}

因为没有必要创建 SpatialPolygons对象每次。这可能很昂贵，如下面的分析图像所示(顶部已优化)。

这是一些应该并行运行的代码。

# load only necessary package for code until parSapplyLB
# LB is load-balancing, which means it will distribute task to cores
# which are idle. This is great if jobs take an uneven amount of time
# to run.

library(parallel)
library(sp)

system.time({

  # prepare the cluster, default is PSOCK on windows but can be FORK form *nix
  cl <- makeCluster(4)
  # worker is just a new instance of fresh vanilla R so you need to load the 
  # necessary libraries to all the workers
  clusterEvalQ(cl = cl, library(sp))
  clusterEvalQ(cl = cl, library(raster))
  clusterEvalQ(cl = cl, library(rgdal))
  clusterEvalQ(cl = cl, library(rgeos))
  clusterEvalQ(cl = cl, library(dismo))

  radius <- 25   # max search radius around 10 x 10 m cells
  # res <- rep(NA, 100) # where to store results

  # Create a fake set of trees with x,y coordinates and trunk diameter (=dbh)
  set.seed(0) 
  survey <- data.frame(x=sample(99,1000,replace=T),y=sample(99,1000,replace=T),dbh=sample(100,1000,replace=T))  
  coordinates(survey) <- ~x+y

  # Define 10 x 10 subplots
  grid10 <- SpatialGrid(GridTopology(c(5,5),c(10,10),c(10,10))) 
  survey$subplot <- over(survey,grid10)   

  # Export needed variables to workers
  clusterExport(cl = cl, varlist = c("survey", "radius"))

  # this function is your former for() loop, increase X = 1:100 to suit your needs

  res <- parSapplyLB(cl = cl, X = 1:100, FUN = function(i, survey) {
  # B. Now find fraction of tree crown overlapping each subplot
    # Extract centroïd of each the ith cell
    centro <- expand.grid(x=seq(5,95,10),y=seq(5,95,10))[i,]
    corner <- data.frame(x=c(centro$x-5,centro$x+5,centro$x+5,centro$x-5),y=c(centro$y-5,centro$y-5,centro$y+5,centro$y+5))

    # Find trees in a max radius (define above)
    tem <- survey[which((centro$x-survey$x)^2+(centro$y-survey$y)^2<=radius^2),]

    # Define tree crown based on tree diameter
    tem$crownr <- exp(-.438+.658*log(tem$dbh/10)) # crown radius in meter

    # Compute the distance from each tree to cell's borders
    pDist <- vector()
    my.poly <- SpatialPolygons(list(Polygons(list(Polygon(corner)),1)))
    for (k in 1:nrow(tem))  {
      pDist[k] <- gDistance(tem[k,], my.poly)
    }

    # Keeps only trees whose crown is lower than the above distance (=overlap)
    overlap.trees <- tem[which(pDist<=tem$crownr),]
    overlap.trees$crowna <-overlap.trees$crownr^2*pi  # compute crown area

    # Creat polygons from overlapping crowns
    c1 <- circles(coordinates(overlap.trees),overlap.trees$crownr, lonlat=F, dissolve=F)
    crown <- polygons(c1)
    Crown <-    SpatialPolygonsDataFrame(polygons(c1),data=data.frame(dbh=overlap.trees$dbh,crown.area=overlap.trees$crowna))

    # Create a fine grid points to retrieve the fraction of overlapping crowns
    max.dist <- ceiling(sqrt(which.max((centro$x - overlap.trees$x)^2 + (centro$y - overlap.trees$y)^2))) # max distance to narrow search

    finegrid <- as.data.frame(expand.grid(x=seq(centro$x-max.dist,centro$x+max.dist,1),y=seq(centro$y-max.dist,centro$y+max.dist,1)))
    coordinates(finegrid) <- ~ x+y
    A <- extract(Crown,finegrid)
    Crown@data$ID <- seq(1,length(crown),1)
    B <- as.data.frame(table(A$poly.ID))
    if (nrow(B)>0) {
      B <- merge(B,Crown@data,by.x="Var1",by.y="ID",all.x=T)
      B$overlap <- B$Freq/B$crown.area
      B$overlap[B$overlap>1] <- 1
      res <- sum(B$overlap) } else {
        res <- 0 }
  }, survey = survey)
stopCluster(cl = cl)
})