- r - 以节省内存的方式增长 data.frame
- ruby-on-rails - ruby/ruby on rails 内存泄漏检测
- android - 无法解析导入android.support.v7.app
- UNIX 域套接字与共享内存(映射文件)
我的 Google App Engine Go 项目在 Google Cloud Storage 的“文件夹”中创建了一个包含多个文件的 zip。当使用现已弃用和删除的文件 API 在 BlobStore 中实现时,它曾经非常快。我最近将代码转换为使用谷歌云存储,现在性能非常糟糕,有时会超时。正在压缩的文件大小在 1K 到 2M 之间。
我正在寻找任何改进压缩文件内容的建议。下面的代码是我为将云中的多个文件压缩为云中的新 zip 文件而编写的代码。它可能需要很长时间才能执行,并且需要在将每个文件写入 zip 之前将其全部内容(参见下面的性能问题)加载到内存中。必须有更好的方法。
// Pack zips the objects listed under srcFolder in cloud.bucket into a new
// storage object named fileName.
//
// contentType and metaData are optional: they are applied to the storage
// writer only when contentType is non-empty and metaData is non-nil.
//
// Nothing is returned; every failure is logged with log.Errorf and the
// function simply returns.
//
// NOTE(review): each object is read completely into memory (ioutil.ReadAll)
// before it is written to the zip, and the zip output is staged in a
// bytes.Buffer that is drained to the storage writer after every file.
// NOTE(review): the early returns that follow storage.NewWriter never call
// storageWriter.Close().
func (cloud *Cloud) Pack(srcFolder string, fileName string, contentType string, metaData *map[string]string) {
log.Infof(cloud.c, "Packing bucket %v folder %v to file %v", cloud.bucket, srcFolder, fileName)
// list objects whose names start with "<srcFolder>/", using "/" as the delimiter
srcFolder = fmt.Sprintf("%v/", srcFolder)
query := &storage.Query{Prefix: srcFolder, Delimiter: "/"}
objs, err := storage.ListObjects(cloud.ctx, cloud.bucket, query)
if err != nil {
log.Errorf(cloud.c, "Packing failed to list bucket %q: %v", cloud.bucket, err)
return
}
// an empty listing is treated as a failure
totalFiles := len(objs.Results)
if totalFiles == 0 {
// NOTE(review): the verbs do not line up with the arguments here - %q receives the bucket and %v receives the folder
log.Errorf(cloud.c, "Packing failed to find objects found in folder %q: %v", cloud.bucket, srcFolder)
return
}
// create storage file for writing
log.Infof(cloud.c, "Writing new zip file to %v/%v for %v files", cloud.bucket, fileName, totalFiles)
storageWriter := storage.NewWriter(cloud.ctx, cloud.bucket, fileName)
// add optional content type and meta data
if len(contentType) > 0 { storageWriter.ContentType = contentType }
if metaData != nil { storageWriter.Metadata = *metaData }
// Create a buffer to write our archive to.
buf := new(bytes.Buffer)
// Create a new zip archive that writes into the memory buffer
zipWriter := zip.NewWriter(buf)
// go through each file in the folder
for _, obj := range objs.Results {
log.Infof(cloud.c, "Packing file %v of size %v to zip file", obj.Name, obj.Size)
//d.dumpStats(obj)
// read file in our source folder from storage - io.ReadCloser returned from storage
storageReader, err := storage.NewReader(cloud.ctx, cloud.bucket, obj.Name)
if err != nil {
log.Errorf(cloud.c, "Packing failed to read from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
return
}
// deferred calls run when Pack returns, so every reader opened by this loop stays open until then
defer storageReader.Close()
// PERFORMANCE ISSUE: have to load the entire file into memory to get random access from the cloud
slurp, err := ioutil.ReadAll(storageReader)
if err != nil {
log.Errorf(cloud.c, "Packing failed to read data from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
return
}
// grab just the filename from directory listing (don't want to store paths in zip)
_, zipFileName := filepath.Split(obj.Name)
// the zip entry is stored under the lowercased file name
newFileName := strings.ToLower(zipFileName)
// add filename to zip
zipFile, err := zipWriter.Create(newFileName)
if err != nil {
log.Errorf(cloud.c, "Packing failed to create zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
return
}
// write entire file into zip archive
_, err = zipFile.Write(slurp)
if err != nil {
log.Errorf(cloud.c, "Packing failed to write zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
return
}
// flush that to buffer so we can write it off now
//err = zipFile.Flush()
//if err != nil {
// d.errorf("pack: unable to flush write of zip file from bucket %q, file %q: %v", cloud.bucket, zipFileName, err)
// //return
//}
// now drain all that buffered zip data to the cloud storage file
log.Infof(cloud.c, "Writing zip buffer of size %v to cloud storage file %v", buf.Len(), fileName)
_, err = buf.WriteTo(storageWriter)
if err != nil {
log.Errorf(cloud.c, "Packing failed to write data to bucket %q file %q: %v", cloud.bucket, fileName, err)
return
}
}
// Make sure to check the error on Close.
// NOTE(review): the error is only logged; execution continues to the final drain and close below
log.Infof(cloud.c, "Closing zip writer")
err = zipWriter.Close()
if err != nil {
log.Errorf(cloud.c, "Packing failed to close zip file writer from bucket %q file %q : %v", cloud.bucket, fileName, err)
}
// write any leftover data
if buf.Len() > 0 {
// now drain all that buffered zip data to the cloud storage file
// log.Infof(cloud.c, "Packing zip buffer of size %v to cloud storage file %v", buf.Len(), fileName)
_, err := buf.WriteTo(storageWriter)
if err != nil {
log.Errorf(cloud.c, "Packing failed to write data to bucket %q file %q: %v", cloud.bucket, fileName, err)
}
}
// close up final write file
//log.Infof(cloud.c, "Closing cloud storage file %v", fileName)
if err := storageWriter.Close(); err != nil {
log.Errorf(cloud.c, "Packing failed to close bucket %q file %q: %v", cloud.bucket, fileName, err)
return
}
// success!
// NOTE(review): this is also reached when zipWriter.Close or the final buf.WriteTo above logged an error
log.Infof(cloud.c, "Packed files to new cloud storage file %v successful!", fileName)
}
最佳答案
感谢 Stephen 建议在写入 zip 时不要将文件加载到内存缓冲区中。下面是修复后的代码,供参考:
// Pack zips every object listed directly under srcFolder in cloud.bucket into
// a new storage object named fileName in the same bucket.
//
// Each source object is streamed from its storage reader into the zip writer,
// which in turn writes straight to the destination storage writer, so no whole
// file is held in memory. Entries are stored under the lowercased base name of
// the object (no folder path).
//
// contentType and metaData are optional: an empty contentType or a nil
// metaData leaves the corresponding writer field untouched.
//
// Pack returns true only when every object was copied, the zip archive was
// finalized, and the destination writer closed without error. Failures are
// logged with log.Errorf and reported as false.
func (cloud *Cloud) Pack(srcFolder string, fileName string, contentType string, metaData *map[string]string) bool {
	log.Infof(cloud.c, "Packing bucket %v folder %v to file %v", cloud.bucket, srcFolder, fileName)

	// List only the objects directly inside "<srcFolder>/".
	srcFolder = fmt.Sprintf("%v/", srcFolder)
	query := &storage.Query{Prefix: srcFolder, Delimiter: "/"}
	objs, err := storage.ListObjects(cloud.ctx, cloud.bucket, query)
	if err != nil {
		log.Errorf(cloud.c, "Packing failed to list bucket %q: %v", cloud.bucket, err)
		return false
	}
	totalFiles := len(objs.Results)
	if totalFiles == 0 {
		// The previous message paired "folder %q" with the bucket and left a
		// stray %v for the folder; report both values under the right labels.
		log.Errorf(cloud.c, "Packing failed to find objects in bucket %q folder %q", cloud.bucket, srcFolder)
		return false
	}

	// Create the destination object and apply the optional attributes before
	// any data is written to it.
	log.Infof(cloud.c, "Writing new zip file to %v/%v for %v files", cloud.bucket, fileName, totalFiles)
	storageWriter := storage.NewWriter(cloud.ctx, cloud.bucket, fileName)
	if len(contentType) > 0 {
		storageWriter.ContentType = contentType
	}
	if metaData != nil {
		storageWriter.Metadata = *metaData
	}

	// The zip archive is written directly to the storage writer.
	zipWriter := zip.NewWriter(storageWriter)
	ok := true
	for _, obj := range objs.Results {
		log.Infof(cloud.c, "Packing file %v of size %v to zip file", obj.Name, obj.Size)
		if !cloud.packObject(zipWriter, obj.Name) {
			ok = false
			break
		}
	}

	// Finalize the archive (writes the central directory) only if every
	// entry was added successfully.
	if ok {
		log.Infof(cloud.c, "Closing zip writer")
		if err := zipWriter.Close(); err != nil {
			log.Errorf(cloud.c, "Packing failed to close zip file writer from bucket %q file %q : %v", cloud.bucket, fileName, err)
			ok = false
		}
	}

	// Always close the storage writer so it is released on every path, and
	// check the error instead of discarding it in a bare defer: a failed
	// close means the destination object was not written successfully.
	// NOTE(review): as before, closing after a failure may still leave a
	// partial object behind; abort the write instead if the storage API in use
	// supports it.
	if err := storageWriter.Close(); err != nil {
		log.Errorf(cloud.c, "Packing failed to close bucket %q file %q: %v", cloud.bucket, fileName, err)
		ok = false
	}

	if ok {
		log.Infof(cloud.c, "Packed files to new cloud storage file %v successful!", fileName)
	}
	return ok
}

// packObject streams the bucket object objName into zipWriter as one entry
// named after the lowercased base name of the object. It returns false (after
// logging) if the object cannot be opened, the entry cannot be created, or the
// copy fails. Keeping this in its own function lets the deferred reader close
// run as soon as the object has been copied rather than when Pack returns.
func (cloud *Cloud) packObject(zipWriter *zip.Writer, objName string) bool {
	// Keep just the file name; folder paths are not stored in the zip.
	_, zipFileName := filepath.Split(objName)
	if zipFileName == "" {
		// A name ending in "/" (for example a folder placeholder object) has
		// no file name to store, so there is nothing to add for it.
		log.Infof(cloud.c, "Skipping %v: no file name to store in zip file", objName)
		return true
	}

	storageReader, err := storage.NewReader(cloud.ctx, cloud.bucket, objName)
	if err != nil {
		log.Errorf(cloud.c, "Packing failed to read from bucket %q file %q: %v", cloud.bucket, objName, err)
		return false
	}
	// The close error is ignored: by then the data has either been copied or
	// a copy error has already been reported.
	defer storageReader.Close()

	zipFile, err := zipWriter.Create(strings.ToLower(zipFileName))
	if err != nil {
		log.Errorf(cloud.c, "Packing failed to create zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
		return false
	}

	// Stream from the storage reader to the zip entry without buffering the
	// whole object.
	if _, err := io.Copy(zipFile, storageReader); err != nil {
		log.Errorf(cloud.c, "Failed to copy from storage reader to zip file: %v", err)
		return false
	}
	return true
}
关于google-app-engine - 使用 Go 在 Google Cloud Storage 中压缩文件夹的最佳方法?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/31611616/
我有一个 Cloud Run 服务,它通过 SQLAlchemy 访问 Cloud SQL 实例.但是,在 Cloud Run 的日志中,我看到 CloudSQL connection failed.
关闭。这个问题是opinion-based .它目前不接受答案。 想改善这个问题吗?更新问题,以便可以通过 editing this post 用事实和引文回答问题. 4年前关闭。 Improve t
在将 docker 容器镜像部署到 Cloud Run 时,我可以选择一个区域,这很好。 Cloud Run 将构建委托(delegate)给 Cloud Build,后者显然会创建两个存储桶来实现这
我正在尝试将 Cloud Functions 用作由 PubSub 触发的异步后台工作程序,并进行更长时间的工作(以分钟为单位)。完整代码在这里https://github.com/zdenulo/c
这是/etc/cloud/cloud.cfg的内容Ubuntu云16.04镜像: # The top level settings are used as module # and system co
如何从 Google Cloud Function 启动 Cloud Dataflow 作业?我想使用 Google Cloud Functions 作为启用跨服务组合的机制。 最佳答案 我已经包含了
我想使用 Cloud Shell 在我的第二代 Cloud Sql 实例上运行数据库迁移。 我找到了一个 example in the docs关于如何使用 gcloud 进行连接.但是当我运行命令时
我正在尝试使用 Google Cloud PubSub和我的 Google Cloud Dataproc群集,我收到如下身份验证范围错误: { "code" : 403, "errors" :
这是我的用例。 我已经有一个以私有(private)模式部署的 Cloud Run 服务。 (与云功能相同的问题) 我正在开发使用此 Cloud Run 的新服务。我在应用程序中使用默认凭据进行身份验
如何连接到 Cloud SQL 上的数据库,而无需在容器中添加我的凭据文件? 最佳答案 使用 UNIX 域套接字 (Java) 从云运行(完全托管)连接到云 SQL At this time Clou
我有一个google-cloud-ml作业,需要从gs存储桶加载numpy .npz文件。我遵循了this example上关于如何从gs加载.npy文件的操作,但是由于.npz文件已压缩,因此它对我
我想创建链接到另一个项目中的 Cloud Source Repository 的 Cloud Build 触发器。但是当我在应该选择存储库的步骤中时,列表是空的。我尝试了不同的许可,但没有运气。谁能告
向 Twilio 发送 SMS 时,Twilio 会向指定的 URL 发送多个请求,以通过 Webhook 提供该 SMS 传送的状态。我想让这个回调异步,所以我开发了一个 Cloud Functio
我需要更改我的项目 ID,因为要验证的 Firebase 身份验证链接在链接上显示了项目 ID,并且由于品牌 reshape ,项目名称已更改。根据我发现的信息,更改项目 ID 似乎不太可能。我正在考
用于部署我的 Angular 应用程序的 CI/CD 管道已关闭,但我看到 Google Cloud Run 在容器镜像更新后没有部署新修订版。 我已将 Cloud Build 设置为在 GitHub
报价https://cloud.google.com/load-balancing/docs/https/setting-up-https-serverless#enabling While Goog
Cloud Spanner 提供了两种不同的 API。 Cloud Spanner 读取与 Cloud Spanner SQL API 之间有什么区别? 最佳答案 在幕后,它们都使用相同的执行机制,因
我是 GCP 堆栈的新手,所以我对用于存储数据的 GCP 技术数量感到非常困惑: https://cloud.google.com/products/storage 虽然上面的文章中没有提到googl
我发现 Google Cloud Functions 的网络出站费用令人惊讶,我正在尝试了解发生这种情况的原因以及如何避免这种情况。 Stackdriver 监控表明有问题的函数是我的 ingest
我使用 Prisma使用 Cloud Run 和 Cloud SQL。在向 prisma.schema 提供 DATABASE_URL 后,它会在运行时抛出一个错误。 Can't reach data
我是一名优秀的程序员,十分优秀!