gpt4 book ai didi

google-app-engine - 谷歌应用引擎 : Using Big Query on datastore?

转载 作者:太空宇宙 更新时间:2023-11-03 15:18:23 24 4
gpt4 key购买 nike

拥有一个包含数十万个对象的 GAE 数据存储类型。想做几个涉及查询(涉及计数查询)。 Big Query 似乎非常适合做这件事。

目前是否有使用 Big Query 查询实时 AppEngine 数据存储的简单方法?

最佳答案

您不能直接在 DataStore 实体上运行 BigQuery,但您可以编写一个 Mapper 管道从 DataStore 中读取实体,将它们写入 Google Cloud Storage 中的 CSV,然后将这些提取到 BigQuery - 您甚至可以自动化过程。这是使用 Mapper API 的示例仅 DataStore 到 CSV 步骤的类:

import re
import time
from datetime import datetime
import urllib
import httplib2
import pickle

from google.appengine.ext import blobstore
from google.appengine.ext import db
from google.appengine.ext import webapp

from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.ext.webapp import blobstore_handlers
from google.appengine.ext.webapp import util
from google.appengine.ext.webapp import template

from mapreduce.lib import files
from google.appengine.api import taskqueue
from google.appengine.api import users

from mapreduce import base_handler
from mapreduce import mapreduce_pipeline
from mapreduce import operation as op

from apiclient.discovery import build
from google.appengine.api import memcache
from oauth2client.appengine import AppAssertionCredentials


#Number of shards to use in the Mapper pipeline
SHARDS = 20

# Name of the project's Google Cloud Storage Bucket
GS_BUCKET = 'your bucket'

# DataStore Model
class YourEntity(db.Expando):
field1 = db.StringProperty() # etc, etc

ENTITY_KIND = 'main.YourEntity'


class MapReduceStart(webapp.RequestHandler):
"""Handler that provides link for user to start MapReduce pipeline.
"""
def get(self):
pipeline = IteratorPipeline(ENTITY_KIND)
pipeline.start()
path = pipeline.base_path + "/status?root=" + pipeline.pipeline_id
logging.info('Redirecting to: %s' % path)
self.redirect(path)


class IteratorPipeline(base_handler.PipelineBase):
""" A pipeline that iterates through datastore
"""
def run(self, entity_type):
output = yield mapreduce_pipeline.MapperPipeline(
"DataStore_to_Google_Storage_Pipeline",
"main.datastore_map",
"mapreduce.input_readers.DatastoreInputReader",
output_writer_spec="mapreduce.output_writers.FileOutputWriter",
params={
"input_reader":{
"entity_kind": entity_type,
},
"output_writer":{
"filesystem": "gs",
"gs_bucket_name": GS_BUCKET,
"output_sharding":"none",
}
},
shards=SHARDS)


def datastore_map(entity_type):
props = GetPropsFor(entity_type)
data = db.to_dict(entity_type)
result = ','.join(['"%s"' % str(data.get(k)) for k in props])
yield('%s\n' % result)


def GetPropsFor(entity_or_kind):
if (isinstance(entity_or_kind, basestring)):
kind = entity_or_kind
else:
kind = entity_or_kind.kind()
cls = globals().get(kind)
return cls.properties()


application = webapp.WSGIApplication(
[('/start', MapReduceStart)],
debug=True)

def main():
run_wsgi_app(application)

if __name__ == "__main__":
main()

如果您将其附加到 IteratorPipeline 类的末尾:yield CloudStorageToBigQuery(output),您可以将生成的 csv 文件句柄通过管道传输到 BigQuery 摄取管道中...像这样:

class CloudStorageToBigQuery(base_handler.PipelineBase):
"""A Pipeline that kicks off a BigQuery ingestion job.
"""
def run(self, output):

# BigQuery API Settings
SCOPE = 'https://www.googleapis.com/auth/bigquery'
PROJECT_ID = 'Some_ProjectXXXX'
DATASET_ID = 'Some_DATASET'

# Create a new API service for interacting with BigQuery
credentials = AppAssertionCredentials(scope=SCOPE)
http = credentials.authorize(httplib2.Http())
bigquery_service = build("bigquery", "v2", http=http)

jobs = bigquery_service.jobs()
table_name = 'datastore_dump_%s' % datetime.utcnow().strftime(
'%m%d%Y_%H%M%S')
files = [str(f.replace('/gs/', 'gs://')) for f in output]
result = jobs.insert(projectId=PROJECT_ID,
body=build_job_data(table_name,files)).execute()
logging.info(result)

def build_job_data(table_name, files):
return {"projectId": PROJECT_ID,
"configuration":{
"load": {
"sourceUris": files,
"schema":{
# put your schema here
"fields": fields
},
"destinationTable":{
"projectId": PROJECT_ID,
"datasetId": DATASET_ID,
"tableId": table_name,
},
}
}
}

关于google-app-engine - 谷歌应用引擎 : Using Big Query on datastore?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/10966841/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com