- android - 多次调用 OnPrimaryClipChangedListener
- android - 无法更新 RecyclerView 中的 TextView 字段
- android.database.CursorIndexOutOfBoundsException: Index 0 requested, with a size of 0(请求索引 0,但游标大小为 0)
- android - 使用 AppCompat 时,我们是否需要明确指定其 UI 组件(Spinner、EditText)颜色
我尝试将 JSON 文件发布到 Cloud Pub/Sub,并在流式处理过程中使用 Cloud Dataflow 将数据写入 Cloud Datastore。
from __future__ import absolute_import
import apache_beam as beam
import json
import logging
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import GoogleCloudOptions
from google.cloud.proto.datastore.v1 import entity_pb2
from apache_beam import window
from apache_beam.io.gcp.pubsub import ReadFromPubSub
from apache_beam.io.gcp.datastore.v1.datastoreio import WriteToDatastore
from googledatastore import helper as datastore_helper
class EntityWrapper(object):
    """Build Cloud Datastore entity protos from message payloads.

    Every entity gets a fresh random key name, and the payload is stored
    as text in a single "content" property.
    """

    def __init__(self, namespace, kind, ancestor):
        """Remember the key settings shared by every entity.

        Args:
            namespace: Datastore namespace id, or None to leave the key's
                partition namespace unset.
            kind: Kind used for the entity key (and for the ancestor path
                element when an ancestor is given).
            ancestor: Name or id of the common ancestor, or None to create
                root entities without an ancestor.
        """
        self._namespace = namespace
        self._kind = kind
        self._ancestor = ancestor

    def make_entity(self, content):
        """Return a new entity proto whose "content" property holds *content*.

        Args:
            content: Value to store; it is converted with ``unicode()``.

        Returns:
            An ``entity_pb2.Entity`` with a key of the configured kind and a
            random UUID4 name.
        """
        # The script never imports uuid at module level, so import it here;
        # a function-scope import is also resolved on the worker that runs
        # this method.
        import uuid

        entity = entity_pb2.Entity()
        if self._namespace is not None:
            entity.key.partition_id.namespace_id = self._namespace

        # Key path is (kind, name) pairs. add_key_path accepts only integer
        # ids or string names, so the ancestor pair is added only when an
        # ancestor was actually configured.
        key_path = [self._kind, str(uuid.uuid4())]
        if self._ancestor is not None:
            key_path = [self._kind, self._ancestor] + key_path
        datastore_helper.add_key_path(entity.key, *key_path)

        datastore_helper.add_properties(entity, {"content": unicode(content)})
        return entity
# Settings for a streaming job on the Dataflow runner. PROJECT,
# STAGING_LOCATION, JOB_NAME and TEMP_LOCATION are expected to be defined
# elsewhere in the script.
pipeline_options = dict(
    project=PROJECT,
    staging_location=STAGING_LOCATION,
    runner='DataflowRunner',
    job_name=JOB_NAME,
    temp_location=TEMP_LOCATION,
    streaming=True
)
# Turn the plain dictionary into the options object handed to the pipeline.
options = PipelineOptions.from_dictionary(pipeline_options)
def run():
    """Build the Pub/Sub-to-Datastore pipeline, start it and wait for it."""

    def parse_pubsub(line):
        # Decode one Pub/Sub payload as JSON.
        return json.loads(line)

    entity_builder = EntityWrapper(
        namespace=NAMESPACE, kind=KIND, ancestor=None)

    p = beam.Pipeline(options=options)

    # Each stage is named separately; the step labels match the job graph.
    messages = p | "Read from PubSub" >> ReadFromPubSub(topic=TOPIC)
    records = (
        messages
        | "PubSub message to Python object" >> beam.Map(parse_pubsub))
    windowed = records | "Windowing" >> beam.WindowInto(
        window.FixedWindows(10))
    entities = windowed | "create entity" >> beam.Map(
        entity_builder.make_entity)
    _ = entities | "write to DataStore" >> WriteToDatastore(PROJECT)

    result = p.run()
    result.wait_until_finish()
if __name__ == '__main__':
    # Let INFO-level records through the root logger, then launch the job.
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    run()
当我在 google cloud shell 上运行此代码时,它能够运行并创建像这样的管道。
但是,当我将 json 发布到 pubsub 时,它不起作用。
错误消息如下。
JOB_MESSAGE_ERROR: java.util.concurrent.ExecutionException: java.lang.RuntimeException: Error received from SDK harness for instruction -30: Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 134, in _execute
response = task()
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 169, in <lambda>
self._execute(lambda: worker.do_instruction(work), work)
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 215, in do_instruction
request.instruction_id)
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 237, in process_bundle
processor.process_bundle(instruction_id)
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/bundle_processor.py", line 299, in process_bundle
input_op.process_encoded(data.data)
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/bundle_processor.py", line 120, in process_encoded
self.output(decoded_value)
File "apache_beam/runners/worker/operations.py", line 166, in apache_beam.runners.worker.operations.Operation.output
def output(self, windowed_value, output_index=0):
File "apache_beam/runners/worker/operations.py", line 167, in apache_beam.runners.worker.operations.Operation.output
cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 87, in apache_beam.runners.worker.operations.ConsumerSet.receive
cython.cast(Operation, consumer).process(windowed_value)
File "apache_beam/runners/worker/operations.py", line 387, in apache_beam.runners.worker.operations.DoOperation.process
with self.scoped_process_state:
File "apache_beam/runners/worker/operations.py", line 388, in apache_beam.runners.worker.operations.DoOperation.process
self.dofn_receiver.receive(o)
File "apache_beam/runners/common.py", line 589, in apache_beam.runners.common.DoFnRunner.receive
self.process(windowed_value)
File "apache_beam/runners/common.py", line 595, in apache_beam.runners.common.DoFnRunner.process
self._reraise_augmented(exn)
File "apache_beam/runners/common.py", line 612, in apache_beam.runners.common.DoFnRunner._reraise_augmented
raise
File "apache_beam/runners/common.py", line 593, in apache_beam.runners.common.DoFnRunner.process
self.do_fn_invoker.invoke_process(windowed_value)
File "apache_beam/runners/common.py", line 363, in apache_beam.runners.common.SimpleInvoker.invoke_process
output_processor.process_outputs(
File "apache_beam/runners/common.py", line 698, in apache_beam.runners.common._OutputProcessor.process_outputs
self.main_receivers.receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 87, in apache_beam.runners.worker.operations.ConsumerSet.receive
cython.cast(Operation, consumer).process(windowed_value)
File "apache_beam/runners/worker/operations.py", line 387, in apache_beam.runners.worker.operations.DoOperation.process
with self.scoped_process_state:
File "apache_beam/runners/worker/operations.py", line 388, in apache_beam.runners.worker.operations.DoOperation.process
self.dofn_receiver.receive(o)
File "apache_beam/runners/common.py", line 589, in apache_beam.runners.common.DoFnRunner.receive
self.process(windowed_value)
File "apache_beam/runners/common.py", line 595, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 612, in apache_beam.runners.common.DoFnRunner._reraise_augmented
raise
File "apache_beam/runners/common.py", line 593, in apache_beam.runners.common.DoFnRunner.process
self.do_fn_invoker.invoke_process(windowed_value)
File "apache_beam/runners/common.py", line 472, in apache_beam.runners.common.PerWindowInvoker.invoke_process
self._invoke_per_window(
File "apache_beam/runners/common.py", line 522, in apache_beam.runners.common.PerWindowInvoker._invoke_per_window
output_processor.process_outputs(
File "apache_beam/runners/common.py", line 659, in apache_beam.runners.common._OutputProcessor.process_outputs
def process_outputs(self, windowed_input_element, results):
File "apache_beam/runners/common.py", line 698, in apache_beam.runners.common._OutputProcessor.process_outputs
self.main_receivers.receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 87, in apache_beam.runners.worker.operations.ConsumerSet.receive
cython.cast(Operation, consumer).process(windowed_value)
File "apache_beam/runners/worker/operations.py", line 387, in apache_beam.runners.worker.operations.DoOperation.process
with self.scoped_process_state:
File "apache_beam/runners/worker/operations.py", line 388, in apache_beam.runners.worker.operations.DoOperation.process
self.dofn_receiver.receive(o)
File "apache_beam/runners/common.py", line 589, in apache_beam.runners.common.DoFnRunner.receive
self.process(windowed_value)
File "apache_beam/runners/common.py", line 595, in apache_beam.runners.common.DoFnRunner.process
self._reraise_augmented(exn)
File "apache_beam/runners/common.py", line 612, in apache_beam.runners.common.DoFnRunner._reraise_augmented
raise
File "apache_beam/runners/common.py", line 593, in apache_beam.runners.common.DoFnRunner.process
self.do_fn_invoker.invoke_process(windowed_value)
File "apache_beam/runners/common.py", line 364, in apache_beam.runners.common.SimpleInvoker.invoke_process
windowed_value, self.process_method(windowed_value.value))
File "/home/shinya_yaginuma/.local/lib/python2.7/site-packages/apache_beam/transforms/core.py", line 1035, in <lambda>
File "pubsub_to_datastore.py", line 21, in make_entity
NameError: global name 'entity_pb2' is not defined
java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
org.apache.beam.sdk.util.MoreFutures.get(MoreFutures.java:57)
com.google.cloud.dataflow.worker.fn.control.RegisterAndProcessBundleOperation.finish(RegisterAndProcessBundleOperation.java:274)
com.google.cloud.dataflow.worker.util.common.worker.MapTaskExecutor.execute(MapTaskExecutor.java:83)
com.google.cloud.dataflow.worker.fn.control.BeamFnMapTaskExecutor.execute(BeamFnMapTaskExecutor.java:101)
com.google.cloud.dataflow.worker.StreamingDataflowWorker.process(StreamingDataflowWorker.java:1227)
com.google.cloud.dataflow.worker.StreamingDataflowWorker.access$1000(StreamingDataflowWorker.java:136)
com.google.cloud.dataflow.worker.StreamingDataflowWorker$6.run(StreamingDataflowWorker.java:966)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
java.lang.Thread.run(Thread.java:745)
我检查所有库都已安装。所以,我不明白为什么会发生错误。
问候,
最佳答案
您的导入在提交作业时是正常工作的,所以错误不是在创建 Dataflow 作业时出现,而是在向 Pub/Sub 发布消息之后才出现。然而,当 make_entity 在工作器(worker)上实际被调用时,entity_pb2 已经不在作用域中了!
根据文档,您需要在实际使用这些模块的工作器(worker)上导入它们(例如在函数内部导入),或者让导入持久化。请尝试保存主会话(main session):
# Same options as in the question, with one extra key: save_main_session.
pipeline_options = {
    'project': PROJECT,
    'staging_location': STAGING_LOCATION,
    'runner': 'DataflowRunner',
    'job_name': JOB_NAME,
    'temp_location': TEMP_LOCATION,
    'streaming': True,
    'save_main_session': True}  # pickle the main session so module-level imports (e.g. entity_pb2) are available on the workers
关于python - 如何使用 Python 构建从 Pub/Sub 到 Datastore 的流式管道?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52771074/
我们正在使用 VSTS 构建和发布通过 Xamarin 创建的 iOS 和 Android 应用程序。通过 VSTS 将 Android 应用发布到商店相对简单。有人可以指导我或提供一些如何通过 VS
我一直在研究 Spring Social Facebook 的 publish(objectId, connectionName, data) API ,但不确定此 API 的用法(遗憾的是,由于缺少
我正在使用 django viewflow 创建一个发布流程: 用户创建对象 它进入审核流程,其状态为待处理(公众不可见) 经过审核和批准后,就会发布并公开可见。 如果用户编辑同一实体,则会再次进入审
我正在尝试进行 API 调用,并且 API 需要格式为 XML: Security GetSessionInfo 999999999999 0 2 {
我已经查看了所有 StackOverflow,但没有找到适合我的案例的解决方案我有 405 HttpStatusCode 调用 API/Regions/Create 操作这是我的 baseContro
如果我切换到新版本的SpringBoot,我在启动应用程序时会得到上面的错误信息。这是为什么? 最美好的祝愿史蒂文 pom.xml 4.0.0 de.xyz.microservice spring
我有一个场景,页面导航是从一个域到另一个域完成的。例如,导航是从 http://www.foo.com到 http://www.bar.com在 JavaScript 中单击按钮 重定向时,我需要将用
这半年来一直深耕包头,这个城市比较不错,但是推进项目的难度确实挺大的。与开发产品相比,后者更省心。但是光研发产品,没有项目
我正在阅读有关 Github 版本 的信息,它似乎很适合您的项目。因为我们需要决定将哪些功能用于生产,哪些不用于。 我无法理解的部分是,master 和 release 分支如何在其中发挥作用。 Sh
我将一些代码推送到远程存储库,然后在 GitHub 上创建了第一个版本,并将其命名为 'v0.0.1'。 GitHub 现在显示我现在有一个版本,并且还在“标签”中显示我有一个标签 “v0.0.1”。
如果我有一个具有以下文件/文件夹结构的 GitHub 存储库 github.com/@product/template: /build /fileA /fileB /src /genera
我有一个 Maven 多模块项目。 当代码开发完成后,我们想在 Jenkins 中编写一个分支构建作业,它分支代码,增加主干中的 pom 版本,并删除 -SNAPSHOT 来自分支中的 pom 版本。
我有一个非常大的集合(约 40000 个文档,包含约 20-25 个字段,包括包含一组约 500 个项目的数组字段)和约 2000 个订阅者(他们现在只是机器人)。 因此,当用户订阅整个集合(不包括服
如果我正在使用消息队列构建一个包含数十个发布者/订阅者的系统,那么我似乎有一些网络配置选项: 我可以拥有一个所有机器都使用的集群代理 - 每台机器都没有本地队列 我可以在每台机器上本地安装代理,并使用
我正在使用 Flash Develop,并且创建了一个 ActionScript 3.0 项目。它启动并读取一个 xml 文件,其中包含图像的 url。我已将 url 保留在与 swf 相同的文件夹中
如果我在一个句子中使用 alloc 和 retain 声明一个 NSArray 那么我应该释放 NSArray 对象两次(即[arrayObject release] 2次)? 最佳答案 如果您在同一
我正在尝试在 Node 中实现发布/订阅模式,但不使用 Redis。功能应该是相同的;您可以发布到 channel ,订阅 channel 并收听数据(如果您已订阅);以下是 Redis 功能: pu
编辑:这个问题、一些答案和一些评论,包含很多错误信息。见 how Meteor collections, publications and subscriptions work准确理解发布和订阅同一服
我正在开发一款 DirectX 游戏,我发现在发布版本中我的平均帧速率为 170fps,但是在调试版本中我的帧速率约为 20fps。 我想知道发布和调试版本之间的巨大差异是否正常,特别是因为在调试中我
是否有办法回滚 Windows Azure 网站和 SQL 部署/发布? 我发布了一个网站,现在它导致了很多错误,我想回到之前的状态并进一步处理代码。 这可能吗? 最佳答案 如果您使用 Git 或 T
我是一名优秀的程序员,十分优秀!