gpt4 book ai didi

python - 使用 Python 和 pyathenajdbc 连接 Athena

转载 作者:太空宇宙 更新时间:2023-11-03 13:11:17 27 4
gpt4 key购买 nike

我正在尝试使用 Python 通过 pyathenajdbc 连接到 AWS Athena,但无法建立连接。运行下面的代码时,我收到一条错误,提示找不到 AthenaDriver 类(java.lang.RuntimeException: Class com.amazonaws.athena.jdbc.AthenaDriver not found)。我已经从 AWS 下载了该驱动文件,并确认它位于相应目录中。

from mdpbi.rsi.config import *
from mdpbi.tools.functions import mdpLog
from pkg_resources import resource_string
import argparse
import os
import pyathenajdbc
import sys

# Name handed to mdpLog() in main() when the logger is created.
SCRIPT_NAME = "Athena_Export"

# Path of the Athena JDBC driver jar; passed to pyathenajdbc.connect() as driver_path.
ATHENA_JDBC_CLASSPATH = "/opt/amazon/athenajdbc/AthenaJDBC41-1.0.0.jar"
# File name and full path of the export file.
# NOTE(review): WORKINGDIR is presumably supplied by the star import of
# mdpbi.rsi.config -- confirm. Neither export constant is used in the visible code.
EXPORT_OUTFILE = "RSI_Export.txt"
EXPORT_OUTFILE_PATH = os.path.join(WORKINGDIR, EXPORT_OUTFILE)


def get_arg_parser():
    """Build the command-line argument parser for this script.

    The parser takes its description from the module docstring and uses
    ``RawDescriptionHelpFormatter`` so that the description is printed
    verbatim in ``--help`` output. No script-specific options are registered.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    return argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )


def main():
    """Run the packaged Athena query and log its results.

    Parses the command line, loads ``athena.sql`` from the
    ``mdpbi.rsi.athena.resources`` package, opens a pyathenajdbc connection
    using the JDBC driver jar at ``ATHENA_JDBC_CLASSPATH``, executes the
    query, and logs the cursor description and every fetched row.

    NOTE(review): LOGDIR and the AWS credential names are presumably provided
    by the star import of ``mdpbi.rsi.config`` -- confirm.

    Returns:
        int: 0 on success; the caller uses it as the process exit status.
    """
    # No options are defined, but parsing still handles --help and rejects
    # unexpected arguments, so the call is kept even though its result is unused.
    get_arg_parser().parse_args(sys.argv[1:])
    logger = mdpLog(SCRIPT_NAME, LOGDIR)

    sql = resource_string("mdpbi.rsi.athena.resources", "athena.sql")

    conn = pyathenajdbc.connect(
        s3_staging_dir="s3://athena",
        access_key=AWS_ACCESS_KEY_ID,
        secret_key=AWS_SECRET_ACCESS_KEY,
        region_name="us-east-1",
        log_path=LOGDIR,
        driver_path=ATHENA_JDBC_CLASSPATH
    )
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql)
            logger.info(cursor.description)
            logger.info(cursor.fetchall())
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

    return 0


if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    sys.exit(main())

Traceback (most recent call last):
  File "/usr/lib64/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib64/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/home/ec2-user/jason_testing/mdpbi/rsi/athena/main.py", line 53, in <module>
    rtn = main()
  File "/home/ec2-user/jason_testing/mdpbi/rsi/athena/main.py", line 39, in main
    driver_path=athena_jdbc_driver_path
  File "/opt/mdpbi/Python_Envs/2.7.10/local/lib/python2.7/dist-packages/pyathenajdbc/__init__.py", line 65, in connect
    driver_path, **kwargs)
  File "/opt/mdpbi/Python_Envs/2.7.10/local/lib/python2.7/dist-packages/pyathenajdbc/connection.py", line 68, in __init__
    jpype.JClass(ATHENA_DRIVER_CLASS_NAME)
  File "/opt/mdpbi/Python_Envs/2.7.10/lib64/python2.7/dist-packages/jpype/_jclass.py", line 55, in JClass
    raise _RUNTIMEEXCEPTION.PYEXC("Class %s not found" % name)

最佳答案

Athena 的 JDBC 驱动程序需要 Java 8,而我当时运行的是 Java 7。我在 EC2 实例上另外安装了一个 Java 8:

https://tecadmin.net/install-java-8-on-centos-rhel-and-fedora/#

我还必须在代码中通过环境变量(JAVA_HOME、JRE_HOME 和 PATH)指定要使用的 Java 版本。做了这些更改之后,代码现在可以按预期运行。

from mdpbi.rsi.config import *
from mdpbi.tools.functions import mdpLog
from pkg_resources import resource_string
import argparse
import os
import pyathenajdbc
import sys

# Name handed to mdpLog() in main() when the logger is created.
SCRIPT_NAME = "Athena_Export"


def get_arg_parser():
    """Build the command-line argument parser for this script.

    The parser takes its description from the module docstring and uses
    ``RawDescriptionHelpFormatter`` so that the description is printed
    verbatim in ``--help`` output. No script-specific options are registered.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    return argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )


def main():
    """Run the packaged Athena query under Java 8 and log its results.

    Parses the command line, loads ``athena.sql`` from the
    ``mdpbi.rsi.athena.resources`` package, points the process environment at
    the Java 8 installation, opens a pyathenajdbc connection, executes the
    query, and logs the cursor description and every fetched row.

    NOTE(review): LOGDIR and the AWS credential names are presumably provided
    by the star import of ``mdpbi.rsi.config`` -- confirm.

    Returns:
        int: 0 on success; the caller uses it as the process exit status.
    """
    # No options are defined, but parsing still handles --help and rejects
    # unexpected arguments, so the call is kept even though its result is unused.
    get_arg_parser().parse_args(sys.argv[1:])
    logger = mdpLog(SCRIPT_NAME, LOGDIR)

    sql = resource_string("mdpbi.rsi.athena.resources", "athena.sql")

    # Select the Java 8 installation before the connection is created.
    java_home = "/opt/jdk1.8.0_121"
    os.environ["JAVA_HOME"] = java_home
    os.environ["JRE_HOME"] = os.path.join(java_home, "jre")

    # Put the Java 8 bin directories first on PATH, but keep the existing
    # entries so other executables stay reachable from this process.
    path_entries = [
        os.path.join(java_home, "bin"),
        os.path.join(java_home, "jre", "bin"),
    ]
    current_path = os.environ.get("PATH")
    if current_path:
        path_entries.append(current_path)
    os.environ["PATH"] = os.pathsep.join(path_entries)

    conn = pyathenajdbc.connect(
        s3_staging_dir="s3://mdpbi.data.rsi.out/",
        access_key=AWS_ACCESS_KEY_ID,
        secret_key=AWS_SECRET_ACCESS_KEY,
        schema_name="rsi",
        region_name="us-east-1"
    )
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql)
            logger.info(cursor.description)
            logger.info(cursor.fetchall())
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

    return 0


if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    sys.exit(main())

关于python - 使用 Python 和 pyathenajdbc 连接 Athena,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43006368/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com