
python-3.x - Python code to download a list of csv files from Azure Blob Storage using a SAS token


I am trying to download a list of csv files from Azure Blob Storage using a shared SAS token, but I am running into various errors.

I tried researching this and tested several code samples from contributors on Stack Overflow and in the Azure documentation. Below is the final state of the code I pieced together from those sources. It attempts to download the list of csv files in a pooled fashion (the blob container holds 200 csv files):

Note: I left the commented-out snippets in place to show the different approaches I tried. Sorry if they are confusing!

from itertools import tee
from multiprocessing import Process
from multiprocessing.pool import ThreadPool
import os
from azure.storage.blob import BlobServiceClient, BlobClient
from azure.storage.blob import ContentSettings, ContainerClient
#from azure.storage.blob import BlockBlobService

STORAGEACCOUNTURL = "https://myaccount.blob.core.windows.net"
STORAGEACCOUNTKEY = "sv=2020-08-04&si=blobpolicyXYZ&sr=c&sig=xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
CONTAINERNAME = "mycontainer"
##BLOBNAME = "??"

sas_url = 'https://myaccount.blob.core.windows.net/mycontainer/mydir?sv=2020-08-04&si=blobpolicyXYZ&sr=c&sig=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
LOCAL_BLOB_PATH = "./downloads"

class AzureBlobFileDownloader:
    def __init__(self):
        print("Intializing AzureBlobFileDownloader")

        # Initialize the connection to Azure storage account
        self.blob_service_client_instance = ContainerClient.from_container_url  #BlobClient.from_blob_url(sas_url) #BlobServiceClient(account_url=STORAGEACCOUNTURL, credential=STORAGEACCOUNTKEY)
        #self.blob_client_instance = self.blob_service_client_instance.get_blob_client(CONTAINERNAME, BLOBNAME)
        #self.blob_service_client = BlobServiceClient.from_connection_string(MY_CONNECTION_STRING)
        #self.my_container = self.blob_service_client.get_container_client(MY_BLOB_CONTAINER)

        #self.blob_service_client = BlockBlobService("storage_account",sas_token="?sv=2018-03-28&ss=bfqt&srt=sco&sp=rwdlacup&se=2019-04-24T10:01:58Z&st=2019-04-23T02:01:58Z&spr=https&sig=xxxxxxxxx")
        #self.my_container = self.blob_service_client.get_blob_to_path("container_name","blob_name","local_file_path")

    def save_blob(self, file_name, file_content):
        # Get full path to the file
        download_file_path = os.path.join(LOCAL_BLOB_PATH, file_name)

        # for nested blobs, create local path as well!
        os.makedirs(os.path.dirname(download_file_path), exist_ok=True)

        with open(download_file_path, "wb") as file:
            file.write(file_content)

    def download_all_blobs_in_container(self):
        # get a list of blobs
        my_blobs = self.blob_service_client_instance.get_block_list()  #list_blobs() #self.blob_client_instance.list_blobs() download_blob() #
        print(my_blobs)

        # iterate through the iterable object for testing purposes, maybe wrong approach!
        result, result_backup = tee(my_blobs)
        print("**first iterate**")
        for i, r in enumerate(result):
            print(r)

        # start downloading my_blobs
        result = self.run(my_blobs)
        print(result)

    def run(self, blobs):
        # Download 3 files at a time!
        with ThreadPool(processes=int(3)) as pool:
            return pool.map(self.save_blob_locally, blobs)

    def save_blob_locally(self, blob):
        file_name = blob.name
        print(file_name)
        bytes = self.blob_service_client_instance.get_blob_client(CONTAINERNAME, blob).download_blob().readall()

        # Get full path to the file
        download_file_path = os.path.join(LOCAL_BLOB_PATH, file_name)
        # for nested blobs, create local path as well!
        os.makedirs(os.path.dirname(download_file_path), exist_ok=True)

        with open(download_file_path, "wb") as file:
            file.write(bytes)
        return file_name

# Initialize class and download files
azure_blob_file_downloader = AzureBlobFileDownloader()
azure_blob_file_downloader.download_all_blobs_in_container()

Could someone help me achieve this task in python:

  • get a list of all files in the blob storage whose names are prefixed with part-
  • download them to a local folder

Thanks

Best Answer

could someone help me achieve this task in python:

  • get a list of all files in the blob storage whose names are prefixed with part-

To list all blobs whose names start with "part-", you can use blob_service.list_blobs(<Container Name>, prefix="<Your Prefix>"). Below is the code that lists those blobs.

print("\nList blobs in the container")
generator = blob_service.list_blobs(CONTAINER_NAME, prefix="part-")
for blob in generator:
print("\t Blob name: " + blob.name)
  • download them to a folder locally

To download a blob, you can use blob_client = blob_service.get_blob_to_path(<Container Name>, <Blob Name>, <File Path>). Below is the code that downloads the blobs per your requirement.

blob_client = blob_service.get_blob_to_path(CONTAINER_NAME,blob.name,fname)

Below is the complete code that achieves your requirement.

import os
from azure.storage.blob import BlockBlobService

ACCOUNT_NAME = "<Your_ACCOUNT_NAME>"
ACCOUNT_KEY = "<YOUR_ACCOUNT_KEY>"
CONTAINER_NAME = "<YOUR_CONTAINER_NAME>"
LOCAL_BLOB_PATH = "C:\\<YOUR_PATH>\\downloadedfiles"

blob_service = BlockBlobService(ACCOUNT_NAME, ACCOUNT_KEY)

# List all blobs that have a prefix of part-
print("\nList blobs in the container")
generator = blob_service.list_blobs(CONTAINER_NAME, prefix="part-")
for blob in generator:
    print("\t Blob name: " + blob.name)

# Download the blobs to a local folder
for blob in generator:

    # Add the blob name to the local path
    fname = os.path.join(LOCAL_BLOB_PATH, blob.name)
    print(f'Downloading {blob.name} to {fname}')

    # Download the blob into the file
    blob_client = blob_service.get_blob_to_path(CONTAINER_NAME, blob.name, fname)
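Side note: the question's imports come from the newer azure-storage-blob v12 package, while the code above uses the legacy BlockBlobService (available in azure-storage-blob 2.x and earlier). If you only have a container-level SAS URL, as in the question, a rough v12 equivalent could look like the sketch below; the part- prefix and the idea of a container SAS URL are taken from the question, while the exact URL and local path are placeholder assumptions.

import os
from azure.storage.blob import ContainerClient

# Assumed placeholders: replace with your real container SAS URL and local folder
sas_url = "https://myaccount.blob.core.windows.net/mycontainer?<your-sas-token>"
LOCAL_BLOB_PATH = "./downloads"

# Authenticate against the container directly with the SAS URL
container_client = ContainerClient.from_container_url(sas_url)

# List every blob whose name starts with "part-" and download it
for blob in container_client.list_blobs(name_starts_with="part-"):
    download_file_path = os.path.join(LOCAL_BLOB_PATH, blob.name)
    # For nested blobs, create the local sub-folders as well
    os.makedirs(os.path.dirname(download_file_path) or ".", exist_ok=True)
    print(f"Downloading {blob.name} to {download_file_path}")
    with open(download_file_path, "wb") as file:
        file.write(container_client.download_blob(blob.name).readall())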

Result:

Files in my storage account: [screenshot]

Files in my local folder: [screenshot]

Updated Answer

If you only have a SAS token (no account key), pass it to BlockBlobService through its sas_token parameter; to scope the listing to a sub-directory, include the directory name in the prefix:

blob_service = BlockBlobService(account_name=ACCOUNT_NAME, account_key=None, sas_token=SAS_TOKEN)

# List all blobs under directory1 that have a prefix of part-
print("\nList blobs in the container")
generator = blob_service.list_blobs(CONTAINER_NAME, prefix="directory1" + "/" + "part-")
for blob in generator:
    print("\t Blob name: " + blob.name)

# Download the blobs to a local folder
for blob in generator:

    # Add the blob name to the local path
    fname = os.path.join(LOCAL_BLOB_PATH, blob.name)
    print(f'Downloading {blob.name} to {fname}')

    # Download the blob into the file
    blob_client = blob_service.get_blob_to_path(CONTAINER_NAME, blob.name, fname)

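The question also wanted to download three files at a time with a ThreadPool. That part is independent of which SDK you use; a minimal sketch that combines it with the v12 ContainerClient approach from the note above might look like this (the SAS URL, local path, and part- prefix are placeholder assumptions, and the worker count of 3 mirrors the question):

import os
from multiprocessing.pool import ThreadPool
from azure.storage.blob import ContainerClient

# Assumed placeholders, same as in the earlier sketch
sas_url = "https://myaccount.blob.core.windows.net/mycontainer?<your-sas-token>"
LOCAL_BLOB_PATH = "./downloads"

container_client = ContainerClient.from_container_url(sas_url)

def save_blob_locally(blob_name):
    # Download a single blob and write it under LOCAL_BLOB_PATH, creating sub-folders as needed
    download_file_path = os.path.join(LOCAL_BLOB_PATH, blob_name)
    os.makedirs(os.path.dirname(download_file_path) or ".", exist_ok=True)
    with open(download_file_path, "wb") as file:
        file.write(container_client.download_blob(blob_name).readall())
    return blob_name

blob_names = [blob.name for blob in container_client.list_blobs(name_starts_with="part-")]

# Download 3 files at a time, as the question's ThreadPool intended
with ThreadPool(processes=3) as pool:
    print(pool.map(save_blob_locally, blob_names))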

Regarding "python-3.x - Python code to download a list of csv files from Azure Blob Storage using a SAS token", we found a similar question on Stack Overflow: https://stackoverflow.com/questions/72018539/
