gpt4 book ai didi

azure-data-factory - Azure 数据工厂将数据流映射到 CSV 接收器导致零字节文件

转载 作者:行者123 更新时间:2023-12-03 22:30:09 25 4
gpt4 key购买 nike

我正在提高我的 Azure 数据工厂技能,比较复制活动 (Copy Activity) 与映射数据流 (Mapping Data Flow) 在写入 Azure Blob 存储中单个 CSV 文件时的性能。

当我经由 Azure Blob 存储链接服务 (azureBlobLinkedService)、通过数据集 (azureBlobSingleCSVFileNameDataset) 写入单个 CSV 时,使用复制活动可以在 blob 存储容器中得到我期望的输出。例如,容器 MyContainer 中文件夹 /output/csv/singleFiles 下的输出文件 MyData.csv。

当我通过相同的 Blob 存储链接服务、但通过不同的数据集 (azureBlobSingleCSVNoFileNameDataset) 使用映射数据流写入单个 CSV 时,我得到以下结果:

  • MyContainer/output/csv/singleFiles(零长度文件)
  • MyContainer/output/csv/singleFiles/MyData.csv(包含我期望的数据)

  • 我不明白为什么在使用映射数据流时会生成零长度文件。

    这是我的源文件:

    链接服务/azureBlobLinkedService
    {
    "name": "azureBlobLinkedService",
    "type": "Microsoft.DataFactory/factories/linkedservices",
    "properties": {
    "type": "AzureBlobStorage",
    "parameters": {
    "azureBlobConnectionStringSecretName": {
    "type": "string"
    }
    },
    "annotations": [],
    "typeProperties": {
    "connectionString": {
    "type": "AzureKeyVaultSecret",
    "store": {
    "referenceName": "AzureKeyVaultLinkedService",
    "type": "LinkedServiceReference"
    },
    "secretName": "@{linkedService().azureBlobConnectionStringSecretName}"
    }
    }
    }
    }

    数据集/azureBlobSingleCSVFileNameDataset
    {
    "name": "azureBlobSingleCSVFileNameDataset",
    "properties": {
    "linkedServiceName": {
    "referenceName": "azureBlobLinkedService",
    "type": "LinkedServiceReference",
    "parameters": {
    "azureBlobConnectionStringSecretName": {
    "value": "@dataset().azureBlobConnectionStringSecretName",
    "type": "Expression"
    }
    }
    },
    "parameters": {
    "azureBlobConnectionStringSecretName": {
    "type": "string"
    },
    "azureBlobSingleCSVFileName": {
    "type": "string"
    },
    "azureBlobSingleCSVFolderPath": {
    "type": "string"
    },
    "azureBlobSingleCSVContainerName": {
    "type": "string"
    }
    },
    "annotations": [],
    "type": "DelimitedText",
    "typeProperties": {
    "location": {
    "type": "AzureBlobStorageLocation",
    "fileName": {
    "value": "@dataset().azureBlobSingleCSVFileName",
    "type": "Expression"
    },
    "folderPath": {
    "value": "@dataset().azureBlobSingleCSVFolderPath",
    "type": "Expression"
    },
    "container": {
    "value": "@dataset().azureBlobSingleCSVContainerName",
    "type": "Expression"
    }
    },
    "columnDelimiter": ",",
    "escapeChar": "\\",
    "firstRowAsHeader": true,
    "quoteChar": "\""
    },
    "schema": []
    },
    "type": "Microsoft.DataFactory/factories/datasets"
    }

    管道/Azure SQL 表到 Blob 单个 CSV 复制管道(这会产生预期的结果)
    {
    "name": "Azure SQL Table to Blob Single CSV Copy Pipeline",
    "properties": {
    "activities": [
    {
    "name": "Copy Azure SQL Table to Blob Single CSV",
    "type": "Copy",
    "dependsOn": [],
    "policy": {
    "timeout": "7.00:00:00",
    "retry": 0,
    "retryIntervalInSeconds": 30,
    "secureOutput": false,
    "secureInput": false
    },
    "userProperties": [],
    "typeProperties": {
    "source": {
    "type": "AzureSqlSource",
    "queryTimeout": "02:00:00"
    },
    "sink": {
    "type": "DelimitedTextSink",
    "storeSettings": {
    "type": "AzureBlobStorageWriteSettings"
    },
    "formatSettings": {
    "type": "DelimitedTextWriteSettings",
    "quoteAllText": true,
    "fileExtension": ".csv"
    }
    },
    "enableStaging": false
    },
    "inputs": [
    {
    "referenceName": "azureSqlDatabaseTableDataset",
    "type": "DatasetReference",
    "parameters": {
    "azureSqlDatabaseConnectionStringSecretName": {
    "value": "@pipeline().parameters.sourceAzureSqlDatabaseConnectionStringSecretName",
    "type": "Expression"
    },
    "azureSqlDatabaseTableSchemaName": {
    "value": "@pipeline().parameters.sourceAzureSqlDatabaseTableSchemaName",
    "type": "Expression"
    },
    "azureSqlDatabaseTableTableName": {
    "value": "@pipeline().parameters.sourceAzureSqlDatabaseTableTableName",
    "type": "Expression"
    }
    }
    }
    ],
    "outputs": [
    {
    "referenceName": "azureBlobSingleCSVFileNameDataset",
    "type": "DatasetReference",
    "parameters": {
    "azureBlobConnectionStringSecretName": {
    "value": "@pipeline().parameters.sinkAzureBlobConnectionStringSecretName",
    "type": "Expression"
    },
    "azureBlobSingleCSVFileName": {
    "value": "@pipeline().parameters.sinkAzureBlobSingleCSVFileName",
    "type": "Expression"
    },
    "azureBlobSingleCSVFolderPath": {
    "value": "@pipeline().parameters.sinkAzureBlobSingleCSVFolderPath",
    "type": "Expression"
    },
    "azureBlobSingleCSVContainerName": {
    "value": "@pipeline().parameters.sinkAzureBlobSingleCSVContainerName",
    "type": "Expression"
    }
    }
    }
    ]
    }
    ],
    "parameters": {
    "sourceAzureSqlDatabaseConnectionStringSecretName": {
    "type": "string"
    },
    "sourceAzureSqlDatabaseTableSchemaName": {
    "type": "string"
    },
    "sourceAzureSqlDatabaseTableTableName": {
    "type": "string"
    },
    "sinkAzureBlobConnectionStringSecretName": {
    "type": "string"
    },
    "sinkAzureBlobSingleCSVContainerName": {
    "type": "string"
    },
    "sinkAzureBlobSingleCSVFolderPath": {
    "type": "string"
    },
    "sinkAzureBlobSingleCSVFileName": {
    "type": "string"
    }
    },
    "annotations": []
    },
    "type": "Microsoft.DataFactory/factories/pipelines"
    }

    数据集/azureBlobSingleCSVNoFileNameDataset:(按映射数据流的要求,数据集中不包含文件名;文件名在映射数据流中设置)
    {
    "name": "azureBlobSingleCSVNoFileNameDataset",
    "properties": {
    "linkedServiceName": {
    "referenceName": "azureBlobLinkedService",
    "type": "LinkedServiceReference",
    "parameters": {
    "azureBlobConnectionStringSecretName": {
    "value": "@dataset().azureBlobConnectionStringSecretName",
    "type": "Expression"
    }
    }
    },
    "parameters": {
    "azureBlobConnectionStringSecretName": {
    "type": "string"
    },
    "azureBlobSingleCSVFolderPath": {
    "type": "string"
    },
    "azureBlobSingleCSVContainerName": {
    "type": "string"
    }
    },
    "annotations": [],
    "type": "DelimitedText",
    "typeProperties": {
    "location": {
    "type": "AzureBlobStorageLocation",
    "folderPath": {
    "value": "@dataset().azureBlobSingleCSVFolderPath",
    "type": "Expression"
    },
    "container": {
    "value": "@dataset().azureBlobSingleCSVContainerName",
    "type": "Expression"
    }
    },
    "columnDelimiter": ",",
    "escapeChar": "\\",
    "firstRowAsHeader": true,
    "quoteChar": "\""
    },
    "schema": []
    },
    "type": "Microsoft.DataFactory/factories/datasets"
    }

    数据流/azureSqlDatabaseTableToAzureBlobSingleCSVDataFlow
    {
    "name": "azureSqlDatabaseTableToAzureBlobSingleCSVDataFlow",
    "properties": {
    "type": "MappingDataFlow",
    "typeProperties": {
    "sources": [
    {
    "dataset": {
    "referenceName": "azureSqlDatabaseTableDataset",
    "type": "DatasetReference"
    },
    "name": "readFromAzureSqlDatabase"
    }
    ],
    "sinks": [
    {
    "dataset": {
    "referenceName": "azureBlobSingleCSVNoFileNameDataset",
    "type": "DatasetReference"
    },
    "name": "writeToAzureBlobSingleCSV"
    }
    ],
    "transformations": [
    {
    "name": "enrichWithRuntimeMetadata"
    }
    ],
    "script": "\nparameters{\n\tsourceConnectionSecretName as string,\n\tsinkConnectionStringSecretName as string,\n\tsourceObjectName as string,\n\tsinkObjectName as string,\n\tdataFactoryName as string,\n\tdataFactoryPipelineName as string,\n\tdataFactoryPipelineRunId as string,\n\tsinkFileNameNoPath as string\n}\nsource(allowSchemaDrift: true,\n\tvalidateSchema: false,\n\tisolationLevel: 'READ_UNCOMMITTED',\n\tformat: 'table') ~> readFromAzureSqlDatabase\nreadFromAzureSqlDatabase derive({__sourceConnectionStringSecretName} = $sourceConnectionSecretName,\n\t\t{__sinkConnectionStringSecretName} = $sinkConnectionStringSecretName,\n\t\t{__sourceObjectName} = $sourceObjectName,\n\t\t{__sinkObjectName} = $sinkObjectName,\n\t\t{__dataFactoryName} = $dataFactoryName,\n\t\t{__dataFactoryPipelineName} = $dataFactoryPipelineName,\n\t\t{__dataFactoryPipelineRunId} = $dataFactoryPipelineRunId) ~> enrichWithRuntimeMetadata\nenrichWithRuntimeMetadata sink(allowSchemaDrift: true,\n\tvalidateSchema: false,\n\tpartitionFileNames:[($sinkFileNameNoPath)],\n\tpartitionBy('hash', 1),\n\tquoteAll: true) ~> writeToAzureBlobSingleCSV"
    }
    }
    }

    管道/Azure SQL 表到 Blob 单个 CSV 数据流管道(这会产生预期的结果,但同时会在文件夹路径处多出一个零字节文件。)
    {
    "name": "Azure SQL Table to Blob Single CSV Data Flow Pipeline",
    "properties": {
    "activities": [
    {
    "name": "Copy Sql Database Table To Blob Single CSV Data Flow",
    "type": "ExecuteDataFlow",
    "dependsOn": [],
    "policy": {
    "timeout": "7.00:00:00",
    "retry": 0,
    "retryIntervalInSeconds": 30,
    "secureOutput": false,
    "secureInput": false
    },
    "userProperties": [],
    "typeProperties": {
    "dataflow": {
    "referenceName": "azureSqlDatabaseTableToAzureBlobSingleCSVDataFlow",
    "type": "DataFlowReference",
    "parameters": {
    "sourceConnectionSecretName": {
    "value": "'@{pipeline().parameters.sourceAzureSqlDatabaseConnectionStringSecretName}'",
    "type": "Expression"
    },
    "sinkConnectionStringSecretName": {
    "value": "'@{pipeline().parameters.sinkAzureBlobConnectionStringSecretName}'",
    "type": "Expression"
    },
    "sourceObjectName": {
    "value": "'@{concat('[', pipeline().parameters.sourceAzureSqlDatabaseTableSchemaName, '].[', pipeline().parameters.sourceAzureSqlDatabaseTableTableName, ']')}'",
    "type": "Expression"
    },
    "sinkObjectName": {
    "value": "'@{concat(pipeline().parameters.sinkAzureBlobSingleCSVContainerName, '/', pipeline().parameters.sinkAzureBlobSingleCSVFolderPath, '/', \npipeline().parameters.sinkAzureBlobSingleCSVFileName)}'",
    "type": "Expression"
    },
    "dataFactoryName": {
    "value": "'@{pipeline().DataFactory}'",
    "type": "Expression"
    },
    "dataFactoryPipelineName": {
    "value": "'@{pipeline().Pipeline}'",
    "type": "Expression"
    },
    "dataFactoryPipelineRunId": {
    "value": "'@{pipeline().RunId}'",
    "type": "Expression"
    },
    "sinkFileNameNoPath": {
    "value": "'@{pipeline().parameters.sinkAzureBlobSingleCSVFileName}'",
    "type": "Expression"
    }
    },
    "datasetParameters": {
    "readFromAzureSqlDatabase": {
    "azureSqlDatabaseConnectionStringSecretName": {
    "value": "@pipeline().parameters.sourceAzureSqlDatabaseConnectionStringSecretName",
    "type": "Expression"
    },
    "azureSqlDatabaseTableSchemaName": {
    "value": "@pipeline().parameters.sourceAzureSqlDatabaseTableSchemaName",
    "type": "Expression"
    },
    "azureSqlDatabaseTableTableName": {
    "value": "@pipeline().parameters.sourceAzureSqlDatabaseTableTableName",
    "type": "Expression"
    }
    },
    "writeToAzureBlobSingleCSV": {
    "azureBlobConnectionStringSecretName": {
    "value": "@pipeline().parameters.sinkAzureBlobConnectionStringSecretName",
    "type": "Expression"
    },
    "azureBlobSingleCSVFolderPath": {
    "value": "@pipeline().parameters.sinkAzureBlobSingleCSVFolderPath",
    "type": "Expression"
    },
    "azureBlobSingleCSVContainerName": {
    "value": "@pipeline().parameters.sinkAzureBlobSingleCSVContainerName",
    "type": "Expression"
    }
    }
    }
    },
    "compute": {
    "coreCount": 8,
    "computeType": "General"
    }
    }
    }
    ],
    "parameters": {
    "sourceAzureSqlDatabaseConnectionStringSecretName": {
    "type": "string"
    },
    "sourceAzureSqlDatabaseTableSchemaName": {
    "type": "string"
    },
    "sourceAzureSqlDatabaseTableTableName": {
    "type": "string"
    },
    "sinkAzureBlobConnectionStringSecretName": {
    "type": "string"
    },
    "sinkAzureBlobSingleCSVContainerName": {
    "type": "string"
    },
    "sinkAzureBlobSingleCSVFolderPath": {
    "type": "string"
    },
    "sinkAzureBlobSingleCSVFileName": {
    "type": "string"
    }
    },
    "annotations": []
    },
    "type": "Microsoft.DataFactory/factories/pipelines"
    }

    最佳答案

    出现 0 长度(字节)的文件,意味着虽然您的管道可能已成功运行,但它没有返回或产生任何输出。

    较好的做法之一是预览每个阶段的输出,以确保每个阶段都产生了预期的输出。

    关于azure-data-factory - Azure 数据工厂将数据流映射到 CSV 接收器导致零字节文件,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59457647/

    25 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com