gpt4 book ai didi

apache-spark - 如何使用 Terraform 部署 EMR Terraform,一个开箱即用的简单工作示例

转载 作者:行者123 更新时间:2023-12-05 02:02:16 24 4
gpt4 key购买 nike

我正在使用 Terraform v0.14.5,并尝试运行官方的 Terraform 示例(example),同时固定了 provider 的版本:

# Pin the AWS provider to one exact release so the example is reproducible.
terraform {
  required_providers {
    aws = {
      version = "3.25.0"
      source  = "hashicorp/aws"
    }
  }
}

# AWS provider; the target region is read from the `region` input variable.
provider "aws" {
  region = var.region
}

在“解决”了几个明显的bug之后

更改:从 allow_all 到 allow_access(安全组的名称)

aws_security_group.allow_access.id

更改:从 allow_all 到 allow_access(安全组的名称)

使 cidr_blocks 成为一个列表

更新emr版本

aws_security_group.allow_access.id

cidr_blocks = [aws_vpc.main.cidr_block]

release_label = "emr-6.2.0"

我成功执行了 terraform init 和 terraform plan,但 terraform apply 失败了:

Error: Error waiting for EMR Cluster state to be "WAITING" or "RUNNING": TERMINATING: BOOTSTRAP_FAILURE: Master instance (i-07e34ac1b04ebde01) failed attempting to download bootstrap action 1 file from S3

错误似乎来自:

  # Bootstrap action from the example: runs the S3-hosted "run-if" script
  # with the two arguments listed below.
  bootstrap_action {
    name = "runif"
    path = "s3://elasticmapreduce/bootstrap-actions/run-if"
    args = ["instance.isMaster=true", "echo running on master node"]
  }

所以我下载了文件

aws s3 cp s3://elasticmapreduce/bootstrap-actions/run-if .

并在本地添加:


# Same bootstrap action, now pointing at the downloaded copy of "run-if"
# that sits next to the module.
bootstrap_action {
  name = "runif"

  # Previous value: "s3://elasticmapreduce/bootstrap-actions/run-if"
  path = "file://${path.module}/run-if"

  args = ["instance.isMaster=true", "echo running on master node"]
}

这是完整的代码:


# Pin the AWS provider to one exact release so the example is reproducible.
terraform {
  required_providers {
    aws = {
      version = "3.25.0"
      source  = "hashicorp/aws"
    }
  }
}

# AWS provider; the target region is read from the `region` input variable.
provider "aws" {
  region = var.region
}

# EMR cluster running Spark and Zeppelin on EMR release 6.2.0, with one
# master instance group and a single-instance core group.
resource "aws_emr_cluster" "cluster" {
  name          = "emr-test-arn"
  release_label = "emr-6.2.0"
  applications  = ["Spark", "Zeppelin"]

  # Role the EMR service itself assumes (declared elsewhere in this listing).
  service_role = aws_iam_role.iam_emr_service_role.arn

  # Networking and instance identity: both the master and the slave managed
  # security groups point at the same "allow_access" group.
  ec2_attributes {
    subnet_id                         = aws_subnet.main.id
    instance_profile                  = aws_iam_instance_profile.emr_profile.arn
    emr_managed_master_security_group = aws_security_group.allow_access.id
    emr_managed_slave_security_group  = aws_security_group.allow_access.id
  }

  master_instance_group {
    instance_type = "m5.xlarge"
  }

  core_instance_group {
    instance_type  = "m5.xlarge"
    instance_count = 1
  }

  tags = {
    role     = "rolename"
    dns_zone = "env_zone"
    env      = "env"
    name     = "name-env"
  }

  bootstrap_action {
    name = "runif"

    # Previous value: "s3://elasticmapreduce/bootstrap-actions/run-if"
    # NOTE(review): path.module is resolved on the machine running Terraform,
    # while the bootstrap script is fetched by the cluster nodes — confirm
    # that this file:// location is actually reachable from the cluster.
    path = "file://${path.module}/run-if"

    args = ["instance.isMaster=true", "echo running on master node"]
  }

  # Sets JAVA_HOME through the "export" sub-classification of both the
  # hadoop-env and spark-env classifications.
  configurations_json = <<EOF
[
{
"Classification": "hadoop-env",
"Configurations": [
{
"Classification": "export",
"Properties": {
"JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
}
}
],
"Properties": {}
},
{
"Classification": "spark-env",
"Configurations": [
{
"Classification": "export",
"Properties": {
"JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
}
}
],
"Properties": {}
}
]
EOF
}

# Security group shared by the EMR master and slave nodes.
resource "aws_security_group" "allow_access" {
  name        = "allow_access"
  description = "Allow inbound traffic"
  vpc_id      = aws_vpc.main.id

  # Explicit ordering: create the subnet before this group.
  depends_on = [aws_subnet.main]

  # Inbound: every protocol and port, restricted to the VPC's own CIDR block.
  ingress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = [aws_vpc.main.cidr_block]
  }

  # Outbound: every protocol and port, to any IPv4 destination.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Do not treat later changes to the ingress/egress rules as drift.
  lifecycle {
    ignore_changes = [
      ingress,
      egress,
    ]
  }

  tags = {
    name = "emr_test"
  }
}

# VPC that hosts the cluster; DNS hostnames are enabled for its instances.
resource "aws_vpc" "main" {
  enable_dns_hostnames = true
  cidr_block           = "168.31.0.0/16"

  tags = {
    name = "emr_test"
  }
}

# Single subnet (a /20 carved out of the VPC range) that the cluster launches into.
resource "aws_subnet" "main" {
  cidr_block = "168.31.0.0/20"
  vpc_id     = aws_vpc.main.id

  tags = {
    name = "emr_test"
  }
}

# Internet gateway attached to the VPC.
resource "aws_internet_gateway" "gw" {
  vpc_id = aws_vpc.main.id
}

# Route table with a default route (0.0.0.0/0) through the internet gateway.
resource "aws_route_table" "r" {
  vpc_id = aws_vpc.main.id

  route {
    gateway_id = aws_internet_gateway.gw.id
    cidr_block = "0.0.0.0/0"
  }
}

# Make route table "r" the main route table of the VPC.
resource "aws_main_route_table_association" "a" {
  route_table_id = aws_route_table.r.id
  vpc_id         = aws_vpc.main.id
}

###

# IAM Role setups

###

# IAM role for EMR Service
resource "aws_iam_role" "iam_emr_service_role" {
  name = "iam_emr_service_role"

  # Trust policy: allows elasticmapreduce.amazonaws.com to assume this role.
  assume_role_policy = <<EOF
{
"Version": "2008-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "elasticmapreduce.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
EOF
}

# Inline policy attached to the EMR service role.
resource "aws_iam_role_policy" "iam_emr_service_policy" {
  role = aws_iam_role.iam_emr_service_role.id
  name = "iam_emr_service_policy"

  # One Allow statement on all resources covering the listed EC2, IAM, S3,
  # SimpleDB and SQS actions.
  policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [{
"Effect": "Allow",
"Resource": "*",
"Action": [
"ec2:AuthorizeSecurityGroupEgress",
"ec2:AuthorizeSecurityGroupIngress",
"ec2:CancelSpotInstanceRequests",
"ec2:CreateNetworkInterface",
"ec2:CreateSecurityGroup",
"ec2:CreateTags",
"ec2:DeleteNetworkInterface",
"ec2:DeleteSecurityGroup",
"ec2:DeleteTags",
"ec2:DescribeAvailabilityZones",
"ec2:DescribeAccountAttributes",
"ec2:DescribeDhcpOptions",
"ec2:DescribeInstanceStatus",
"ec2:DescribeInstances",
"ec2:DescribeKeyPairs",
"ec2:DescribeNetworkAcls",
"ec2:DescribeNetworkInterfaces",
"ec2:DescribePrefixLists",
"ec2:DescribeRouteTables",
"ec2:DescribeSecurityGroups",
"ec2:DescribeSpotInstanceRequests",
"ec2:DescribeSpotPriceHistory",
"ec2:DescribeSubnets",
"ec2:DescribeVpcAttribute",
"ec2:DescribeVpcEndpoints",
"ec2:DescribeVpcEndpointServices",
"ec2:DescribeVpcs",
"ec2:DetachNetworkInterface",
"ec2:ModifyImageAttribute",
"ec2:ModifyInstanceAttribute",
"ec2:RequestSpotInstances",
"ec2:RevokeSecurityGroupEgress",
"ec2:RunInstances",
"ec2:TerminateInstances",
"ec2:DeleteVolume",
"ec2:DescribeVolumeStatus",
"ec2:DescribeVolumes",
"ec2:DetachVolume",
"iam:GetRole",
"iam:GetRolePolicy",
"iam:ListInstanceProfiles",
"iam:ListRolePolicies",
"iam:PassRole",
"s3:CreateBucket",
"s3:Get*",
"s3:List*",
"sdb:BatchPutAttributes",
"sdb:Select",
"sqs:CreateQueue",
"sqs:Delete*",
"sqs:GetQueue*",
"sqs:PurgeQueue",
"sqs:ReceiveMessage"
]
}]
}
EOF
}

# IAM Role for EC2 Instance Profile
resource "aws_iam_role" "iam_emr_profile_role" {
  name = "iam_emr_profile_role"

  # Trust policy: allows ec2.amazonaws.com to assume this role.
  assume_role_policy = <<EOF
{
"Version": "2008-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
EOF
}

# Instance profile wrapping the EC2 profile role; the EMR cluster references
# it through ec2_attributes.instance_profile.
resource "aws_iam_instance_profile" "emr_profile" {
  role = aws_iam_role.iam_emr_profile_role.name
  name = "emr_profile"
}

# Inline policy attached to the EC2 instance-profile role.
resource "aws_iam_role_policy" "iam_emr_profile_policy" {
  role = aws_iam_role.iam_emr_profile_role.id
  name = "iam_emr_profile_policy"

  # One Allow statement on all resources; note the full wildcards for
  # cloudwatch, dynamodb, s3, sdb, sns and sqs.
  policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [{
"Effect": "Allow",
"Resource": "*",
"Action": [
"cloudwatch:*",
"dynamodb:*",
"ec2:Describe*",
"elasticmapreduce:Describe*",
"elasticmapreduce:ListBootstrapActions",
"elasticmapreduce:ListClusters",
"elasticmapreduce:ListInstanceGroups",
"elasticmapreduce:ListInstances",
"elasticmapreduce:ListSteps",
"kinesis:CreateStream",
"kinesis:DeleteStream",
"kinesis:DescribeStream",
"kinesis:GetRecords",
"kinesis:GetShardIterator",
"kinesis:MergeShards",
"kinesis:PutRecord",
"kinesis:SplitShard",
"rds:Describe*",
"s3:*",
"sdb:*",
"sns:*",
"sqs:*"
]
}]
}
EOF
}

谁能推荐一个工作示例或帮助解决 VPC 错误?

最佳答案

原来我使用的是较新版本的 Terraform CLI(0.14.5),它与网上的大多数示例不兼容。因此,为了帮助新手(对版本问题没有专门了解的人……),我在这里给出一份让集群启动并运行起来的具体“配方”(recipe)。

我选用了 https://github.com/cloudposse/terraform-aws-emr-cluster.git ,因为它在搜索结果中最先出现,并且一直有人维护。请记住,它依赖许多远程模块;虽然这些模块都托管在 GitHub 上,但它们之间存在版本依赖,维护起来本身就是一个挑战。还要记住,就我而言,这只是一个“Hello World”。

  • 安装 tfswitch。在 Mac 上:
# Install the tfswitch Terraform version switcher with Homebrew.
brew install tfswitch

# Select Terraform 0.13.5 as the active CLI version.
tfswitch 0.13.5

  • 克隆仓库,进入示例目录并执行 terraform init。我使用的是这个特定的提交:ed81e4259ae66178e6cbb7dcea75596f1701fe61,所以如果需要,你可以检出(checkout)这个提交,从而有一个可靠的起点。
# Clone the module repository into the current directory.
git clone https://github.com/cloudposse/terraform-aws-emr-cluster.git
# Enter the bundled example. The path is relative to where the clone was made;
# a leading "/" would point at the filesystem root instead.
cd terraform-aws-emr-cluster/examples/complete/
# Initialise the working directory for the example.
terraform init

这将从 Github 下载源代码

  • 通过复制和编辑文件配置变量:
# Start from the provided us-east-2 fixture, then edit the copy.
cp fixtures.us-east-2.tfvars terraform.tfvars
  • 创建一个 secrets 目录并确保路径配置到它
# Replace "<path of your choice>" with a real directory prefix before running.
mkdir <path of your choice>secrets

# In terraform.tfvars: string values must be quoted. Use the same directory
# that was created for the secrets.
ssh_public_key_path = "<path of your choice>secrets"
  • 配置 EMR 集群:
# Preview the changes Terraform intends to make.
terraform plan
# Apply them without the interactive confirmation prompt.
terraform apply -auto-approve

这应该会生成一个 EMR 集群。

附言

我想要的只是一个用来验证架构方案的 POC。过去只需要我 20 分钟就能完成的事情,如今对外行来说变得非常复杂且充满挑战。DevOps 的“巴别塔”以及安全方面的种种特殊要求,似乎损害了基础设施即代码、声明式代码、简单性、整洁代码乃至基本合理性等原则。

关于apache-spark - 如何使用 Terraform 部署 EMR Terraform,一个开箱即用的简单工作示例,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/65943872/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com