feat: add force_upgrade feature to speed up infra deployments

This commit is contained in:
Pierre Mavro
2021-04-07 06:21:39 +02:00
committed by Pierre Mavro
parent 8b8166a92e
commit 0938037171
40 changed files with 211 additions and 190 deletions

View File

@@ -6,12 +6,6 @@ resource "helm_release" "alertmanager_discord" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "replicaCount"
value = "1"
@@ -43,6 +37,11 @@ resource "helm_release" "alertmanager_discord" {
value = "50Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_eks_cluster.eks_cluster,
helm_release.aws_vpc_cni,

View File

@@ -40,12 +40,6 @@ resource "helm_release" "iam_eks_user_mapper" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "aws.accessKey"
value = aws_iam_access_key.iam_eks_user_mapper.id
@@ -87,6 +81,11 @@ resource "helm_release" "iam_eks_user_mapper" {
value = "32Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_eks_cluster.eks_cluster,
helm_release.aws_vpc_cni,

View File

@@ -5,12 +5,6 @@ resource "helm_release" "aws_node_term_handler" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "nameOverride"
value = "aws-node-term-handler"
@@ -56,6 +50,11 @@ resource "helm_release" "aws_node_term_handler" {
value = "120"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_eks_cluster.eks_cluster,
helm_release.aws_vpc_cni,

View File

@@ -26,12 +26,6 @@ resource "helm_release" "aws_vpc_cni" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "image.region"
value = var.region
@@ -88,6 +82,11 @@ resource "helm_release" "aws_vpc_cni" {
value = "128Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_eks_cluster.eks_cluster,
null_resource.delete_aws_managed_cni,

View File

@@ -6,10 +6,9 @@ resource "helm_release" "calico" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [

View File

@@ -9,11 +9,6 @@ resource "helm_release" "cert_manager" {
values = [file("chart_values/cert-manager.yaml")]
set {
name = "fake"
value = timestamp()
}
set {
name = "installCRDs"
value = "true"
@@ -103,6 +98,11 @@ resource "helm_release" "cert_manager" {
value = "1Gi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_eks_cluster.eks_cluster,
helm_release.cluster_autoscaler,
@@ -139,6 +139,11 @@ resource "helm_release" "cert_manager_config" {
value = "{{ managed_dns_domains_terraform_format }}"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
{% if external_dns_provider == "cloudflare" %}
set {
name = "provider.cloudflare.apiToken"

View File

@@ -44,12 +44,6 @@ resource "helm_release" "cluster_autoscaler" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "cloudProvider"
value = "aws"
@@ -125,6 +119,11 @@ resource "helm_release" "cluster_autoscaler" {
value = "300Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_iam_user.iam_eks_cluster_autoscaler,
aws_iam_access_key.iam_eks_cluster_autoscaler,

View File

@@ -21,12 +21,6 @@ resource "helm_release" "coredns-config" {
max_history = 50
force_update = true
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "managed_dns"
value = "{{ managed_dns_domains_terraform_format }}"
@@ -37,6 +31,11 @@ resource "helm_release" "coredns-config" {
value = "{{ managed_dns_resolvers_terraform_format }}"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
provisioner "local-exec" {
command = <<EOT
kubectl -n kube-system rollout restart deployment coredns

View File

@@ -7,12 +7,6 @@ resource "helm_release" "externaldns" {
values = [file("chart_values/external-dns.yaml")]
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "resources.limits.cpu"
value = "50m"
@@ -33,6 +27,11 @@ resource "helm_release" "externaldns" {
value = "50Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_eks_cluster.eks_cluster,
helm_release.cluster_autoscaler,

View File

@@ -105,10 +105,9 @@ resource "helm_release" "grafana" {
local.cloudflare_datasources,
]
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [

View File

@@ -76,12 +76,6 @@ resource "helm_release" "loki" {
values = [file("chart_values/loki.yaml")]
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "config.storage_config.aws.s3"
value = "s3://${urlencode(aws_iam_access_key.iam_eks_loki.id)}:${urlencode(aws_iam_access_key.iam_eks_loki.secret)}@${var.region}/${aws_s3_bucket.loki_bucket.bucket}"
@@ -127,6 +121,11 @@ resource "helm_release" "loki" {
value = "1Gi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_iam_user.iam_eks_loki,
aws_iam_access_key.iam_eks_loki,

View File

@@ -5,12 +5,6 @@ resource "helm_release" "metrics_server" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "resources.limits.cpu"
value = "250m"
@@ -31,6 +25,11 @@ resource "helm_release" "metrics_server" {
value = "256Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_eks_cluster.eks_cluster,
helm_release.aws_vpc_cni,

View File

@@ -37,12 +37,6 @@ resource "helm_release" "nginx_ingress" {
timeout = 300
values = [file("chart_values/nginx-ingress.yaml")]
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
# Controller resources
set {
name = "controller.resources.limits.cpu"
@@ -85,6 +79,11 @@ resource "helm_release" "nginx_ingress" {
value = "32Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_iam_role_policy.eks_cluster_ingress_loadbalancer_creation,
aws_eks_cluster.eks_cluster,

View File

@@ -34,12 +34,6 @@ resource "helm_release" "pleco" {
values = [local.pleco_config]
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "environmentVariables.AWS_ACCESS_KEY_ID"
value = "{{ aws_access_key }}"
@@ -55,6 +49,11 @@ resource "helm_release" "pleco" {
value = "debug"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_eks_cluster.eks_cluster,
helm_release.aws_vpc_cni,

View File

@@ -5,12 +5,6 @@ resource "helm_release" "prometheus-adapter" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "metricsRelistInterval"
value = "30s"
@@ -53,6 +47,11 @@ resource "helm_release" "prometheus-adapter" {
value = "128Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_eks_cluster.eks_cluster,
helm_release.aws_vpc_cni,

View File

@@ -6,12 +6,6 @@ resource "helm_release" "promtail" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "loki.serviceName"
value = "loki"
@@ -44,6 +38,11 @@ resource "helm_release" "promtail" {
value = "128Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
aws_eks_cluster.eks_cluster,
helm_release.aws_vpc_cni,

View File

@@ -5,10 +5,9 @@ resource "helm_release" "q_storageclass" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [

View File

@@ -21,12 +21,6 @@ resource "helm_release" "qovery_agent_resources" {
force_update = true
recreate_pods = true
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "image.tag"
value = data.external.get_agent_version_to_use.result.version
@@ -103,6 +97,11 @@ resource "helm_release" "qovery_agent_resources" {
value = "500Mi"
}
set {
name = "forced_upgrade"
value = timestamp()
}
depends_on = [
aws_eks_cluster.eks_cluster,
helm_release.aws_vpc_cni,

View File

@@ -13,12 +13,6 @@ resource "helm_release" "qovery_engine_resources" {
timeout = 600
recreate_pods = true
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
// need kubernetes 1.18, should be well tested before activating it
set {
name = "autoscaler.enabled"
@@ -117,6 +111,11 @@ resource "helm_release" "qovery_engine_resources" {
value = "4Gi"
}
set {
name = "forced_upgrade"
value = timestamp()
}
depends_on = [
aws_eks_cluster.eks_cluster,
helm_release.aws_vpc_cni,

View File

@@ -274,6 +274,13 @@ variable "discord_api_key" {
type = string
}
# Force helm upgrade
variable "forced_upgrade" {
description = "Force upgrade"
default = {% if force_upgrade %}timestamp(){% else %}"false"{% endif %}
type = string
}
{%- if resource_expiration_in_seconds is defined %}
# Pleco ttl
variable "resource_expiration_in_seconds" {
@@ -281,4 +288,4 @@ variable "resource_expiration_in_seconds" {
default = {{ resource_expiration_in_seconds }}
type = number
}
{% endif %}
{% endif %}

View File

@@ -6,12 +6,6 @@ resource "helm_release" "alertmanager_discord" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "replicaCount"
value = "1"
@@ -43,6 +37,11 @@ resource "helm_release" "alertmanager_discord" {
value = "50Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
digitalocean_kubernetes_cluster.kubernetes_cluster,
helm_release.prometheus_operator,

View File

@@ -8,11 +8,6 @@ resource "helm_release" "cert_manager" {
values = [file("chart_values/cert-manager.yaml")]
set {
name = "fake"
value = timestamp()
}
set {
name = "installCRDs"
value = "true"
@@ -102,6 +97,11 @@ resource "helm_release" "cert_manager" {
value = "1Gi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
digitalocean_kubernetes_cluster.kubernetes_cluster,
helm_release.prometheus_operator,
@@ -137,6 +137,11 @@ resource "helm_release" "cert_manager_config" {
value = "{{ managed_dns_domains_terraform_format }}"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
{% if external_dns_provider == "cloudflare" %}
set {
name = "provider.cloudflare.apiToken"

View File

@@ -22,12 +22,6 @@ resource "helm_release" "coredns-config" {
max_history = 50
force_update = true
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "managed_dns"
value = "{{ managed_dns_domains_terraform_format }}"
@@ -38,6 +32,11 @@ resource "helm_release" "coredns-config" {
value = "{{ managed_dns_resolvers_terraform_format }}"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
provisioner "local-exec" {
command = <<EOT
kubectl -n kube-system rollout restart deployment coredns

View File

@@ -7,12 +7,6 @@ resource "helm_release" "externaldns" {
values = [file("chart_values/external-dns.yaml")]
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "resources.limits.cpu"
value = "50m"
@@ -33,6 +27,11 @@ resource "helm_release" "externaldns" {
value = "50Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
digitalocean_kubernetes_cluster.kubernetes_cluster
]

View File

@@ -32,10 +32,9 @@ resource "helm_release" "grafana" {
local.cloudflare_datasources,
]
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [

View File

@@ -6,12 +6,6 @@ resource "helm_release" "k8s_token_rotate" {
max_history = 50
force_update = true
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "environmentVariables.DO_API_TOKEN"
value = "{{ digitalocean_token }}"
@@ -47,6 +41,11 @@ resource "helm_release" "k8s_token_rotate" {
value = digitalocean_kubernetes_cluster.kubernetes_cluster.id
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
digitalocean_kubernetes_cluster.kubernetes_cluster
]

View File

@@ -14,12 +14,6 @@ resource "helm_release" "loki" {
values = [file("chart_values/loki.yaml")]
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "config.storage_config.aws.endpoint"
value = "${var.region}.digitaloceanspaces.com"
@@ -66,6 +60,11 @@ resource "helm_release" "loki" {
value = "1Gi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
digitalocean_spaces_bucket.loki_space,
digitalocean_kubernetes_cluster.kubernetes_cluster,

View File

@@ -5,12 +5,6 @@ resource "helm_release" "metrics_server" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "resources.limits.cpu"
value = "250m"
@@ -31,6 +25,11 @@ resource "helm_release" "metrics_server" {
value = "256Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
digitalocean_kubernetes_cluster.kubernetes_cluster,
helm_release.q_storageclass,

View File

@@ -10,12 +10,6 @@ resource "helm_release" "nginx_ingress" {
timeout = 300
values = [file("chart_values/nginx-ingress.yaml")]
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
# Controller resources
set {
name = "controller.resources.limits.cpu"
@@ -58,6 +52,11 @@ resource "helm_release" "nginx_ingress" {
value = "32Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
digitalocean_kubernetes_cluster.kubernetes_cluster,
]

View File

@@ -7,12 +7,6 @@ resource "helm_release" "pleco" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "enabledFeatures.disableDryRun"
value = "true"
@@ -23,6 +17,11 @@ resource "helm_release" "pleco" {
value = "debug"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
digitalocean_kubernetes_cluster.kubernetes_cluster
]

View File

@@ -5,12 +5,6 @@ resource "helm_release" "prometheus-adapter" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "metricsRelistInterval"
value = "30s"
@@ -53,6 +47,11 @@ resource "helm_release" "prometheus-adapter" {
value = "128Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
digitalocean_kubernetes_cluster.kubernetes_cluster,
helm_release.prometheus_operator,

View File

@@ -6,12 +6,6 @@ resource "helm_release" "promtail" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "loki.serviceName"
value = "loki"
@@ -45,6 +39,11 @@ resource "helm_release" "promtail" {
value = "128Mi"
}
set {
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [
digitalocean_kubernetes_cluster.kubernetes_cluster,
]

View File

@@ -5,10 +5,9 @@ resource "helm_release" "q_storageclass" {
atomic = true
max_history = 50
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
name = "forced_upgrade"
value = var.forced_upgrade
}
depends_on = [

View File

@@ -21,12 +21,6 @@ resource "helm_release" "qovery_agent_resources" {
force_update = true
recreate_pods = true
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
set {
name = "image.tag"
value = data.external.get_agent_version_to_use.result.version
@@ -103,6 +97,11 @@ resource "helm_release" "qovery_agent_resources" {
value = "500Mi"
}
set {
name = "forced_upgrade"
value = timestamp()
}
depends_on = [
digitalocean_kubernetes_cluster.kubernetes_cluster
]

View File

@@ -13,12 +13,6 @@ resource "helm_release" "qovery_engine_resources" {
timeout = 600
recreate_pods = true
// make a fake arg to avoid TF to validate update on failure because of the atomic option
set {
name = "fake"
value = timestamp()
}
// need kubernetes 1.18, should be well tested before activating it
set {
name = "autoscaler.enabled"
@@ -117,6 +111,11 @@ resource "helm_release" "qovery_engine_resources" {
value = "4Gi"
}
set {
name = "forced_upgrade"
value = timestamp()
}
depends_on = [
digitalocean_kubernetes_cluster.kubernetes_cluster,
helm_release.prometheus-adapter,

View File

@@ -150,3 +150,10 @@ variable "discord_api_key" {
default = "{{ discord_api_key }}"
type = string
}
# Force helm upgrade
variable "forced_upgrade" {
description = "Force upgrade"
default = {% if force_upgrade %}timestamp(){% else %}"false"{% endif %}
type = string
}

View File

@@ -219,6 +219,7 @@ impl<'a> EKS<'a> {
&self.context.resource_expiration_in_seconds(),
)
}
context.insert("force_upgrade", &self.context.requires_forced_upgrade());
// DNS configuration
context.insert("managed_dns", &managed_dns_list);

View File

@@ -1006,6 +1006,16 @@ impl Context {
}
}
/// Returns `true` when the deployment metadata explicitly requests a forced
/// helm upgrade (`forced_upgrade: Some(true)`).
///
/// Any other case — no metadata attached to the context, the flag absent,
/// or `Some(false)` — yields `false`, keeping fast deployments the default.
pub fn requires_forced_upgrade(&self) -> bool {
    // Option<bool> is Copy, so and_then can read the flag without cloning;
    // unwrap_or(false) collapses both "no metadata" and "flag unset" to false.
    self.metadata
        .as_ref()
        .and_then(|meta| meta.forced_upgrade)
        .unwrap_or(false)
}
/// Whether this context targets a test cluster, as recorded at construction
/// time in the `test_cluster` field.
pub fn is_test_cluster(&self) -> bool {
self.test_cluster
}
@@ -1038,6 +1048,7 @@ pub struct Metadata {
pub dry_run_deploy: Option<bool>,
pub resource_expiration_in_seconds: Option<u32>,
pub docker_build_options: Option<String>,
pub forced_upgrade: Option<bool>,
}
impl Metadata {
@@ -1045,11 +1056,13 @@ impl Metadata {
dry_run_deploy: Option<bool>,
resource_expiration_in_seconds: Option<u32>,
docker_build_options: Option<String>,
forced_upgrade: Option<bool>,
) -> Self {
Metadata {
dry_run_deploy,
resource_expiration_in_seconds,
docker_build_options,
forced_upgrade,
}
}
}

View File

@@ -49,6 +49,12 @@ pub fn context() -> Context {
}
},
docker_build_options: Some("--network host".to_string()),
forced_upgrade: Option::from({
match env::var_os("forced_upgrade") {
Some(_) => true,
None => false,
}
}),
};
Context::new(execution_id, home_dir, lib_root_dir, true, None, Option::from(metadata))

View File

@@ -25,9 +25,18 @@ Others option will also be necessary and can be found in the `FuncTestsSecrets`
* VAULT_TOKEN=<vault_token>
### TTL
By default all deployed tests resources are tagged with a TTL, to be automatically cleaned with [Pleco](https://github.com/Qovery/pleco) if a test fail for some reasons.
By default, all deployed test resources are tagged with a TTL, to be automatically cleaned with [Pleco](https://github.com/Qovery/pleco) if a test fails for some reason.
This ttl is set by default to 1h, but you can override it with a `ttl` environment variable in seconds like: `ttl=7200`.
### Terraform dry run
If you just want to render Terraform without applying changes, you can set `dry_run_deploy` environment variable to anything to enable it like `dry_run_deploy=true`.
If you just want to render Terraform without applying changes, you can set `dry_run_deploy` environment variable to anything to enable it like `dry_run_deploy=true`.
### Forced upgrade
By default, helm charts are applied only when they do not exist or when they receive an update.
During a chart upgrade or an atomic rollback, Terraform is not able to detect those changes, so an explicit upgrade is required.
In order to perform it, you need to set the variable `forced_upgrade` to `true` to ensure everything is up to date.
The advantage of having it set to `false` by default is deployment speed, since only helm changes are applied; the drawback is that you can't
be 100% sure that what is deployed on your infra matches what you asked for.