diff --git a/.gitignore b/.gitignore index a1913e40..7ddac5f2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ **/target *.iml +*.orig .idea .qovery-workspace .terraform/ diff --git a/Cargo.toml b/Cargo.toml index b4bf09ce..2154f995 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -106,3 +106,6 @@ test-all-whole-enchilada = ["test-aws-whole-enchilada", "test-do-whole-enchilada test-aws-all = ["test-aws-infra", "test-aws-managed-services", "test-aws-self-hosted", "test-aws-whole-enchilada"] test-do-all = ["test-do-infra", "test-do-managed-services", "test-do-self-hosted", "test-do-whole-enchilada"] test-scw-all = ["test-scw-infra", "test-scw-managed-services", "test-scw-self-hosted", "test-scw-whole-enchilada"] + +# functional test with only a k8s cluster as a dependency +test-with-kube = [] diff --git a/lib/common/bootstrap/charts/ingress-nginx/values.yaml b/lib/common/bootstrap/charts/ingress-nginx/values.yaml index f5496eb6..0da4ff98 100644 --- a/lib/common/bootstrap/charts/ingress-nginx/values.yaml +++ b/lib/common/bootstrap/charts/ingress-nginx/values.yaml @@ -505,7 +505,7 @@ controller: admissionWebhooks: annotations: {} - enabled: true + enabled: false failurePolicy: Fail # timeoutSeconds: 10 port: 8443 diff --git a/lib/common/bootstrap/charts/kube-prometheus-stack/values.yaml b/lib/common/bootstrap/charts/kube-prometheus-stack/values.yaml index 0b2a607b..07483cf9 100644 --- a/lib/common/bootstrap/charts/kube-prometheus-stack/values.yaml +++ b/lib/common/bootstrap/charts/kube-prometheus-stack/values.yaml @@ -1342,7 +1342,7 @@ prometheusOperator: ## rules from making their way into prometheus and potentially preventing the container from starting admissionWebhooks: failurePolicy: Fail - enabled: true + enabled: false ## A PEM encoded CA bundle which will be used to validate the webhook's server certificate. ## If unspecified, system trust roots on the apiserver are used. 
caBundle: "" @@ -1377,7 +1377,7 @@ prometheusOperator: # Use certmanager to generate webhook certs certManager: - enabled: false + enabled: true # issuerRef: # name: "issuer" # kind: "ClusterIssuer" diff --git a/lib/common/bootstrap/charts/pleco/Chart.yaml b/lib/common/bootstrap/charts/pleco/Chart.yaml index 3c601188..224f4ebd 100644 --- a/lib/common/bootstrap/charts/pleco/Chart.yaml +++ b/lib/common/bootstrap/charts/pleco/Chart.yaml @@ -1,9 +1,9 @@ apiVersion: v2 -appVersion: 0.10.1 +appVersion: 0.10.4 description: Automatically removes Cloud managed services and Kubernetes resources based on tags with TTL home: https://github.com/Qovery/pleco icon: https://github.com/Qovery/pleco/raw/main/assets/pleco_logo.png name: pleco type: application -version: 0.10.1 +version: 0.10.4 diff --git a/lib/common/bootstrap/charts/pleco/templates/deployment.yaml b/lib/common/bootstrap/charts/pleco/templates/deployment.yaml index 87854543..89f3b959 100644 --- a/lib/common/bootstrap/charts/pleco/templates/deployment.yaml +++ b/lib/common/bootstrap/charts/pleco/templates/deployment.yaml @@ -148,8 +148,6 @@ spec: {{ end }} {{- end }} env: - - name: "AWS_EXECUTION_ENV" - value: "pleco_{{ .Values.image.plecoImageTag }}_{{ .Values.environmentVariables.PLECO_IDENTIFIER }}" {{ range $key, $value := .Values.environmentVariables -}} - name: "{{ $key }}" valueFrom: diff --git a/lib/common/bootstrap/charts/pleco/values.yaml b/lib/common/bootstrap/charts/pleco/values.yaml index 480879c0..09b4e135 100644 --- a/lib/common/bootstrap/charts/pleco/values.yaml +++ b/lib/common/bootstrap/charts/pleco/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: qoveryrd/pleco pullPolicy: IfNotPresent - plecoImageTag: "0.10.1" + plecoImageTag: "0.10.4" cloudProvider: "" diff --git a/lib/helm-freeze.yaml b/lib/helm-freeze.yaml index 35434bfc..9035b942 100644 --- a/lib/helm-freeze.yaml +++ b/lib/helm-freeze.yaml @@ -70,7 +70,7 @@ charts: dest: services no_sync: true - name: pleco - version: 0.10.1 + 
version: 0.10.4 repo_name: pleco - name: do-k8s-token-rotate version: 0.1.3 diff --git a/lib/scaleway/bootstrap/ks-workers-nodes.j2.tf b/lib/scaleway/bootstrap/ks-workers-nodes.j2.tf index b23d5c8a..3c6ddbb9 100644 --- a/lib/scaleway/bootstrap/ks-workers-nodes.j2.tf +++ b/lib/scaleway/bootstrap/ks-workers-nodes.j2.tf @@ -1,23 +1,34 @@ {% for scw_ks_worker_node in scw_ks_worker_nodes %} resource "scaleway_k8s_pool" "kubernetes_cluster_workers_{{ loop.index }}" { cluster_id = scaleway_k8s_cluster.kubernetes_cluster.id - name = "${var.kubernetes_cluster_id}_{{ loop.index }}" + name = "${var.kubernetes_cluster_id}_{{ scw_ks_worker_node.instance_type }}_{{ loop.index }}" node_type = "{{ scw_ks_worker_node.instance_type }}" region = var.region zone = var.zone # use Scaleway built-in cluster autoscaler - autoscaling = {{ scw_ks_pool_autoscale }} - autohealing = true - size = "{{ scw_ks_worker_node.min_nodes }}" - min_size = "{{ scw_ks_worker_node.min_nodes }}" - max_size = "{{ scw_ks_worker_node.max_nodes }}" + autoscaling = {{ scw_ks_pool_autoscale }} + autohealing = true + size = "{{ scw_ks_worker_node.min_nodes }}" + min_size = "{{ scw_ks_worker_node.min_nodes }}" + max_size = "{{ scw_ks_worker_node.max_nodes }}" + wait_for_pool_ready = true + + # Not yet available: https://github.com/scaleway/terraform-provider-scaleway/issues/998 + #timeouts { + # create = "60m" + # delete = "60m" + # update = "60m" + #} depends_on = [ scaleway_k8s_cluster.kubernetes_cluster, ] - tags = concat(local.tags_ks_list, ["QoveryNodeGroupName:{{ scw_ks_worker_node.name }}", "QoveryNodeGroupId:${var.kubernetes_cluster_id}-{{ loop.index }}"]) + lifecycle { + create_before_destroy = true + } + tags = concat(local.tags_ks_list, ["QoveryNodeGroupName:{{ scw_ks_worker_node.name }}", "QoveryNodeGroupId:${var.kubernetes_cluster_id}_{{ scw_ks_worker_node.instance_type }}_{{ loop.index }}"]) } -{% endfor %} \ No newline at end of file +{% endfor %} diff --git 
a/lib/scaleway/bootstrap/tf-default-vars.j2.tf b/lib/scaleway/bootstrap/tf-default-vars.j2.tf index 09467d3c..73900b51 100644 --- a/lib/scaleway/bootstrap/tf-default-vars.j2.tf +++ b/lib/scaleway/bootstrap/tf-default-vars.j2.tf @@ -77,7 +77,7 @@ variable "kubernetes_cluster_id" { variable "kubernetes_cluster_name" { description = "Kubernetes cluster name" - default = "qovery-{{ kubernetes_cluster_id }}" # TODO(benjaminch): handle name creation in code + default = "{{ kubernetes_cluster_name }}" type = string } diff --git a/lib/scaleway/bootstrap/tf-providers.j2.tf b/lib/scaleway/bootstrap/tf-providers.j2.tf index 7ec6853a..5157bc29 100644 --- a/lib/scaleway/bootstrap/tf-providers.j2.tf +++ b/lib/scaleway/bootstrap/tf-providers.j2.tf @@ -9,7 +9,7 @@ terraform { required_providers { scaleway = { source = "scaleway/scaleway" - version = "~> 2.1.0" + version = "~> 2.2.0" } aws = { source = "hashicorp/aws" @@ -28,7 +28,7 @@ terraform { version = "~> 2.24.1" } } - required_version = ">= 0.13" + required_version = ">= 0.14" } diff --git a/src/cloud_provider/aws/kubernetes/helm_charts.rs b/src/cloud_provider/aws/kubernetes/helm_charts.rs index 08d386f1..78279ba8 100644 --- a/src/cloud_provider/aws/kubernetes/helm_charts.rs +++ b/src/cloud_provider/aws/kubernetes/helm_charts.rs @@ -1,7 +1,7 @@ use crate::cloud_provider::aws::kubernetes::{Options, VpcQoveryNetworkMode}; use crate::cloud_provider::helm::{ get_chart_for_shell_agent, get_engine_helm_action_from_location, ChartInfo, ChartPayload, ChartSetValue, - ChartValuesGenerated, CommonChart, CoreDNSConfigChart, HelmAction, HelmChart, HelmChartNamespaces, + ChartValuesGenerated, CommonChart, CoreDNSConfigChart, HelmChart, HelmChartNamespaces, PrometheusOperatorConfigChart, ShellAgentContext, }; use crate::cloud_provider::qovery::{get_qovery_app_version, EngineLocation, QoveryAgent, QoveryAppName, QoveryEngine}; @@ -458,6 +458,7 @@ pub fn aws_helm_charts( }, }; + /* Example to delete an old install let 
old_prometheus_operator = PrometheusOperatorConfigChart { chart_info: ChartInfo { name: "prometheus-operator".to_string(), @@ -465,7 +466,7 @@ pub fn aws_helm_charts( action: HelmAction::Destroy, ..Default::default() }, - }; + };*/ let kube_prometheus_stack = PrometheusOperatorConfigChart { chart_info: ChartInfo { @@ -704,7 +705,9 @@ datasources: }, ChartSetValue { key: "prometheus.servicemonitor.enabled".to_string(), - value: chart_config_prerequisites.ff_metrics_history_enabled.to_string(), + // Due to cycle, prometheus needs tls certificate from cert manager, and enabling this will require + // prometheus to be already installed + value: "false".to_string(), }, ChartSetValue { key: "prometheus.servicemonitor.prometheusInstance".to_string(), @@ -730,11 +733,11 @@ datasources: // Webhooks resources limits ChartSetValue { key: "webhook.resources.limits.cpu".to_string(), - value: "20m".to_string(), + value: "200m".to_string(), }, ChartSetValue { key: "webhook.resources.requests.cpu".to_string(), - value: "20m".to_string(), + value: "50m".to_string(), }, ChartSetValue { key: "webhook.resources.limits.memory".to_string(), @@ -1153,26 +1156,27 @@ datasources: Box::new(q_storage_class), Box::new(coredns_config), Box::new(aws_vpc_cni_chart), - Box::new(old_prometheus_operator), ]; - let mut level_2: Vec> = vec![]; + let level_2: Vec> = vec![Box::new(cert_manager)]; - let mut level_3: Vec> = vec![ + let mut level_3: Vec> = vec![]; + + let mut level_4: Vec> = vec![ Box::new(cluster_autoscaler), Box::new(aws_iam_eks_user_mapper), Box::new(aws_calico), ]; - let mut level_4: Vec> = vec![ + let mut level_5: Vec> = vec![ Box::new(metrics_server), Box::new(aws_node_term_handler), Box::new(external_dns), ]; - let mut level_5: Vec> = vec![Box::new(nginx_ingress), Box::new(cert_manager)]; + let mut level_6: Vec> = vec![Box::new(nginx_ingress)]; - let mut level_6: Vec> = vec![ + let mut level_7: Vec> = vec![ Box::new(cert_manager_config), Box::new(qovery_agent), 
Box::new(shell_agent), @@ -1181,26 +1185,26 @@ datasources: // observability if chart_config_prerequisites.ff_metrics_history_enabled { - level_2.push(Box::new(kube_prometheus_stack)); - level_4.push(Box::new(prometheus_adapter)); - level_4.push(Box::new(kube_state_metrics)); + level_3.push(Box::new(kube_prometheus_stack)); + level_5.push(Box::new(prometheus_adapter)); + level_5.push(Box::new(kube_state_metrics)); } if chart_config_prerequisites.ff_log_history_enabled { - level_3.push(Box::new(promtail)); - level_4.push(Box::new(loki)); + level_4.push(Box::new(promtail)); + level_5.push(Box::new(loki)); } if chart_config_prerequisites.ff_metrics_history_enabled || chart_config_prerequisites.ff_log_history_enabled { - level_6.push(Box::new(grafana)) + level_7.push(Box::new(grafana)) }; // pleco if !chart_config_prerequisites.disable_pleco { - level_5.push(Box::new(pleco)); + level_6.push(Box::new(pleco)); } info!("charts configuration preparation finished"); - Ok(vec![level_1, level_2, level_3, level_4, level_5, level_6]) + Ok(vec![level_1, level_2, level_3, level_4, level_5, level_6, level_7]) } // AWS CNI diff --git a/src/cloud_provider/aws/kubernetes/mod.rs b/src/cloud_provider/aws/kubernetes/mod.rs index 0c698c54..d4a4dd86 100644 --- a/src/cloud_provider/aws/kubernetes/mod.rs +++ b/src/cloud_provider/aws/kubernetes/mod.rs @@ -14,7 +14,7 @@ use crate::cloud_provider::aws::kubernetes::node::AwsInstancesType; use crate::cloud_provider::aws::kubernetes::roles::get_default_roles_to_create; use crate::cloud_provider::aws::regions::{AwsRegion, AwsZones}; use crate::cloud_provider::environment::Environment; -use crate::cloud_provider::helm::deploy_charts_levels; +use crate::cloud_provider::helm::{deploy_charts_levels, ChartInfo}; use crate::cloud_provider::kubernetes::{ is_kubernetes_upgrade_required, send_progress_on_long_task, uninstall_cert_manager, Kind, Kubernetes, KubernetesNodesType, KubernetesUpgradeStatus, ProviderOptions, @@ -24,11 +24,11 @@ use 
crate::cloud_provider::qovery::EngineLocation; use crate::cloud_provider::utilities::print_action; use crate::cloud_provider::{kubernetes, CloudProvider}; use crate::cmd; +use crate::cmd::helm::{to_engine_error, Helm}; use crate::cmd::kubectl::{ kubectl_exec_api_custom_metrics, kubectl_exec_get_all_namespaces, kubectl_exec_get_events, kubectl_exec_scale_replicas, ScalingKind, }; -use crate::cmd::structs::HelmChart; use crate::cmd::terraform::{terraform_exec, terraform_init_validate_plan_apply, terraform_init_validate_state_list}; use crate::deletion_utilities::{get_firsts_namespaces_to_delete, get_qovery_managed_namespaces}; use crate::dns_provider; @@ -947,7 +947,10 @@ impl<'a> EKS<'a> { }; if tf_workers_resources.is_empty() { - return Err(EngineError::new_cluster_has_no_worker_nodes(event_details.clone())); + return Err(EngineError::new_cluster_has_no_worker_nodes( + event_details.clone(), + None, + )); } let kubernetes_config_file_path = self.get_kubeconfig_file_path()?; @@ -1244,15 +1247,14 @@ impl<'a> EKS<'a> { ); // delete custom metrics api to avoid stale namespaces on deletion - let _ = cmd::helm::helm_uninstall_list( + let helm = Helm::new( &kubernetes_config_file_path, - vec![HelmChart { - name: "metrics-server".to_string(), - namespace: "kube-system".to_string(), - version: None, - }], - self.cloud_provider().credentials_environment_variables(), - ); + &self.cloud_provider.credentials_environment_variables(), + ) + .map_err(|e| to_engine_error(&event_details, e))?; + let chart = ChartInfo::new_from_release_name("metrics-server", "kube-system"); + helm.uninstall(&chart, &vec![]) + .map_err(|e| to_engine_error(&event_details, e))?; // required to avoid namespace stuck on deletion uninstall_cert_manager( @@ -1272,50 +1274,27 @@ impl<'a> EKS<'a> { let qovery_namespaces = get_qovery_managed_namespaces(); for qovery_namespace in qovery_namespaces.iter() { - let charts_to_delete = cmd::helm::helm_list( - &kubernetes_config_file_path, - 
self.cloud_provider().credentials_environment_variables(), - Some(qovery_namespace), - ); - match charts_to_delete { - Ok(charts) => { - for chart in charts { - match cmd::helm::helm_exec_uninstall( - &kubernetes_config_file_path, - &chart.namespace, - &chart.name, - self.cloud_provider().credentials_environment_variables(), - ) { - Ok(_) => self.logger().log( - LogLevel::Info, - EngineEvent::Deleting( - event_details.clone(), - EventMessage::new_from_safe(format!("Chart `{}` deleted", chart.name)), - ), - ), - Err(e) => { - let message_safe = format!("Can't delete chart `{}`", chart.name); - self.logger().log( - LogLevel::Error, - EngineEvent::Deleting( - event_details.clone(), - EventMessage::new(message_safe, Some(e.message())), - ), - ) - } - } - } - } - Err(e) => { - if !(e.message().contains("not found")) { + let charts_to_delete = helm + .list_release(Some(qovery_namespace), &vec![]) + .map_err(|e| to_engine_error(&event_details, e))?; + + for chart in charts_to_delete { + let chart_info = ChartInfo::new_from_release_name(&chart.name, &chart.namespace); + match helm.uninstall(&chart_info, &vec![]) { + Ok(_) => self.logger().log( + LogLevel::Info, + EngineEvent::Deleting( + event_details.clone(), + EventMessage::new_from_safe(format!("Chart `{}` deleted", chart.name)), + ), + ), + Err(e) => { + let message_safe = format!("Can't delete chart `{}`: {}", &chart.name, e); self.logger().log( LogLevel::Error, EngineEvent::Deleting( event_details.clone(), - EventMessage::new_from_safe(format!( - "Can't delete the namespace {}", - qovery_namespace - )), + EventMessage::new(message_safe, Some(e.to_string())), ), ) } @@ -1370,18 +1349,11 @@ impl<'a> EKS<'a> { ), ); - match cmd::helm::helm_list( - &kubernetes_config_file_path, - self.cloud_provider().credentials_environment_variables(), - None, - ) { + match helm.list_release(None, &vec![]) { Ok(helm_charts) => { for chart in helm_charts { - match cmd::helm::helm_uninstall_list( - &kubernetes_config_file_path, - 
vec![chart.clone()], - self.cloud_provider().credentials_environment_variables(), - ) { + let chart_info = ChartInfo::new_from_release_name(&chart.name, &chart.namespace); + match helm.uninstall(&chart_info, &vec![]) { Ok(_) => self.logger().log( LogLevel::Info, EngineEvent::Deleting( @@ -1390,12 +1362,12 @@ impl<'a> EKS<'a> { ), ), Err(e) => { - let message_safe = format!("Error deleting chart `{}` deleted", chart.name); + let message_safe = format!("Error deleting chart `{}`: {}", chart.name, e); self.logger().log( LogLevel::Error, EngineEvent::Deleting( event_details.clone(), - EventMessage::new(message_safe, e.message), + EventMessage::new(message_safe, Some(e.to_string())), ), ) } @@ -1408,7 +1380,7 @@ impl<'a> EKS<'a> { LogLevel::Error, EngineEvent::Deleting( event_details.clone(), - EventMessage::new(message_safe.to_string(), Some(e.message())), + EventMessage::new(message_safe.to_string(), Some(e.to_string())), ), ) } diff --git a/src/cloud_provider/aws/kubernetes/node.rs b/src/cloud_provider/aws/kubernetes/node.rs index ea3e8d47..8f0bdd0f 100644 --- a/src/cloud_provider/aws/kubernetes/node.rs +++ b/src/cloud_provider/aws/kubernetes/node.rs @@ -87,6 +87,7 @@ mod tests { NodeGroups::new("".to_string(), 2, 2, "t2.large".to_string(), 20).unwrap(), NodeGroups { name: "".to_string(), + id: None, min_nodes: 2, max_nodes: 2, instance_type: "t2.large".to_string(), diff --git a/src/cloud_provider/aws/router.rs b/src/cloud_provider/aws/router.rs index 8cf44389..47dfdfc7 100644 --- a/src/cloud_provider/aws/router.rs +++ b/src/cloud_provider/aws/router.rs @@ -1,5 +1,6 @@ use tera::Context as TeraContext; +use crate::cloud_provider::helm::ChartInfo; use crate::cloud_provider::models::{CustomDomain, CustomDomainDataTemplate, Route, RouteDataTemplate}; use crate::cloud_provider::service::{ default_tera_context, delete_router, deploy_stateless_service_error, send_progress_on_long_task, Action, Create, @@ -7,8 +8,9 @@ use crate::cloud_provider::service::{ }; use 
crate::cloud_provider::utilities::{check_cname_for, print_action, sanitize_name}; use crate::cloud_provider::DeploymentTarget; -use crate::cmd::helm::Timeout; -use crate::error::{EngineError, EngineErrorCause, EngineErrorScope}; +use crate::cmd::helm; +use crate::cmd::helm::{to_engine_error, Timeout}; +use crate::error::{EngineError, EngineErrorScope}; use crate::errors::EngineError as NewEngineError; use crate::events::{EnvironmentStep, Stage, ToTransmitter, Transmitter}; use crate::models::{Context, Listen, Listener, Listeners}; @@ -325,25 +327,26 @@ impl Create for Router { } // do exec helm upgrade and return the last deployment status - let helm_history_row = crate::cmd::helm::helm_exec_with_upgrade_history( - kubernetes_config_file_path.as_str(), - environment.namespace(), - helm_release_name.as_str(), - self.selector(), - workspace_dir.as_str(), - self.start_timeout(), - kubernetes.cloud_provider().credentials_environment_variables(), - self.service_type(), + let helm = helm::Helm::new( + &kubernetes_config_file_path, + &kubernetes.cloud_provider().credentials_environment_variables(), ) - .map_err(|e| { - NewEngineError::new_helm_charts_upgrade_error(event_details.clone(), e).to_legacy_engine_error() - })?; + .map_err(|e| to_engine_error(&event_details, e).to_legacy_engine_error())?; + let chart = ChartInfo::new_from_custom_namespace( + helm_release_name, + workspace_dir.clone(), + environment.namespace().to_string(), + 600_i64, + match self.service_type() { + ServiceType::Database(_) => vec![format!("{}/q-values.yaml", &workspace_dir)], + _ => vec![], + }, + false, + self.selector(), + ); - if helm_history_row.is_none() || !helm_history_row.unwrap().is_successfully_deployed() { - return Err(self.engine_error(EngineErrorCause::Internal, "Router has failed to be deployed".into())); - } - - Ok(()) + helm.upgrade(&chart, &vec![]) + .map_err(|e| NewEngineError::new_helm_error(event_details.clone(), e).to_legacy_engine_error()) } fn on_create_check(&self) -> 
Result<(), EngineError> { diff --git a/src/cloud_provider/digitalocean/kubernetes/helm_charts.rs b/src/cloud_provider/digitalocean/kubernetes/helm_charts.rs index 3127c1b4..88c6e0f1 100644 --- a/src/cloud_provider/digitalocean/kubernetes/helm_charts.rs +++ b/src/cloud_provider/digitalocean/kubernetes/helm_charts.rs @@ -1,8 +1,7 @@ use crate::cloud_provider::digitalocean::kubernetes::DoksOptions; use crate::cloud_provider::helm::{ get_chart_for_shell_agent, get_engine_helm_action_from_location, ChartInfo, ChartSetValue, ChartValuesGenerated, - CommonChart, CoreDNSConfigChart, HelmAction, HelmChart, HelmChartNamespaces, PrometheusOperatorConfigChart, - ShellAgentContext, + CommonChart, CoreDNSConfigChart, HelmChart, HelmChartNamespaces, PrometheusOperatorConfigChart, ShellAgentContext, }; use crate::cloud_provider::qovery::{get_qovery_app_version, EngineLocation, QoveryAgent, QoveryAppName, QoveryEngine}; use crate::errors::CommandError; @@ -309,6 +308,7 @@ pub fn do_helm_charts( }, }; + /* let old_prometheus_operator = PrometheusOperatorConfigChart { chart_info: ChartInfo { name: "prometheus-operator".to_string(), @@ -316,7 +316,7 @@ pub fn do_helm_charts( action: HelmAction::Destroy, ..Default::default() }, - }; + };*/ let kube_prometheus_stack = PrometheusOperatorConfigChart { chart_info: ChartInfo { @@ -544,7 +544,9 @@ datasources: }, ChartSetValue { key: "prometheus.servicemonitor.enabled".to_string(), - value: chart_config_prerequisites.ff_metrics_history_enabled.to_string(), + // Due to cycle, prometheus needs tls certificate from cert manager, and enabling this will require + // prometheus to be already installed + value: "false".to_string(), }, ChartSetValue { key: "prometheus.servicemonitor.prometheusInstance".to_string(), @@ -570,11 +572,11 @@ datasources: // Webhooks resources limits ChartSetValue { key: "webhook.resources.limits.cpu".to_string(), - value: "20m".to_string(), + value: "200m".to_string(), }, ChartSetValue { key: 
"webhook.resources.requests.cpu".to_string(), - value: "20m".to_string(), + value: "50m".to_string(), }, ChartSetValue { key: "webhook.resources.limits.memory".to_string(), @@ -1027,19 +1029,15 @@ datasources: }; // chart deployment order matters!!! - let level_1: Vec> = vec![ - Box::new(q_storage_class), - Box::new(coredns_config), - Box::new(old_prometheus_operator), - ]; + let level_1: Vec> = vec![Box::new(q_storage_class), Box::new(coredns_config)]; - let mut level_2: Vec> = vec![Box::new(container_registry_secret)]; + let mut level_2: Vec> = vec![Box::new(container_registry_secret), Box::new(cert_manager)]; let mut level_3: Vec> = vec![]; let mut level_4: Vec> = vec![Box::new(metrics_server), Box::new(external_dns)]; - let mut level_5: Vec> = vec![Box::new(nginx_ingress), Box::new(cert_manager)]; + let mut level_5: Vec> = vec![Box::new(nginx_ingress)]; let mut level_6: Vec> = vec![ Box::new(cert_manager_config), diff --git a/src/cloud_provider/digitalocean/kubernetes/mod.rs b/src/cloud_provider/digitalocean/kubernetes/mod.rs index 98a86757..9a3e7466 100644 --- a/src/cloud_provider/digitalocean/kubernetes/mod.rs +++ b/src/cloud_provider/digitalocean/kubernetes/mod.rs @@ -26,11 +26,10 @@ use crate::cloud_provider::models::NodeGroups; use crate::cloud_provider::qovery::EngineLocation; use crate::cloud_provider::utilities::{print_action, VersionsNumber}; use crate::cloud_provider::{kubernetes, CloudProvider}; -use crate::cmd::helm::{helm_exec_upgrade_with_chart_info, helm_upgrade_diff_with_chart_info}; +use crate::cmd::helm::{to_engine_error, Helm}; use crate::cmd::kubectl::{ do_kubectl_exec_get_loadbalancer_id, kubectl_exec_get_all_namespaces, kubectl_exec_get_events, }; -use crate::cmd::structs::HelmChart; use crate::cmd::terraform::{terraform_exec, terraform_init_validate_plan_apply, terraform_init_validate_state_list}; use crate::deletion_utilities::{get_firsts_namespaces_to_delete, get_qovery_managed_namespaces}; use crate::dns_provider::DnsProvider; @@ 
-816,18 +815,16 @@ impl<'a> DOKS<'a> { ..Default::default() }; - let _ = helm_upgrade_diff_with_chart_info( - &kubeconfig_path, - &credentials_environment_variables, - &load_balancer_dns_hostname, - ); - - helm_exec_upgrade_with_chart_info( + let helm = Helm::new( &kubeconfig_path, &self.cloud_provider.credentials_environment_variables(), - &load_balancer_dns_hostname, ) - .map_err(|e| EngineError::new_helm_charts_deploy_error(event_details.clone(), e)) + .map_err(|e| EngineError::new_helm_error(event_details.clone(), e))?; + + // This will only print the diff on stdout + let _ = helm.upgrade_diff(&load_balancer_dns_hostname, &vec![]); + helm.upgrade(&load_balancer_dns_hostname, &vec![]) + .map_err(|e| EngineError::new_helm_error(event_details.clone(), e)) } fn create_error(&self) -> Result<(), EngineError> { @@ -1096,15 +1093,14 @@ impl<'a> DOKS<'a> { ); // delete custom metrics api to avoid stale namespaces on deletion - let _ = cmd::helm::helm_uninstall_list( + let helm = Helm::new( &kubernetes_config_file_path, - vec![HelmChart { - name: "metrics-server".to_string(), - namespace: "kube-system".to_string(), - version: None, - }], - self.cloud_provider().credentials_environment_variables(), - ); + &self.cloud_provider.credentials_environment_variables(), + ) + .map_err(|e| to_engine_error(&event_details, e))?; + let chart = ChartInfo::new_from_release_name("metrics-server", "kube-system"); + helm.uninstall(&chart, &vec![]) + .map_err(|e| to_engine_error(&event_details, e))?; // required to avoid namespace stuck on deletion uninstall_cert_manager( @@ -1124,50 +1120,27 @@ impl<'a> DOKS<'a> { let qovery_namespaces = get_qovery_managed_namespaces(); for qovery_namespace in qovery_namespaces.iter() { - let charts_to_delete = cmd::helm::helm_list( - &kubernetes_config_file_path, - self.cloud_provider().credentials_environment_variables(), - Some(qovery_namespace), - ); - match charts_to_delete { - Ok(charts) => { - for chart in charts { - match 
cmd::helm::helm_exec_uninstall( - &kubernetes_config_file_path, - &chart.namespace, - &chart.name, - self.cloud_provider().credentials_environment_variables(), - ) { - Ok(_) => self.logger().log( - LogLevel::Info, - EngineEvent::Deleting( - event_details.clone(), - EventMessage::new_from_safe(format!("Chart `{}` deleted", chart.name)), - ), - ), - Err(e) => { - let message_safe = format!("Can't delete chart `{}`", chart.name); - self.logger().log( - LogLevel::Error, - EngineEvent::Deleting( - event_details.clone(), - EventMessage::new(message_safe, Some(e.message())), - ), - ) - } - } - } - } - Err(e) => { - if !(e.message().contains("not found")) { + let charts_to_delete = helm + .list_release(Some(qovery_namespace), &vec![]) + .map_err(|e| to_engine_error(&event_details, e))?; + + for chart in charts_to_delete { + let chart_info = ChartInfo::new_from_release_name(&chart.name, &chart.namespace); + match helm.uninstall(&chart_info, &vec![]) { + Ok(_) => self.logger().log( + LogLevel::Info, + EngineEvent::Deleting( + event_details.clone(), + EventMessage::new_from_safe(format!("Chart `{}` deleted", chart.name)), + ), + ), + Err(e) => { + let message_safe = format!("Can't delete chart `{}`", chart.name); self.logger().log( LogLevel::Error, EngineEvent::Deleting( event_details.clone(), - EventMessage::new_from_safe(format!( - "Can't delete the namespace {}", - qovery_namespace - )), + EventMessage::new(message_safe, Some(e.to_string())), ), ) } @@ -1222,18 +1195,11 @@ impl<'a> DOKS<'a> { ), ); - match cmd::helm::helm_list( - &kubernetes_config_file_path, - self.cloud_provider().credentials_environment_variables(), - None, - ) { + match helm.list_release(None, &vec![]) { Ok(helm_charts) => { for chart in helm_charts { - match cmd::helm::helm_uninstall_list( - &kubernetes_config_file_path, - vec![chart.clone()], - self.cloud_provider().credentials_environment_variables(), - ) { + let chart_info = ChartInfo::new_from_release_name(&chart.name, &chart.namespace); + match 
helm.uninstall(&chart_info, &vec![]) { Ok(_) => self.logger().log( LogLevel::Info, EngineEvent::Deleting( @@ -1242,12 +1208,12 @@ impl<'a> DOKS<'a> { ), ), Err(e) => { - let message_safe = format!("Error deleting chart `{}` deleted", chart.name); + let message_safe = format!("Error deleting chart `{}`: {}", chart.name, e); self.logger().log( LogLevel::Error, EngineEvent::Deleting( event_details.clone(), - EventMessage::new(message_safe, e.message), + EventMessage::new(message_safe, Some(e.to_string())), ), ) } @@ -1260,7 +1226,7 @@ impl<'a> DOKS<'a> { LogLevel::Error, EngineEvent::Deleting( event_details.clone(), - EventMessage::new(message_safe.to_string(), Some(e.message())), + EventMessage::new(message_safe.to_string(), Some(e.to_string())), ), ) } diff --git a/src/cloud_provider/digitalocean/kubernetes/node.rs b/src/cloud_provider/digitalocean/kubernetes/node.rs index b90a4d9b..3a5bb7a5 100644 --- a/src/cloud_provider/digitalocean/kubernetes/node.rs +++ b/src/cloud_provider/digitalocean/kubernetes/node.rs @@ -133,6 +133,7 @@ mod tests { NodeGroups::new("".to_string(), 2, 2, "s-2vcpu-4gb".to_string(), 20).unwrap(), NodeGroups { name: "".to_string(), + id: None, min_nodes: 2, max_nodes: 2, instance_type: "s-2vcpu-4gb".to_string(), diff --git a/src/cloud_provider/digitalocean/router.rs b/src/cloud_provider/digitalocean/router.rs index b695c9e3..ab48f336 100644 --- a/src/cloud_provider/digitalocean/router.rs +++ b/src/cloud_provider/digitalocean/router.rs @@ -1,5 +1,6 @@ use tera::Context as TeraContext; +use crate::cloud_provider::helm::ChartInfo; use crate::cloud_provider::models::{CustomDomain, CustomDomainDataTemplate, Route, RouteDataTemplate}; use crate::cloud_provider::service::{ default_tera_context, delete_router, deploy_stateless_service_error, send_progress_on_long_task, Action, Create, @@ -7,6 +8,7 @@ use crate::cloud_provider::service::{ }; use crate::cloud_provider::utilities::{check_cname_for, print_action, sanitize_name}; use 
crate::cloud_provider::DeploymentTarget; +use crate::cmd::helm; use crate::cmd::helm::Timeout; use crate::error::{EngineError, EngineErrorCause, EngineErrorScope}; use crate::errors::EngineError as NewEngineError; @@ -345,25 +347,26 @@ impl Create for Router { } // do exec helm upgrade and return the last deployment status - let helm_history_row = crate::cmd::helm::helm_exec_with_upgrade_history( - kubernetes_config_file_path.as_str(), - environment.namespace(), - helm_release_name.as_str(), - self.selector(), - workspace_dir.as_str(), - self.start_timeout(), - kubernetes.cloud_provider().credentials_environment_variables(), - self.service_type(), + let helm = helm::Helm::new( + &kubernetes_config_file_path, + &kubernetes.cloud_provider().credentials_environment_variables(), ) - .map_err(|e| { - NewEngineError::new_helm_charts_upgrade_error(event_details.clone(), e).to_legacy_engine_error() - })?; + .map_err(|e| helm::to_engine_error(&event_details, e).to_legacy_engine_error())?; + let chart = ChartInfo::new_from_custom_namespace( + helm_release_name, + workspace_dir.clone(), + environment.namespace().to_string(), + 600_i64, + match self.service_type() { + ServiceType::Database(_) => vec![format!("{}/q-values.yaml", &workspace_dir)], + _ => vec![], + }, + false, + self.selector(), + ); - if helm_history_row.is_none() || !helm_history_row.unwrap().is_successfully_deployed() { - return Err(self.engine_error(EngineErrorCause::Internal, "Router has failed to be deployed".into())); - } - - Ok(()) + helm.upgrade(&chart, &vec![]) + .map_err(|e| helm::to_engine_error(&event_details, e).to_legacy_engine_error()) } fn on_create_check(&self) -> Result<(), EngineError> { diff --git a/src/cloud_provider/helm.rs b/src/cloud_provider/helm.rs index 12877216..f78fe1c1 100644 --- a/src/cloud_provider/helm.rs +++ b/src/cloud_provider/helm.rs @@ -1,10 +1,7 @@ use crate::cloud_provider::helm::HelmAction::Deploy; use crate::cloud_provider::helm::HelmChartNamespaces::KubeSystem; use 
crate::cloud_provider::qovery::{get_qovery_app_version, EngineLocation, QoveryAppName, QoveryShellAgent}; -use crate::cmd::helm::{ - helm_destroy_chart_if_breaking_changes_version_detected, helm_exec_uninstall_with_chart_info, - helm_exec_upgrade_with_chart_info, helm_upgrade_diff_with_chart_info, is_chart_deployed, -}; +use crate::cmd::helm::{to_command_error, Helm}; use crate::cmd::kubectl::{ kubectl_delete_crash_looping_pods, kubectl_exec_delete_crd, kubectl_exec_get_configmap, kubectl_exec_get_events, kubectl_exec_rollout_restart_deployment, kubectl_exec_with_output, @@ -20,7 +17,7 @@ use thread::spawn; use tracing::{span, Level}; use uuid::Uuid; -#[derive(Clone)] +#[derive(Clone, PartialEq, Eq)] pub enum HelmAction { Deploy, Destroy, @@ -108,6 +105,15 @@ impl ChartInfo { } } + pub fn new_from_release_name(name: &str, custom_namespace: &str) -> ChartInfo { + ChartInfo { + name: name.to_string(), + namespace: HelmChartNamespaces::Custom, + custom_namespace: Some(custom_namespace.to_string()), + ..Default::default() + } + } + pub fn get_namespace_string(&self) -> String { match self.namespace { HelmChartNamespaces::Custom => self @@ -130,7 +136,7 @@ impl Default for ChartInfo { atomic: true, force_upgrade: false, last_breaking_version_requiring_restart: None, - timeout_in_seconds: 300, + timeout_in_seconds: 600, dry_run: false, wait: true, values: Vec::new(), @@ -216,36 +222,22 @@ pub trait HelmChart: Send { ) -> Result, CommandError> { let environment_variables: Vec<(&str, &str)> = envs.iter().map(|x| (x.0.as_str(), x.1.as_str())).collect(); let chart_info = self.get_chart_info(); + let helm = Helm::new(kubernetes_config, &environment_variables).map_err(to_command_error)?; + match chart_info.action { HelmAction::Deploy => { - if let Err(e) = helm_destroy_chart_if_breaking_changes_version_detected( - kubernetes_config, - &environment_variables, - chart_info, - ) { + if let Err(e) = helm.uninstall_chart_if_breaking_version(chart_info, &vec![]) { warn!( "error 
while trying to destroy chart if breaking change is detected: {:?}", - e.message() + e.to_string() ); } - helm_exec_upgrade_with_chart_info(kubernetes_config, &environment_variables, chart_info)? + helm.upgrade(&chart_info, &vec![]).map_err(to_command_error)?; } HelmAction::Destroy => { let chart_info = self.get_chart_info(); - match is_chart_deployed( - kubernetes_config, - environment_variables.clone(), - Some(chart_info.get_namespace_string().as_str()), - chart_info.name.clone(), - ) { - Ok(deployed) => { - if deployed { - helm_exec_uninstall_with_chart_info(kubernetes_config, &environment_variables, chart_info)? - } - } - Err(e) => return Err(e), - }; + helm.uninstall(&chart_info, &vec![]).map_err(to_command_error)?; } HelmAction::Skip => {} } @@ -303,24 +295,31 @@ fn deploy_parallel_charts( handles.push(handle); } + let mut errors: Vec> = vec![]; for handle in handles { match handle.join() { Ok(helm_run_ret) => { if let Err(e) = helm_run_ret { - return Err(e); + errors.push(Err(e)); } } Err(e) => { let safe_message = "Thread panicked during parallel charts deployments."; - return Err(CommandError::new( + let error = Err(CommandError::new( format!("{}, error: {:?}", safe_message.to_string(), e), Some(safe_message.to_string()), )); + errors.push(error); } } } - Ok(()) + if errors.is_empty() { + Ok(()) + } else { + error!("Deployments of charts failed with: {:?}", errors); + errors.remove(0) + } } pub fn deploy_charts_levels( @@ -330,24 +329,24 @@ pub fn deploy_charts_levels( dry_run: bool, ) -> Result<(), CommandError> { // first show diff - for level in &charts { - for chart in level { + let envs_ref: Vec<(&str, &str)> = envs.iter().map(|(x, y)| (x.as_str(), y.as_str())).collect(); + let helm = Helm::new(&kubernetes_config, &envs_ref).map_err(to_command_error)?; + + for level in charts { + // Show diff for all chart in this state + for chart in &level { let chart_info = chart.get_chart_info(); - match chart_info.action { - // don't do diff on destroy or skip - 
HelmAction::Deploy => { - let _ = helm_upgrade_diff_with_chart_info(&kubernetes_config, envs, chart.get_chart_info()); - } - _ => {} + // don't do diff on destroy or skip + if chart_info.action == HelmAction::Deploy { + let _ = helm.upgrade_diff(chart_info, &vec![]); } } - } - // then apply - if dry_run { - return Ok(()); - } - for level in charts.into_iter() { + // Skip actual deployment if dry run + if dry_run { + continue; + } + if let Err(e) = deploy_parallel_charts(&kubernetes_config, &envs, level) { return Err(e); } @@ -591,47 +590,36 @@ impl HelmChart for PrometheusOperatorConfigChart { ) -> Result, CommandError> { let environment_variables: Vec<(&str, &str)> = envs.iter().map(|x| (x.0.as_str(), x.1.as_str())).collect(); let chart_info = self.get_chart_info(); + let helm = Helm::new(kubernetes_config, &environment_variables).map_err(to_command_error)?; + match chart_info.action { HelmAction::Deploy => { - if let Err(e) = helm_destroy_chart_if_breaking_changes_version_detected( - kubernetes_config, - &environment_variables, - chart_info, - ) { + if let Err(e) = helm.uninstall_chart_if_breaking_version(chart_info, &vec![]) { warn!( "error while trying to destroy chart if breaking change is detected: {}", - e.message() + e.to_string() ); } - helm_exec_upgrade_with_chart_info(kubernetes_config, &environment_variables, chart_info)? 
+ helm.upgrade(&chart_info, &vec![]).map_err(to_command_error)?; } HelmAction::Destroy => { let chart_info = self.get_chart_info(); - match is_chart_deployed( - kubernetes_config, - environment_variables.clone(), - Some(chart_info.get_namespace_string().as_str()), - chart_info.name.clone(), - ) { - Ok(deployed) => { - if deployed { - let prometheus_crds = [ - "prometheuses.monitoring.coreos.com", - "prometheusrules.monitoring.coreos.com", - "servicemonitors.monitoring.coreos.com", - "podmonitors.monitoring.coreos.com", - "alertmanagers.monitoring.coreos.com", - "thanosrulers.monitoring.coreos.com", - ]; - helm_exec_uninstall_with_chart_info(kubernetes_config, &environment_variables, chart_info)?; - for crd in &prometheus_crds { - kubectl_exec_delete_crd(kubernetes_config, crd, environment_variables.clone())?; - } - } + if helm.check_release_exist(&chart_info, &vec![]).is_ok() { + helm.uninstall(&chart_info, &vec![]).map_err(to_command_error)?; + + let prometheus_crds = [ + "prometheuses.monitoring.coreos.com", + "prometheusrules.monitoring.coreos.com", + "servicemonitors.monitoring.coreos.com", + "podmonitors.monitoring.coreos.com", + "alertmanagers.monitoring.coreos.com", + "thanosrulers.monitoring.coreos.com", + ]; + for crd in &prometheus_crds { + let _ = kubectl_exec_delete_crd(kubernetes_config, crd, environment_variables.clone()); } - Err(e) => return Err(e), - }; + } } HelmAction::Skip => {} } diff --git a/src/cloud_provider/kubernetes.rs b/src/cloud_provider/kubernetes.rs index 9fa0ac4c..a234fd96 100644 --- a/src/cloud_provider/kubernetes.rs +++ b/src/cloud_provider/kubernetes.rs @@ -34,7 +34,7 @@ use crate::fs::workspace_directory; use crate::logger::{LogLevel, Logger}; use crate::models::ProgressLevel::Info; use crate::models::{ - Action, Context, Listen, ListenersHelper, ProgressInfo, ProgressLevel, ProgressScope, QoveryIdentifier, + Action, Context, Listen, ListenersHelper, ProgressInfo, ProgressLevel, ProgressScope, QoveryIdentifier, StringPath, }; use 
crate::object_storage::ObjectStorage; use crate::unit_conversion::{any_to_mi, cpu_string_to_float}; @@ -78,29 +78,65 @@ pub trait Kubernetes: Listen { ) } + fn get_kubeconfig_filename(&self) -> String { + format!("{}.yaml", self.id()) + } + fn get_kubeconfig_file(&self) -> Result<(String, File), EngineError> { + let event_details = self.get_event_details(Infrastructure(InfrastructureStep::LoadConfiguration)); let bucket_name = format!("qovery-kubeconfigs-{}", self.id()); - let object_key = format!("{}.yaml", self.id()); + let object_key = self.get_kubeconfig_filename(); let stage = Stage::General(GeneralStep::RetrieveClusterConfig); - let (string_path, file) = match self - .config_file_store() - .get(bucket_name.as_str(), object_key.as_str(), true) - { - Ok((path, file)) => (path, file), - Err(err) => { - let error = EngineError::new_cannot_retrieve_cluster_config_file( - self.get_event_details(stage), - CommandError::new_from_safe_message( - format!( - "Error getting file from store, error: {}", - err.message.unwrap_or_else(|| "no details.".to_string()) - ) - .to_string(), - ), - ); - self.logger().log(LogLevel::Error, EngineEvent::Error(error.clone())); - return Err(error); + // check if kubeconfig locally exists + let local_kubeconfig = match self.get_temp_dir(event_details) { + Ok(x) => { + let local_kubeconfig_folder_path = format!("{}/{}", &x, &bucket_name); + let local_kubeconfig_generated = format!("{}/{}", &local_kubeconfig_folder_path, &object_key); + if Path::new(&local_kubeconfig_generated).exists() { + match File::open(&local_kubeconfig_generated) { + Ok(_) => Some(local_kubeconfig_generated), + Err(_) => { + debug!("couldn't open {} file", &local_kubeconfig_generated); + None + } + } + } else { + None + } + } + Err(_) => None, + }; + + // otherwise, try to get it from object storage + let (string_path, file) = match local_kubeconfig { + Some(local_kubeconfig_generated) => { + let kubeconfig_file = + 
File::open(&local_kubeconfig_generated).expect("couldn't read kubeconfig file, but file exists"); + + (StringPath::from(&local_kubeconfig_generated), kubeconfig_file) + } + None => { + match self + .config_file_store() + .get(bucket_name.as_str(), object_key.as_str(), true) + { + Ok((path, file)) => (path, file), + Err(err) => { + let error = EngineError::new_cannot_retrieve_cluster_config_file( + self.get_event_details(stage), + CommandError::new_from_safe_message( + format!( + "Error getting file from store, error: {}", + err.message.unwrap_or_else(|| "no details.".to_string()) + ) + .to_string(), + ), + ); + self.logger().log(LogLevel::Error, EngineEvent::Error(error.clone())); + return Err(error); + } + } } }; @@ -233,11 +269,12 @@ pub trait Kubernetes: Listen { where Self: Sized, { + let kubeconfig = match self.get_kubeconfig_file() { + Ok((path, _)) => path, + Err(e) => return Err(CommandError::new(e.message(), None)), + }; send_progress_on_long_task(self, Action::Create, || { - check_workers_status( - self.get_kubeconfig_file_path().expect("Unable to get Kubeconfig"), - self.cloud_provider().credentials_environment_variables(), - ) + check_workers_status(&kubeconfig, self.cloud_provider().credentials_environment_variables()) }) } fn upgrade_with_status(&self, kubernetes_upgrade_status: KubernetesUpgradeStatus) -> Result<(), EngineError>; @@ -436,11 +473,29 @@ pub fn deploy_environment( "deployment", CheckAction::Deploy, )?; + } - // Quick fix: adding 100 ms delay to avoid race condition on service status update - thread::sleep(std::time::Duration::from_millis(100)); + // Quick fix: adding 100 ms delay to avoid race condition on service status update + thread::sleep(std::time::Duration::from_millis(100)); - // check all deployed services + // check all deployed services + for service in &environment.stateful_services { + let _ = service::check_kubernetes_service_error( + service.exec_check_action(), + kubernetes, + service, + event_details.clone(), + 
&stateless_deployment_target, + &listeners_helper, + "check deployment", + CheckAction::Deploy, + )?; + } + + // Quick fix: adding 100 ms delay to avoid race condition on service status update + thread::sleep(std::time::Duration::from_millis(100)); + + for service in &environment.stateless_services { let _ = service::check_kubernetes_service_error( service.exec_check_action(), kubernetes, @@ -1192,6 +1247,7 @@ impl NodeGroups { Ok(NodeGroups { name: group_name, + id: None, min_nodes, max_nodes, instance_type, diff --git a/src/cloud_provider/models.rs b/src/cloud_provider/models.rs index 97d8f1c3..a02585b6 100644 --- a/src/cloud_provider/models.rs +++ b/src/cloud_provider/models.rs @@ -65,6 +65,7 @@ pub struct CpuLimits { #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)] pub struct NodeGroups { pub name: String, + pub id: Option, pub min_nodes: i32, pub max_nodes: i32, pub instance_type: String, diff --git a/src/cloud_provider/scaleway/kubernetes/helm_charts.rs b/src/cloud_provider/scaleway/kubernetes/helm_charts.rs index e2b02d44..98bb8f3e 100644 --- a/src/cloud_provider/scaleway/kubernetes/helm_charts.rs +++ b/src/cloud_provider/scaleway/kubernetes/helm_charts.rs @@ -1,7 +1,6 @@ use crate::cloud_provider::helm::{ get_chart_for_shell_agent, get_engine_helm_action_from_location, ChartInfo, ChartSetValue, ChartValuesGenerated, - CommonChart, CoreDNSConfigChart, HelmAction, HelmChart, HelmChartNamespaces, PrometheusOperatorConfigChart, - ShellAgentContext, + CommonChart, CoreDNSConfigChart, HelmChart, HelmChartNamespaces, PrometheusOperatorConfigChart, ShellAgentContext, }; use crate::cloud_provider::qovery::{get_qovery_app_version, EngineLocation, QoveryAgent, QoveryAppName, QoveryEngine}; use crate::cloud_provider::scaleway::application::{ScwRegion, ScwZone}; @@ -283,6 +282,7 @@ pub fn scw_helm_charts( }, }; + /* Example to delete an old chart let old_prometheus_operator = PrometheusOperatorConfigChart { chart_info: ChartInfo { name: 
"prometheus-operator".to_string(), @@ -290,7 +290,7 @@ pub fn scw_helm_charts( action: HelmAction::Destroy, ..Default::default() }, - }; + };*/ let kube_prometheus_stack = PrometheusOperatorConfigChart { chart_info: ChartInfo { @@ -493,7 +493,9 @@ datasources: }, ChartSetValue { key: "prometheus.servicemonitor.enabled".to_string(), - value: chart_config_prerequisites.ff_metrics_history_enabled.to_string(), + // Due to cycle, prometheus need tls certificate from cert manager, and enabling this will require + // prometheus to be already installed + value: "false".to_string(), }, ChartSetValue { key: "prometheus.servicemonitor.prometheusInstance".to_string(), @@ -519,11 +521,11 @@ datasources: // Webhooks resources limits ChartSetValue { key: "webhook.resources.limits.cpu".to_string(), - value: "20m".to_string(), + value: "200m".to_string(), }, ChartSetValue { key: "webhook.resources.requests.cpu".to_string(), - value: "20m".to_string(), + value: "50m".to_string(), }, ChartSetValue { key: "webhook.resources.limits.memory".to_string(), @@ -856,21 +858,19 @@ datasources: }; // chart deployment order matters!!! 
- let level_1: Vec> = vec![ - Box::new(q_storage_class), - Box::new(coredns_config), - Box::new(old_prometheus_operator), - ]; + let level_1: Vec> = vec![Box::new(q_storage_class), Box::new(coredns_config)]; - let mut level_2: Vec> = vec![]; + let level_2: Vec> = vec![Box::new(cert_manager)]; let mut level_3: Vec> = vec![]; - let mut level_4: Vec> = vec![Box::new(external_dns)]; + let mut level_4: Vec> = vec![]; - let mut level_5: Vec> = vec![Box::new(nginx_ingress), Box::new(cert_manager)]; + let mut level_5: Vec> = vec![Box::new(external_dns)]; - let mut level_6: Vec> = vec![ + let mut level_6: Vec> = vec![Box::new(nginx_ingress)]; + + let mut level_7: Vec> = vec![ Box::new(cert_manager_config), Box::new(qovery_agent), Box::new(shell_agent), @@ -879,24 +879,24 @@ datasources: // // observability if chart_config_prerequisites.ff_metrics_history_enabled { - level_2.push(Box::new(kube_prometheus_stack)); - level_4.push(Box::new(prometheus_adapter)); - level_4.push(Box::new(kube_state_metrics)); + level_3.push(Box::new(kube_prometheus_stack)); + level_5.push(Box::new(prometheus_adapter)); + level_5.push(Box::new(kube_state_metrics)); } if chart_config_prerequisites.ff_log_history_enabled { - level_3.push(Box::new(promtail)); - level_4.push(Box::new(loki)); + level_4.push(Box::new(promtail)); + level_5.push(Box::new(loki)); } if chart_config_prerequisites.ff_metrics_history_enabled || chart_config_prerequisites.ff_log_history_enabled { - level_6.push(Box::new(grafana)) + level_7.push(Box::new(grafana)) }; // pleco if !chart_config_prerequisites.disable_pleco { - level_5.push(Box::new(pleco)); + level_6.push(Box::new(pleco)); } info!("charts configuration preparation finished"); - Ok(vec![level_1, level_2, level_3, level_4, level_5, level_6]) + Ok(vec![level_1, level_2, level_3, level_4, level_5, level_6, level_7]) } diff --git a/src/cloud_provider/scaleway/kubernetes/mod.rs b/src/cloud_provider/scaleway/kubernetes/mod.rs index cc2b1493..ea977e8c 100644 --- 
a/src/cloud_provider/scaleway/kubernetes/mod.rs +++ b/src/cloud_provider/scaleway/kubernetes/mod.rs @@ -3,7 +3,7 @@ pub mod node; use crate::cloud_provider::aws::regions::AwsZones; use crate::cloud_provider::environment::Environment; -use crate::cloud_provider::helm::deploy_charts_levels; +use crate::cloud_provider::helm::{deploy_charts_levels, ChartInfo}; use crate::cloud_provider::kubernetes::{ is_kubernetes_upgrade_required, send_progress_on_long_task, uninstall_cert_manager, Kind, Kubernetes, KubernetesUpgradeStatus, ProviderOptions, @@ -12,11 +12,11 @@ use crate::cloud_provider::models::{NodeGroups, NodeGroupsFormat}; use crate::cloud_provider::qovery::EngineLocation; use crate::cloud_provider::scaleway::application::ScwZone; use crate::cloud_provider::scaleway::kubernetes::helm_charts::{scw_helm_charts, ChartsConfigPrerequisites}; -use crate::cloud_provider::scaleway::kubernetes::node::ScwInstancesType; +use crate::cloud_provider::scaleway::kubernetes::node::{ScwInstancesType, ScwNodeGroup}; use crate::cloud_provider::utilities::print_action; use crate::cloud_provider::{kubernetes, CloudProvider}; +use crate::cmd::helm::{to_engine_error, Helm}; use crate::cmd::kubectl::{kubectl_exec_api_custom_metrics, kubectl_exec_get_all_namespaces, kubectl_exec_get_events}; -use crate::cmd::structs::HelmChart; use crate::cmd::terraform::{terraform_exec, terraform_init_validate_plan_apply, terraform_init_validate_state_list}; use crate::deletion_utilities::{get_firsts_namespaces_to_delete, get_qovery_managed_namespaces}; use crate::dns_provider::DnsProvider; @@ -29,18 +29,32 @@ use crate::models::{ }; use crate::object_storage::scaleway_object_storage::{BucketDeleteStrategy, ScalewayOS}; use crate::object_storage::ObjectStorage; +use crate::runtime::block_on; use crate::string::terraform_list_format; use crate::{cmd, dns_provider}; use ::function_name::named; +use reqwest::StatusCode; use retry::delay::{Fibonacci, Fixed}; use retry::Error::Operation; use 
retry::OperationResult; +use scaleway_api_rs::apis::Error; +use scaleway_api_rs::models::ScalewayK8sV1Cluster; use serde::{Deserialize, Serialize}; use std::env; use std::path::Path; use std::str::FromStr; use tera::Context as TeraContext; +#[derive(PartialEq)] +pub enum ScwNodeGroupErrors { + CloudProviderApiError(CommandError), + ClusterDoesNotExists(CommandError), + MultipleClusterFound, + NoNodePoolFound(CommandError), + MissingNodePoolInfo, + NodeGroupValidationError(CommandError), +} + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct KapsuleOptions { // Qovery @@ -193,6 +207,210 @@ impl<'a> Kapsule<'a> { }) } + fn get_configuration(&self) -> scaleway_api_rs::apis::configuration::Configuration { + scaleway_api_rs::apis::configuration::Configuration { + api_key: Some(scaleway_api_rs::apis::configuration::ApiKey { + key: self.options.scaleway_secret_key.clone(), + prefix: None, + }), + ..scaleway_api_rs::apis::configuration::Configuration::default() + } + } + + fn get_scw_cluster_info(&self) -> Result, EngineError> { + let event_details = self.get_event_details(Infrastructure(InfrastructureStep::LoadConfiguration)); + + // get cluster info + let cluster_info = match block_on(scaleway_api_rs::apis::clusters_api::list_clusters( + &self.get_configuration(), + self.region().as_str(), + None, + Some(self.options.scaleway_project_id.as_str()), + None, + None, + None, + Some(self.cluster_name().as_str()), + None, + None, + )) { + Ok(x) => x, + Err(e) => { + let msg = format!("wasn't able to retrieve SCW cluster information from the API. 
{:?}", e); + return Err(EngineError::new_cannot_get_cluster_error( + event_details.clone(), + CommandError::new(msg.clone(), Some(msg)), + )); + } + }; + + // if no cluster exists + let cluster_info_content = cluster_info.clusters.unwrap(); + if &cluster_info_content.len() == &(0 as usize) { + return Ok(None); + } else if &cluster_info_content.len() != &(1 as usize) { + let msg = format!( + "too many clusters found with this name, where 1 was expected. {:?}", + &cluster_info_content.len() + ); + return Err(EngineError::new_multiple_cluster_found_expected_one_error( + event_details, + CommandError::new(msg.clone(), Some(msg)), + )); + } + + Ok(Some(cluster_info_content[0].clone())) + } + + fn get_existing_sanitized_node_groups( + &self, + cluster_info: ScalewayK8sV1Cluster, + ) -> Result, ScwNodeGroupErrors> { + let error_cluster_id = format!("expected cluster id for this Scaleway cluster"); + let cluster_id = match cluster_info.id { + None => { + return Err(ScwNodeGroupErrors::NodeGroupValidationError( + CommandError::new_from_safe_message(error_cluster_id), + )) + } + Some(x) => x, + }; + + let pools = match block_on(scaleway_api_rs::apis::pools_api::list_pools( + &self.get_configuration(), + self.region().as_str(), + cluster_id.as_str(), + None, + None, + None, + None, + None, + )) { + Ok(x) => x, + Err(e) => { + let msg = format!("error while trying to get SCW pool info from cluster {}", &cluster_id); + let msg_with_error = format!("{}. 
{:?}", msg.clone(), e); + return Err(ScwNodeGroupErrors::CloudProviderApiError(CommandError::new( + msg_with_error, + Some(msg), + ))); + } + }; + + // ensure pool are present + if pools.pools.is_none() { + let msg = format!( + "No SCW pool found from the SCW API for cluster {}/{}", + &cluster_id, + &cluster_info.name.unwrap_or("unknown cluster".to_string()) + ); + return Err(ScwNodeGroupErrors::NoNodePoolFound(CommandError::new( + msg.clone(), + Some(msg), + ))); + } + + // create sanitized nodegroup pools + let mut nodegroup_pool: Vec = Vec::with_capacity(pools.total_count.unwrap_or(0 as f32) as usize); + for ng in pools.pools.unwrap() { + if ng.id.is_none() { + let msg = format!( + "error while trying to validate SCW pool ID from cluster {}", + &cluster_id + ); + return Err(ScwNodeGroupErrors::NodeGroupValidationError(CommandError::new( + msg.clone(), + Some(msg), + ))); + } + let ng_sanitized = self.get_node_group_info(ng.id.unwrap().as_str())?; + nodegroup_pool.push(ng_sanitized) + } + + Ok(nodegroup_pool) + } + + fn get_node_group_info(&self, pool_id: &str) -> Result { + let pool = match block_on(scaleway_api_rs::apis::pools_api::get_pool( + &self.get_configuration(), + self.region().as_str(), + pool_id, + )) { + Ok(x) => x, + Err(e) => { + return Err(match e { + Error::ResponseError(x) => { + let msg_with_error = format!( + "Error code while getting node group: {}, API message: {} ", + x.status, x.content + ); + match x.status { + StatusCode::NOT_FOUND => ScwNodeGroupErrors::NoNodePoolFound(CommandError::new( + msg_with_error, + Some("No node pool found".to_string()), + )), + _ => ScwNodeGroupErrors::CloudProviderApiError(CommandError::new( + msg_with_error, + Some("Scaleway API error while trying to get node group".to_string()), + )), + } + } + _ => { + let msg = "This Scaleway API error is not supported in the engine, please add it to better support it".to_string(); + ScwNodeGroupErrors::NodeGroupValidationError(CommandError::new(msg.clone(), Some(msg))) + 
} + }) + } + }; + + // ensure there is no missing info + if let Err(e) = self.check_missing_nodegroup_info(&pool.name, "name") { + return Err(e); + }; + if let Err(e) = self.check_missing_nodegroup_info(&pool.min_size, "min_size") { + return Err(e); + }; + if let Err(e) = self.check_missing_nodegroup_info(&pool.max_size, "max_size") { + return Err(e); + }; + if let Err(e) = self.check_missing_nodegroup_info(&pool.status, "status") { + return Err(e); + }; + + match ScwNodeGroup::new( + pool.id, + pool.name.unwrap(), + pool.min_size.unwrap() as i32, + pool.max_size.unwrap() as i32, + pool.node_type, + pool.size as i32, + pool.status.unwrap(), + ) { + Ok(x) => Ok(x), + Err(e) => Err(ScwNodeGroupErrors::NodeGroupValidationError(e)), + } + } + + fn check_missing_nodegroup_info(&self, item: &Option, name: &str) -> Result<(), ScwNodeGroupErrors> { + let event_details = self.get_event_details(Infrastructure(InfrastructureStep::LoadConfiguration)); + + self.logger.log( + LogLevel::Error, + EngineEvent::Error(EngineError::new_missing_workers_group_info_error( + event_details, + CommandError::new_from_safe_message(format!( + "Missing node pool info {} for cluster {}", + name, + self.context.cluster_id() + )), + )), + ); + + if item.is_none() { + return Err(ScwNodeGroupErrors::MissingNodePoolInfo); + }; + Ok(()) + } + fn kubeconfig_bucket_name(&self) -> String { format!("qovery-kubeconfigs-{}", self.id()) } @@ -256,7 +474,7 @@ impl<'a> Kapsule<'a> { // Kubernetes context.insert("test_cluster", &self.context.is_test_cluster()); context.insert("kubernetes_cluster_id", self.id()); - context.insert("kubernetes_cluster_name", self.name()); + context.insert("kubernetes_cluster_name", self.cluster_name().as_str()); context.insert("kubernetes_cluster_version", self.version()); // Qovery @@ -579,6 +797,192 @@ impl<'a> Kapsule<'a> { return Err(error); } + let cluster_info = self.get_scw_cluster_info()?; + if cluster_info.is_none() { + let msg = "no cluster found from the Scaleway 
API".to_string(); + return Err(EngineError::new_no_cluster_found_error( + event_details.clone(), + CommandError::new(msg.clone(), Some(msg)), + )); + } + + let current_nodegroups = match self + .get_existing_sanitized_node_groups(cluster_info.expect("A cluster should be present at this create stage")) + { + Ok(x) => x, + Err(e) => { + match e { + ScwNodeGroupErrors::CloudProviderApiError(c) => { + return Err(EngineError::new_missing_api_info_from_cloud_provider_error( + event_details.clone(), + Some(c), + )) + } + ScwNodeGroupErrors::ClusterDoesNotExists(_) => self.logger().log( + LogLevel::Info, + EngineEvent::Deploying( + event_details.clone(), + EventMessage::new_from_safe( + "cluster do not exists, no node groups can be retrieved for upgrade check".to_string(), + ), + ), + ), + ScwNodeGroupErrors::MultipleClusterFound => { + let msg = "multiple clusters found, can't match the correct node groups".to_string(); + return Err(EngineError::new_multiple_cluster_found_expected_one_error( + event_details.clone(), + CommandError::new(msg.clone(), Some(msg)), + )); + } + ScwNodeGroupErrors::NoNodePoolFound(_) => self.logger().log( + LogLevel::Info, + EngineEvent::Deploying( + event_details.clone(), + EventMessage::new_from_safe( + "cluster exists, but no node groups found for upgrade check".to_string(), + ), + ), + ), + ScwNodeGroupErrors::MissingNodePoolInfo => { + let msg = format!("Error with Scaleway API while trying to retrieve node pool info"); + return Err(EngineError::new_missing_api_info_from_cloud_provider_error( + event_details.clone(), + Some(CommandError::new_from_safe_message(msg)), + )); + } + ScwNodeGroupErrors::NodeGroupValidationError(c) => { + return Err(EngineError::new_missing_api_info_from_cloud_provider_error( + event_details.clone(), + Some(c), + )); + } + }; + Vec::with_capacity(0) + } + }; + + // ensure all node groups are in ready state Scaleway side + self.logger.log( + LogLevel::Info, + EngineEvent::Deploying( + event_details.clone(), + 
EventMessage::new_from_safe( + "ensuring all groups nodes are in ready state from the Scaleway API".to_string(), + ), + ), + ); + + for ng in current_nodegroups { + let res = retry::retry( + // retry 10 min max per nodegroup until they are ready + Fixed::from_millis(15000).take(40), + || { + self.logger().log( + LogLevel::Info, + EngineEvent::Deploying( + event_details.clone(), + EventMessage::new_from_safe(format!( + "checking node group {}/{:?}, current status: {:?}", + &ng.name, + &ng.id.as_ref().unwrap_or(&"unknown".to_string()), + &ng.status + )), + ), + ); + let pool_id = match &ng.id { + None => { + let msg = + "node group id was expected to get info, but not found from Scaleway API".to_string(); + return OperationResult::Retry( + EngineError::new_missing_api_info_from_cloud_provider_error( + event_details.clone(), + Some(CommandError::new_from_safe_message(msg)), + ), + ); + } + Some(x) => x, + }; + let scw_ng = match self.get_node_group_info(pool_id.as_str()) { + Ok(x) => x, + Err(e) => { + return match e { + ScwNodeGroupErrors::CloudProviderApiError(c) => { + let current_error = EngineError::new_missing_api_info_from_cloud_provider_error( + event_details.clone(), + Some(c), + ); + self.logger + .log(LogLevel::Error, EngineEvent::Error(current_error.clone())); + OperationResult::Retry(current_error) + } + ScwNodeGroupErrors::ClusterDoesNotExists(c) => { + let current_error = + EngineError::new_no_cluster_found_error(event_details.clone(), c); + self.logger + .log(LogLevel::Error, EngineEvent::Error(current_error.clone())); + OperationResult::Retry(current_error) + } + ScwNodeGroupErrors::MultipleClusterFound => { + OperationResult::Retry(EngineError::new_multiple_cluster_found_expected_one_error( + event_details.clone(), + CommandError::new_from_safe_message( + "Multiple cluster found while one was expected".to_string(), + ), + )) + } + ScwNodeGroupErrors::NoNodePoolFound(_) => OperationResult::Ok(()), + ScwNodeGroupErrors::MissingNodePoolInfo => { + 
OperationResult::Retry(EngineError::new_missing_api_info_from_cloud_provider_error( + event_details.clone(), + None, + )) + } + ScwNodeGroupErrors::NodeGroupValidationError(c) => { + let current_error = EngineError::new_missing_api_info_from_cloud_provider_error( + event_details.clone(), + Some(c), + ); + self.logger + .log(LogLevel::Error, EngineEvent::Error(current_error.clone())); + OperationResult::Retry(current_error) + } + } + } + }; + match scw_ng.status == scaleway_api_rs::models::scaleway_k8s_v1_pool::Status::Ready { + true => OperationResult::Ok(()), + false => OperationResult::Retry(EngineError::new_k8s_node_not_ready( + event_details.clone(), + CommandError::new_from_safe_message(format!( + "waiting for node group {} to be ready, current status: {:?}", + &scw_ng.name, scw_ng.status + )), + )), + } + }, + ); + match res { + Ok(_) => {} + Err(Operation { error, .. }) => return Err(error), + Err(retry::Error::Internal(msg)) => { + return Err(EngineError::new_k8s_node_not_ready( + event_details.clone(), + CommandError::new(msg, Some("Waiting for too long worker nodes to be ready".to_string())), + )) + } + } + } + self.logger.log( + LogLevel::Info, + EngineEvent::Deploying( + event_details.clone(), + EventMessage::new_from_safe( + "all node groups for this cluster are ready from cloud provider API".to_string(), + ), + ), + ); + + // ensure all nodes are ready on Kubernetes match self.check_workers_on_create() { Ok(_) => { self.send_to_customer( @@ -793,7 +1197,10 @@ impl<'a> Kapsule<'a> { }; if tf_workers_resources.is_empty() { - return Err(EngineError::new_cluster_has_no_worker_nodes(event_details.clone())); + return Err(EngineError::new_cluster_has_no_worker_nodes( + event_details.clone(), + None, + )); } let kubernetes_config_file_path = self.get_kubeconfig_file_path()?; @@ -1090,15 +1497,14 @@ impl<'a> Kapsule<'a> { ); // delete custom metrics api to avoid stale namespaces on deletion - let _ = cmd::helm::helm_uninstall_list( + let helm = Helm::new( 
&kubernetes_config_file_path, - vec![HelmChart { - name: "metrics-server".to_string(), - namespace: "kube-system".to_string(), - version: None, - }], - self.cloud_provider().credentials_environment_variables(), - ); + &self.cloud_provider.credentials_environment_variables(), + ) + .map_err(|e| to_engine_error(&event_details, e))?; + let chart = ChartInfo::new_from_release_name("metrics-server", "kube-system"); + helm.uninstall(&chart, &vec![]) + .map_err(|e| to_engine_error(&event_details, e))?; // required to avoid namespace stuck on deletion uninstall_cert_manager( @@ -1118,50 +1524,27 @@ impl<'a> Kapsule<'a> { let qovery_namespaces = get_qovery_managed_namespaces(); for qovery_namespace in qovery_namespaces.iter() { - let charts_to_delete = cmd::helm::helm_list( - &kubernetes_config_file_path, - self.cloud_provider().credentials_environment_variables(), - Some(qovery_namespace), - ); - match charts_to_delete { - Ok(charts) => { - for chart in charts { - match cmd::helm::helm_exec_uninstall( - &kubernetes_config_file_path, - &chart.namespace, - &chart.name, - self.cloud_provider().credentials_environment_variables(), - ) { - Ok(_) => self.logger().log( - LogLevel::Info, - EngineEvent::Deleting( - event_details.clone(), - EventMessage::new_from_safe(format!("Chart `{}` deleted", chart.name)), - ), - ), - Err(e) => { - let message_safe = format!("Can't delete chart `{}`", chart.name); - self.logger().log( - LogLevel::Error, - EngineEvent::Deleting( - event_details.clone(), - EventMessage::new(message_safe, Some(e.message())), - ), - ) - } - } - } - } - Err(e) => { - if !(e.message().contains("not found")) { + let charts_to_delete = helm + .list_release(Some(qovery_namespace), &vec![]) + .map_err(|e| to_engine_error(&event_details, e))?; + + for chart in charts_to_delete { + let chart_info = ChartInfo::new_from_release_name(&chart.name, &chart.namespace); + match helm.uninstall(&chart_info, &vec![]) { + Ok(_) => self.logger().log( + LogLevel::Info, + 
EngineEvent::Deleting( + event_details.clone(), + EventMessage::new_from_safe(format!("Chart `{}` deleted", chart.name)), + ), + ), + Err(e) => { + let message_safe = format!("Can't delete chart `{}`", chart.name); self.logger().log( LogLevel::Error, EngineEvent::Deleting( event_details.clone(), - EventMessage::new_from_safe(format!( - "Can't delete the namespace {}", - qovery_namespace - )), + EventMessage::new(message_safe, Some(e.to_string())), ), ) } @@ -1216,18 +1599,11 @@ impl<'a> Kapsule<'a> { ), ); - match cmd::helm::helm_list( - &kubernetes_config_file_path, - self.cloud_provider().credentials_environment_variables(), - None, - ) { + match helm.list_release(None, &vec![]) { Ok(helm_charts) => { for chart in helm_charts { - match cmd::helm::helm_uninstall_list( - &kubernetes_config_file_path, - vec![chart.clone()], - self.cloud_provider().credentials_environment_variables(), - ) { + let chart_info = ChartInfo::new_from_release_name(&chart.name, &chart.namespace); + match helm.uninstall(&chart_info, &vec![]) { Ok(_) => self.logger().log( LogLevel::Info, EngineEvent::Deleting( @@ -1236,12 +1612,12 @@ impl<'a> Kapsule<'a> { ), ), Err(e) => { - let message_safe = format!("Error deleting chart `{}` deleted", chart.name); + let message_safe = format!("Error deleting chart `{}`: {}", chart.name, e); self.logger().log( LogLevel::Error, EngineEvent::Deleting( event_details.clone(), - EventMessage::new(message_safe, e.message), + EventMessage::new(message_safe, Some(e.to_string())), ), ) } @@ -1254,7 +1630,7 @@ impl<'a> Kapsule<'a> { LogLevel::Error, EngineEvent::Deleting( event_details.clone(), - EventMessage::new(message_safe.to_string(), Some(e.message())), + EventMessage::new(message_safe.to_string(), Some(e.to_string())), ), ) } diff --git a/src/cloud_provider/scaleway/kubernetes/node.rs b/src/cloud_provider/scaleway/kubernetes/node.rs index e1da625d..e1d85bd5 100644 --- a/src/cloud_provider/scaleway/kubernetes/node.rs +++ 
b/src/cloud_provider/scaleway/kubernetes/node.rs @@ -88,6 +88,47 @@ impl FromStr for ScwInstancesType { } } +#[derive(Clone)] +pub struct ScwNodeGroup { + pub name: String, + pub id: Option, + pub min_nodes: i32, + pub max_nodes: i32, + pub instance_type: String, + pub disk_size_in_gib: i32, + pub status: scaleway_api_rs::models::scaleway_k8s_v1_pool::Status, +} + +impl ScwNodeGroup { + pub fn new( + id: Option, + group_name: String, + min_nodes: i32, + max_nodes: i32, + instance_type: String, + disk_size_in_gib: i32, + status: scaleway_api_rs::models::scaleway_k8s_v1_pool::Status, + ) -> Result { + if min_nodes > max_nodes { + let msg = format!( + "The number of minimum nodes ({}) for group name {} is higher than maximum nodes ({})", + &group_name, &min_nodes, &max_nodes + ); + return Err(CommandError::new_from_safe_message(msg)); + } + + Ok(ScwNodeGroup { + name: group_name, + id, + min_nodes, + max_nodes, + instance_type, + disk_size_in_gib, + status, + }) + } +} + #[cfg(test)] mod tests { #[cfg(test)] @@ -104,6 +145,7 @@ mod tests { NodeGroups::new("".to_string(), 2, 2, "dev1-l".to_string(), 20).unwrap(), NodeGroups { name: "".to_string(), + id: None, min_nodes: 2, max_nodes: 2, instance_type: "dev1-l".to_string(), diff --git a/src/cloud_provider/scaleway/router.rs b/src/cloud_provider/scaleway/router.rs index 3769b7c1..53a60057 100644 --- a/src/cloud_provider/scaleway/router.rs +++ b/src/cloud_provider/scaleway/router.rs @@ -1,5 +1,6 @@ use tera::Context as TeraContext; +use crate::cloud_provider::helm::ChartInfo; use crate::cloud_provider::models::{CustomDomain, CustomDomainDataTemplate, Route, RouteDataTemplate}; use crate::cloud_provider::service::{ default_tera_context, delete_router, deploy_stateless_service_error, send_progress_on_long_task, Action, Create, @@ -7,8 +8,9 @@ use crate::cloud_provider::service::{ }; use crate::cloud_provider::utilities::{check_cname_for, print_action, sanitize_name}; use crate::cloud_provider::DeploymentTarget; +use 
crate::cmd::helm; use crate::cmd::helm::Timeout; -use crate::error::{EngineError, EngineErrorCause, EngineErrorScope}; +use crate::error::{EngineError, EngineErrorScope}; use crate::errors::EngineError as NewEngineError; use crate::events::{EnvironmentStep, Stage, ToTransmitter, Transmitter}; use crate::models::{Context, Listen, Listener, Listeners}; @@ -293,25 +295,26 @@ impl Create for Router { } // do exec helm upgrade and return the last deployment status - let helm_history_row = crate::cmd::helm::helm_exec_with_upgrade_history( - kubernetes_config_file_path.as_str(), - environment.namespace(), - helm_release_name.as_str(), - self.selector(), - workspace_dir.as_str(), - self.start_timeout(), - kubernetes.cloud_provider().credentials_environment_variables(), - self.service_type(), + let helm = helm::Helm::new( + &kubernetes_config_file_path, + &kubernetes.cloud_provider().credentials_environment_variables(), ) - .map_err(|e| { - NewEngineError::new_helm_charts_upgrade_error(event_details.clone(), e).to_legacy_engine_error() - })?; + .map_err(|e| helm::to_engine_error(&event_details, e).to_legacy_engine_error())?; + let chart = ChartInfo::new_from_custom_namespace( + helm_release_name, + workspace_dir.clone(), + environment.namespace().to_string(), + 600_i64, + match self.service_type() { + ServiceType::Database(_) => vec![format!("{}/q-values.yaml", &workspace_dir)], + _ => vec![], + }, + false, + self.selector(), + ); - if helm_history_row.is_none() || !helm_history_row.unwrap().is_successfully_deployed() { - return Err(self.engine_error(EngineErrorCause::Internal, "Router has failed to be deployed".into())); - } - - Ok(()) + helm.upgrade(&chart, &vec![]) + .map_err(|e| helm::to_engine_error(&event_details, e).to_legacy_engine_error()) } fn on_create_check(&self) -> Result<(), EngineError> { diff --git a/src/cloud_provider/service.rs b/src/cloud_provider/service.rs index 0384eb2f..e0606f44 100644 --- a/src/cloud_provider/service.rs +++ 
b/src/cloud_provider/service.rs @@ -8,9 +8,12 @@ use tera::Context as TeraContext; use crate::build_platform::Image; use crate::cloud_provider::environment::Environment; +use crate::cloud_provider::helm::ChartInfo; use crate::cloud_provider::kubernetes::Kubernetes; use crate::cloud_provider::utilities::check_domain_for; use crate::cloud_provider::DeploymentTarget; +use crate::cmd; +use crate::cmd::helm; use crate::cmd::helm::Timeout; use crate::cmd::kubectl::ScalingKind::Statefulset; use crate::cmd::kubectl::{kubectl_exec_delete_secret, kubectl_exec_scale_replicas_by_selector, ScalingKind}; @@ -365,30 +368,11 @@ pub fn deploy_user_stateless_service(target: &DeploymentTarget, service: &T) where T: Service + Helm, { - deploy_stateless_service( - target, - service, - service.engine_error( - EngineErrorCause::User( - "Your application has failed to start. \ - Ensure you can run it without issues with `qovery run` and check its logs from the web interface or the CLI with `qovery log`. \ - This issue often occurs due to ports misconfiguration. Make sure you exposed the correct port (using EXPOSE statement in Dockerfile or via Qovery configuration).", - ), - format!( - "{} {} has failed to start ⤬", - service.service_type().name(), - service.name_with_id() - ), - ), - ) + deploy_stateless_service(target, service) } /// deploy a stateless service (app, router, database...) 
on Kubernetes -pub fn deploy_stateless_service( - target: &DeploymentTarget, - service: &T, - thrown_error: EngineError, -) -> Result<(), EngineError> +pub fn deploy_stateless_service(target: &DeploymentTarget, service: &T) -> Result<(), EngineError> where T: Service + Helm, { @@ -441,26 +425,26 @@ where })?; // do exec helm upgrade and return the last deployment status - let helm_history_row = crate::cmd::helm::helm_exec_with_upgrade_history( - kubernetes_config_file_path.as_str(), - environment.namespace(), - helm_release_name.as_str(), - service.selector(), - workspace_dir.as_str(), - service.start_timeout(), - kubernetes.cloud_provider().credentials_environment_variables(), - service.service_type(), + let helm = helm::Helm::new( + &kubernetes_config_file_path, + &kubernetes.cloud_provider().credentials_environment_variables(), ) - .map_err(|e| NewEngineError::new_helm_charts_upgrade_error(event_details.clone(), e).to_legacy_engine_error())?; + .map_err(|e| helm::to_engine_error(&event_details, e).to_legacy_engine_error())?; + let chart = ChartInfo::new_from_custom_namespace( + helm_release_name, + workspace_dir.clone(), + environment.namespace().to_string(), + 600_i64, + match service.service_type() { + ServiceType::Database(_) => vec![format!("{}/q-values.yaml", &workspace_dir)], + _ => vec![], + }, + false, + service.selector(), + ); - // check deployment status - if helm_history_row.is_none() - || !helm_history_row - .expect("Error getting helm history row") - .is_successfully_deployed() - { - return Err(thrown_error); - } + helm.upgrade(&chart, &vec![]) + .map_err(|e| helm::to_engine_error(&event_details, e).to_legacy_engine_error())?; crate::cmd::kubectl::kubectl_exec_is_pod_ready_with_retry( kubernetes_config_file_path.as_str(), @@ -482,48 +466,12 @@ where } /// do specific operations on a stateless service deployment error -pub fn deploy_stateless_service_error(target: &DeploymentTarget, service: &T) -> Result<(), EngineError> +pub fn 
deploy_stateless_service_error(_target: &DeploymentTarget, _service: &T) -> Result<(), EngineError> where T: Service + Helm, { - let kubernetes = target.kubernetes; - let environment = target.environment; - let helm_release_name = service.helm_release_name(); - let event_details = service.get_event_details(Stage::Environment(EnvironmentStep::Deploy)); - let kubernetes_config_file_path = match kubernetes.get_kubeconfig_file_path() { - Ok(path) => path, - Err(e) => return Err(e.to_legacy_engine_error()), - }; - - let history_rows = crate::cmd::helm::helm_exec_history( - kubernetes_config_file_path.as_str(), - environment.namespace(), - helm_release_name.as_str(), - &kubernetes.cloud_provider().credentials_environment_variables(), - ) - .map_err(|e| { - NewEngineError::new_helm_chart_history_error( - event_details.clone(), - helm_release_name.to_string(), - environment.namespace().to_string(), - e, - ) - .to_legacy_engine_error() - })?; - - if history_rows.len() == 1 { - crate::cmd::helm::helm_exec_uninstall( - kubernetes_config_file_path.as_str(), - environment.namespace(), - helm_release_name.as_str(), - kubernetes.cloud_provider().credentials_environment_variables(), - ) - .map_err(|e| { - NewEngineError::new_helm_chart_uninstall_error(event_details.clone(), helm_release_name.to_string(), e) - .to_legacy_engine_error() - })?; - } - + // Nothing to do as we sait --atomic on chart release that we do + // So helm rollback for us if a deployment fails Ok(()) } @@ -789,30 +737,26 @@ where })?; // do exec helm upgrade and return the last deployment status - let helm_history_row = crate::cmd::helm::helm_exec_with_upgrade_history( - kubernetes_config_file_path.to_string(), - environment.namespace(), - service.helm_release_name().as_str(), - service.selector(), - workspace_dir.to_string(), - service.start_timeout(), - kubernetes.cloud_provider().credentials_environment_variables(), - service.service_type(), + let helm = helm::Helm::new( + &kubernetes_config_file_path, + 
&kubernetes.cloud_provider().credentials_environment_variables(), ) - .map_err(|e| { - NewEngineError::new_helm_charts_upgrade_error(event_details.clone(), e).to_legacy_engine_error() - })?; + .map_err(|e| helm::to_engine_error(&event_details, e).to_legacy_engine_error())?; + let chart = ChartInfo::new_from_custom_namespace( + service.helm_release_name(), + workspace_dir.clone(), + environment.namespace().to_string(), + 600_i64, + match service.service_type() { + ServiceType::Database(_) => vec![format!("{}/q-values.yaml", &workspace_dir)], + _ => vec![], + }, + false, + service.selector(), + ); - // check deployment status - if helm_history_row.is_none() || !helm_history_row.unwrap().is_successfully_deployed() { - return Err(service.engine_error( - EngineErrorCause::Internal, - format!( - "{} service fails to be deployed (before start)", - service.service_type().name() - ), - )); - } + helm.upgrade(&chart, &vec![]) + .map_err(|e| helm::to_engine_error(&event_details, e).to_legacy_engine_error())?; // check app status match crate::cmd::kubectl::kubectl_exec_is_pod_ready_with_retry( @@ -1306,34 +1250,15 @@ pub fn helm_uninstall_release( .get_kubeconfig_file_path() .map_err(|e| e.to_legacy_engine_error())?; - let history_rows = crate::cmd::helm::helm_exec_history( - kubernetes_config_file_path.as_str(), - environment.namespace(), - helm_release_name, + let helm = cmd::helm::Helm::new( + &kubernetes_config_file_path, &kubernetes.cloud_provider().credentials_environment_variables(), ) - .map_err(|e| { - NewEngineError::new_k8s_history(event_details.clone(), environment.namespace().to_string(), e) - .to_legacy_engine_error() - })?; + .map_err(|e| NewEngineError::new_helm_error(event_details.clone(), e).to_legacy_engine_error())?; - // if there is no valid history - then delete the helm chart - let first_valid_history_row = history_rows.iter().find(|x| x.is_successfully_deployed()); - - if first_valid_history_row.is_some() { - crate::cmd::helm::helm_exec_uninstall( - 
kubernetes_config_file_path.as_str(), - environment.namespace(), - helm_release_name, - kubernetes.cloud_provider().credentials_environment_variables(), - ) - .map_err(|e| { - NewEngineError::new_helm_chart_uninstall_error(event_details.clone(), helm_release_name.to_string(), e) - .to_legacy_engine_error() - })?; - } - - Ok(()) + let chart = ChartInfo::new_from_release_name(helm_release_name, environment.namespace()); + helm.uninstall(&chart, &vec![]) + .map_err(|e| NewEngineError::new_helm_error(event_details.clone(), e).to_legacy_engine_error()) } /// This function call (start|pause|delete)_in_progress function every 10 seconds when a diff --git a/src/cloud_provider/utilities.rs b/src/cloud_provider/utilities.rs index 1407b3c3..c21bc489 100644 --- a/src/cloud_provider/utilities.rs +++ b/src/cloud_provider/utilities.rs @@ -319,16 +319,23 @@ impl fmt::Display for VersionsNumber { } } -fn cloudflare_dns_resolver() -> Resolver { +fn google_dns_resolver() -> Resolver { let mut resolver_options = ResolverOpts::default(); // We want to avoid cache and using host file of the host, as some provider force caching // which lead to stale response resolver_options.cache_size = 0; - resolver_options.use_hosts_file = false; + resolver_options.use_hosts_file = true; + //resolver_options.ip_strategy = LookupIpStrategy::Ipv4Only; + //let dns = IpAddr::V4(Ipv4Addr::new(192, 168, 1, 254)); + //let resolver = ResolverConfig::from_parts( + // None, + // vec![], + // NameServerConfigGroup::from_ips_clear(&vec![dns], 53, true), + //); - Resolver::new(ResolverConfig::cloudflare(), resolver_options) - .expect("Invalid cloudflare DNS resolver configuration") + //Resolver::new(resolver, resolver_options).unwrap() + Resolver::new(ResolverConfig::google(), resolver_options).expect("Invalid google DNS resolver configuration") } fn get_cname_record_value(resolver: &Resolver, cname: &str) -> Option { @@ -352,7 +359,7 @@ pub fn check_cname_for( cname_to_check: &str, execution_id: &str, ) -> 
Result { - let resolver = cloudflare_dns_resolver(); + let resolver = google_dns_resolver(); let listener_helper = ListenersHelper::new(listeners); let send_deployment_progress = |msg: &str| { @@ -420,7 +427,7 @@ pub fn check_domain_for( execution_id: &str, context_id: &str, ) -> Result<(), EngineError> { - let resolver = cloudflare_dns_resolver(); + let resolver = google_dns_resolver(); for domain in domains_to_check { listener_helper.deployment_in_progress(ProgressInfo::new( @@ -578,7 +585,7 @@ pub fn print_action(cloud_provider_name: &str, struct_name: &str, fn_name: &str, mod tests { use crate::cloud_provider::models::CpuLimits; use crate::cloud_provider::utilities::{ - cloudflare_dns_resolver, convert_k8s_cpu_value_to_f32, get_cname_record_value, + convert_k8s_cpu_value_to_f32, get_cname_record_value, google_dns_resolver, validate_k8s_required_cpu_and_burstable, VersionsNumber, }; use crate::error::StringError; @@ -626,7 +633,7 @@ mod tests { #[test] pub fn test_cname_resolution() { - let resolver = cloudflare_dns_resolver(); + let resolver = google_dns_resolver(); let cname = get_cname_record_value(&resolver, "ci-test-no-delete.qovery.io"); assert_eq!(cname, Some(String::from("qovery.io."))); diff --git a/src/cmd/helm.rs b/src/cmd/helm.rs index 7a59f1ce..c118d829 100644 --- a/src/cmd/helm.rs +++ b/src/cmd/helm.rs @@ -1,27 +1,23 @@ use std::io::{Error, Write}; -use std::path::Path; +use std::path::{Path, PathBuf}; -use tracing::{error, info, span, Level}; +use tracing::{error, info}; -use crate::cloud_provider::helm::{deploy_charts_levels, ChartInfo, CommonChart}; -use crate::cloud_provider::service::ServiceType; -use crate::cmd::helm::HelmLockErrors::{IncorrectFormatDate, NotYetExpired, ParsingError}; -use crate::cmd::kubectl::{kubectl_exec_delete_secret, kubectl_exec_get_secrets}; -use crate::cmd::structs::{HelmChart, HelmHistoryRow, HelmListItem, Secrets}; +use crate::cloud_provider::helm::ChartInfo; +use crate::cmd::helm::HelmCommand::{LIST, ROLLBACK, 
STATUS, UNINSTALL, UPGRADE}; +use crate::cmd::helm::HelmError::{CannotRollback, CmdError, InvalidKubeConfig, ReleaseDoesNotExist}; +use crate::cmd::structs::{HelmChart, HelmListItem}; use crate::cmd::utilities::QoveryCommand; -use crate::error::{SimpleError, SimpleErrorKind}; -use crate::errors::CommandError; -use chrono::{DateTime, Duration, Utc}; -use core::time; -use retry::delay::Fixed; -use retry::Error::Operation; -use retry::OperationResult; +use crate::errors::{CommandError, EngineError}; +use crate::events::EventDetails; +use chrono::Duration; use semver::Version; +use serde_derive::Deserialize; use std::fs::File; use std::str::FromStr; -use std::thread; -const HELM_DEFAULT_TIMEOUT_IN_SECONDS: u32 = 300; +const HELM_DEFAULT_TIMEOUT_IN_SECONDS: u32 = 600; +const HELM_MAX_HISTORY: &str = "50"; pub enum Timeout { Default, @@ -29,7 +25,7 @@ pub enum Timeout { } impl Timeout { - fn value(&self) -> u32 { + pub fn value(&self) -> u32 { match *self { Timeout::Default => HELM_DEFAULT_TIMEOUT_IN_SECONDS, Timeout::Value(t) => t, @@ -37,812 +33,505 @@ impl Timeout { } } -pub fn helm_exec_with_upgrade_history

( - kubernetes_config: P, - namespace: &str, - release_name: &str, - selector: Option, - chart_root_dir: P, - timeout: Timeout, - envs: Vec<(&str, &str)>, - service_type: ServiceType, -) -> Result, CommandError> -where - P: AsRef, -{ - // do exec helm upgrade - info!( - "exec helm upgrade for namespace {} and chart {}", - &namespace, - chart_root_dir.as_ref().to_str().unwrap() - ); +#[derive(thiserror::Error, Debug)] +pub enum HelmError { + #[error("Kubernetes config file path is not valid or does not exist: {0}")] + InvalidKubeConfig(PathBuf), - let path = match chart_root_dir.as_ref().to_str().is_some() { - true => chart_root_dir.as_ref().to_str().unwrap(), - false => "", - } - .to_string(); + #[error("Requested Helm release `{0}` does not exist")] + ReleaseDoesNotExist(String), - let current_chart = CommonChart { - chart_info: ChartInfo::new_from_custom_namespace( - release_name.to_string(), - path.clone(), - namespace.to_string(), - timeout.value() as i64, - match service_type { - ServiceType::Database(_) => vec![format!("{}/q-values.yaml", path)], - _ => vec![], - }, - false, - selector, - ), - }; + #[error("Requested Helm release `{0}` is under an helm lock. 
Ensure release is de-locked before going further")] + ReleaseLocked(String), - let environment_variables: Vec<(String, String)> = - envs.iter().map(|x| (x.0.to_string(), x.1.to_string())).collect(); + #[error("Helm release `{0}` during helm {1:?} has been rollbacked")] + Rollbacked(String, HelmCommand), - deploy_charts_levels( - kubernetes_config.as_ref(), - &environment_variables, - vec![vec![Box::new(current_chart)]], - false, - )?; + #[error("Helm release `{0}` cannot be rollbacked due to be at revision 1")] + CannotRollback(String), - // list helm history - info!( - "exec helm history for namespace {} and chart {}", - namespace, - chart_root_dir.as_ref().to_str().unwrap() - ); + #[error("Helm timed out for release `{0}` during helm {1:?}: {2}")] + Timeout(String, HelmCommand, String), - let helm_history_rows = helm_exec_history(kubernetes_config.as_ref(), namespace, release_name, &envs)?; - - // take the last deployment from helm history - or return none if there is no history - Ok(helm_history_rows - .first() - .map(|helm_history_row| helm_history_row.clone())) + #[error("Helm command `{1:?}` for release {0} terminated with an error: {2:?}")] + CmdError(String, HelmCommand, CommandError), } -pub fn helm_destroy_chart_if_breaking_changes_version_detected( - kubernetes_config: &Path, - environment_variables: &Vec<(&str, &str)>, - chart_info: &ChartInfo, -) -> Result<(), CommandError> { - // If there is a breaking version set for the current helm chart, - // then we compare this breaking version with the currently installed version if any. - // If current installed version is older than breaking change one, then we delete - // the chart before applying it. 
- if let Some(breaking_version) = &chart_info.last_breaking_version_requiring_restart { - let chart_namespace = chart_info.get_namespace_string(); - if let Some(installed_version) = helm_get_chart_version( +#[derive(Debug)] +pub struct Helm { + kubernetes_config: PathBuf, + common_envs: Vec<(String, String)>, +} + +#[derive(Debug, Clone, Copy)] +pub enum HelmCommand { + ROLLBACK, + STATUS, + UPGRADE, + UNINSTALL, + LIST, + DIFF, +} + +#[derive(Debug, Clone, Deserialize, Default)] +pub struct ReleaseInfo { + // https://github.com/helm/helm/blob/12f1bc0acdeb675a8c50a78462ed3917fb7b2e37/pkg/release/status.go + status: String, +} + +#[derive(Debug, Clone, Deserialize, Default)] +pub struct ReleaseStatus { + pub version: u64, + pub info: ReleaseInfo, +} + +impl ReleaseStatus { + fn is_locked(&self) -> bool { + self.info.status.starts_with("pending-") + } +} + +impl Helm { + fn get_all_envs<'a>(&'a self, envs: &'a [(&'a str, &'a str)]) -> Vec<(&'a str, &'a str)> { + let mut all_envs: Vec<(&str, &str)> = self.common_envs.iter().map(|(k, v)| (k.as_str(), v.as_str())).collect(); + all_envs.append(&mut envs.to_vec()); + + all_envs + } + + pub fn new>(kubernetes_config: P, common_envs: &[(&str, &str)]) -> Result { + // Check kube config file is valid + let kubernetes_config = kubernetes_config.as_ref().to_path_buf(); + if !kubernetes_config.exists() || !kubernetes_config.is_file() { + return Err(InvalidKubeConfig(kubernetes_config)); + } + + Ok(Helm { kubernetes_config, - environment_variables.to_owned(), - Some(chart_namespace.as_str()), - chart_info.name.clone(), + common_envs: common_envs + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(), + }) + } + + pub fn check_release_exist(&self, chart: &ChartInfo, envs: &[(&str, &str)]) -> Result { + let namespace = chart.get_namespace_string(); + let args = vec![ + "status", + &chart.name, + "--kubeconfig", + self.kubernetes_config.to_str().unwrap_or_default(), + "--namespace", + &namespace, + "-o", + "json", + 
]; + + let mut stdout = String::new(); + let mut stderr = String::new(); + match helm_exec_with_output( + &args, + &self.get_all_envs(envs), + |line| stdout.push_str(&line), + |line| stderr.push_str(&line), ) { - if installed_version.le(breaking_version) { - return helm_exec_uninstall( - kubernetes_config, - chart_namespace.as_str(), - chart_info.name.as_str(), - environment_variables.to_owned(), - ); + Err(_) if stderr.contains("release: not found") => Err(ReleaseDoesNotExist(chart.name.clone())), + Err(err) => { + stderr.push_str(&err.message()); + let error = CommandError::new(stderr, err.message_safe()); + Err(CmdError(chart.name.clone(), STATUS, error)) + } + Ok(_) => { + let status: ReleaseStatus = serde_json::from_str(&stdout).unwrap_or_default(); + Ok(status) } } } - Ok(()) -} + pub fn rollback(&self, chart: &ChartInfo, envs: &[(&str, &str)]) -> Result<(), HelmError> { + if self.check_release_exist(chart, envs)?.version <= 1 { + return Err(CannotRollback(chart.name.clone())); + } -pub fn helm_exec_upgrade_with_chart_info

( - kubernetes_config: P, - envs: &Vec<(&str, &str)>, - chart: &ChartInfo, -) -> Result<(), CommandError> -where - P: AsRef, -{ - let debug = false; - let timeout_string = format!("{}s", &chart.timeout_in_seconds); + let timeout = format!("{}s", &chart.timeout_in_seconds); + let namespace = chart.get_namespace_string(); + let args = vec![ + "rollback", + &chart.name, + "--kubeconfig", + self.kubernetes_config.to_str().unwrap_or_default(), + "--namespace", + &namespace, + "--timeout", + &timeout, + "--history-max", + HELM_MAX_HISTORY, + "--cleanup-on-fail", + "--force", + "--wait", + ]; - let mut args_string: Vec = vec![ - "upgrade", - "--kubeconfig", - kubernetes_config.as_ref().to_str().unwrap(), - "--create-namespace", - "--install", - "--timeout", - timeout_string.as_str(), - "--history-max", - "50", - "--namespace", - chart.get_namespace_string().as_str(), - ] - .into_iter() - .map(|x| x.to_string()) - .collect(); - - if debug { - args_string.push("-o".to_string()); - args_string.push("json".to_string()); - } - // warn: don't add debug or json output won't work - if chart.atomic { - args_string.push("--atomic".to_string()) - } - if chart.force_upgrade { - args_string.push("--force".to_string()) - } - if chart.dry_run { - args_string.push("--dry-run".to_string()) - } - if chart.wait { - args_string.push("--wait".to_string()) + let mut stderr = String::new(); + match helm_exec_with_output(&args, &self.get_all_envs(envs), |_| {}, |line| stderr.push_str(&line)) { + Err(err) => { + stderr.push_str(&err.message()); + let error = CommandError::new(stderr, err.message_safe()); + Err(CmdError(chart.name.clone(), ROLLBACK, error)) + } + Ok(_) => Ok(()), + } } - // overrides and files overrides - for value in &chart.values { - args_string.push("--set".to_string()); - args_string.push(format!("{}={}", value.key, value.value)); - } - for value_file in &chart.values_files { - args_string.push("-f".to_string()); - args_string.push(value_file.clone()); - } - for value_file in 
&chart.yaml_files_content { - let file_path = format!("{}/{}", chart.path, &value_file.filename); - let file_create = || -> Result<(), Error> { - let mut file = File::create(&file_path)?; - file.write_all(value_file.yaml_content.as_bytes())?; - Ok(()) - }; - // no need to validate yaml as it will be done by helm - if let Err(e) = file_create() { - let safe_message = format!("Error while writing yaml content to file `{}`", &file_path); - return Err(CommandError::new( - format!( - "{}\nContent\n{}\nError: {}", - safe_message.to_string(), - value_file.yaml_content, - e - ), - Some(safe_message.to_string()), - )); - }; + pub fn uninstall(&self, chart: &ChartInfo, envs: &[(&str, &str)]) -> Result<(), HelmError> { + // If the release does not exist, we do not return an error + match self.check_release_exist(chart, envs) { + Ok(_) => {} + Err(ReleaseDoesNotExist(_)) => return Ok(()), + Err(err) => return Err(err), + } - args_string.push("-f".to_string()); - args_string.push(file_path.clone()); + let timeout = format!("{}s", &chart.timeout_in_seconds); + let namespace = chart.get_namespace_string(); + let args = vec![ + "uninstall", + &chart.name, + "--kubeconfig", + self.kubernetes_config.to_str().unwrap_or_default(), + "--namespace", + &namespace, + "--timeout", + &timeout, + "--wait", + ]; + + let mut stderr = String::new(); + match helm_exec_with_output(&args, &self.get_all_envs(envs), |_| {}, |line| stderr.push_str(&line)) { + Err(err) => { + stderr.push_str(&err.message()); + let error = CommandError::new(stderr, err.message_safe()); + Err(CmdError(chart.name.clone(), UNINSTALL, error)) + } + Ok(_) => Ok(()), + } } - // add last elements - args_string.push(chart.name.to_string()); - args_string.push(chart.path.to_string()); + fn unlock_release(&self, chart: &ChartInfo, envs: &[(&str, &str)]) -> Result<(), HelmError> { + match self.check_release_exist(chart, envs) { + Ok(release) if release.is_locked() && release.version <= 1 => { + info!("Helm lock detected. 
Uninstalling it as it is the first version and rollback is not possible"); + self.uninstall(chart, envs)?; + } + Ok(release) if release.is_locked() => { + info!("Helm lock detected. Forcing rollback to previous version"); + self.rollback(chart, envs)?; + } + Ok(release) => { + // Happy path nothing to do + debug!("Helm release status: {:?}", release) + } + Err(_) => {} // Happy path nothing to do + } - let mut json_output_string = String::new(); - let mut error_message = String::new(); + Ok(()) + } - let result = retry::retry(Fixed::from_millis(15000).take(3), || { - let args = args_string.iter().map(|x| x.as_str()).collect(); - let mut helm_error_during_deployment = SimpleError { - kind: SimpleErrorKind::Other, - message: None, - }; - let mut should_clean_helm_lock = false; + /// List deployed helm charts + /// + /// # Arguments + /// + /// * `envs` - environment variables required for kubernetes connection + /// * `namespace` - list charts from a kubernetes namespace or use None to select all namespaces + pub fn list_release(&self, namespace: Option<&str>, envs: &[(&str, &str)]) -> Result, HelmError> { + let mut helm_args = vec![ + "list", + "-a", + "--kubeconfig", + self.kubernetes_config.to_str().unwrap_or_default(), + "-o", + "json", + ]; + match namespace { + Some(ns) => helm_args.append(&mut vec!["-n", ns]), + None => helm_args.push("-A"), + } + let mut output_string: Vec = Vec::with_capacity(20); + if let Err(cmd_error) = helm_exec_with_output( + &helm_args, + &self.get_all_envs(envs), + |line| output_string.push(line), + |line| error!("{}", line), + ) { + return Err(HelmError::CmdError("none".to_string(), LIST, cmd_error)); + } + + let values = serde_json::from_str::>(&output_string.join("")); + let mut helms_charts: Vec = Vec::new(); + + match values { + Ok(all_helms) => { + for helm in all_helms { + let raw_version = helm.chart.replace(format!("{}-", helm.name).as_str(), ""); + let version = Version::from_str(raw_version.as_str()).ok(); + 
helms_charts.push(HelmChart::new(helm.name, helm.namespace, version)) + } + + Ok(helms_charts) + } + Err(e) => { + let message_safe = "Error while deserializing all helms names"; + Err(HelmError::CmdError( + "none".to_string(), + LIST, + CommandError::new( + format!("{}, error: {}", message_safe, e), + Some(message_safe.to_string()), + ), + )) + } + } + } + + pub fn get_chart_version( + &self, + chart_name: String, + namespace: Option<&str>, + envs: &[(&str, &str)], + ) -> Result, HelmError> { + let deployed_charts = self.list_release(namespace, envs)?; + for chart in deployed_charts { + if chart.name == chart_name { + return Ok(chart.version); + } + } + + // found nothing ;'( + Ok(None) + } + + pub fn upgrade_diff(&self, chart: &ChartInfo, envs: &[(&str, &str)]) -> Result<(), HelmError> { + let mut args_string: Vec = vec![ + "diff".to_string(), + "upgrade".to_string(), + "--kubeconfig".to_string(), + self.kubernetes_config.to_str().unwrap_or_default().to_string(), + "--install".to_string(), + "--namespace".to_string(), + chart.get_namespace_string(), + ]; + + for value in &chart.values { + args_string.push("--set".to_string()); + args_string.push(format!("{}={}", value.key, value.value)); + } + + for value_file in &chart.values_files { + args_string.push("-f".to_string()); + args_string.push(value_file.clone()); + } + + for value_file in &chart.yaml_files_content { + let file_path = format!("{}/{}", chart.path, &value_file.filename); + let file_create = || -> Result<(), Error> { + let mut file = File::create(&file_path)?; + file.write_all(value_file.yaml_content.as_bytes())?; + Ok(()) + }; + + // no need to validate yaml as it will be done by helm + if let Err(e) = file_create() { + let safe_message = format!("Error while writing yaml content to file `{}`", &file_path); + let cmd_err = CommandError::new( + format!("{}\nContent\n{}\nError: {}", safe_message, value_file.yaml_content, e), + Some(safe_message), + ); + return Err(HelmError::CmdError(chart.name.clone(), 
HelmCommand::UPGRADE, cmd_err)); + }; + + args_string.push("-f".to_string()); + args_string.push(file_path); + } + + // add last elements + args_string.push(chart.name.clone()); + args_string.push(chart.path.clone()); + + let mut stderr_msg = String::new(); let helm_ret = helm_exec_with_output( - args, - envs.clone(), + &args_string.iter().map(|x| x.as_str()).collect::>(), + &self.get_all_envs(envs), |line| { info!("{}", line); - json_output_string = line }, |line| { - if line.contains("another operation (install/upgrade/rollback) is in progress") { - error_message = format!("helm lock detected for {}, looking for cleaning lock", chart.name); - helm_error_during_deployment.message = Some(error_message.clone()); - warn!("{}. {}", &error_message, &line); - should_clean_helm_lock = true; - return; - } - - if !chart.parse_stderr_for_error { - warn!("chart {}: {}", chart.name, line); - return; - } - - // helm errors are not json formatted unfortunately - if line.contains("has been rolled back") { - error_message = format!("deployment {} has been rolled back", chart.name); - helm_error_during_deployment.message = Some(error_message.clone()); - warn!("{}. {}", &error_message, &line); - } else if line.contains("has been uninstalled") { - error_message = format!("deployment {} has been uninstalled due to failure", chart.name); - helm_error_during_deployment.message = Some(error_message.clone()); - warn!("{}. {}", &error_message, &line); - // special fix for prometheus operator - } else if line.contains("info: skipping unknown hook: \"crd-install\"") { - debug!("chart {}: {}", chart.name, line); - } else { - error_message = format!("deployment {} has failed", chart.name); - helm_error_during_deployment.message = Some(error_message.clone()); - error!("{}. 
{}", &error_message, &line); - } + stderr_msg.push_str(&line); + warn!("chart {}: {}", chart.name, line); }, ); - if should_clean_helm_lock { - match clean_helm_lock( - &kubernetes_config, - chart.get_namespace_string().as_str(), - &chart.name, - chart.timeout_in_seconds, - envs.clone(), - ) { - Ok(_) => info!("Helm lock detected and cleaned"), - Err(e) => warn!("Couldn't cleanup Helm lock. {:?}", e.message()), + match helm_ret { + // Ok is ok + Ok(_) => Ok(()), + Err(err) => { + error!("Helm error: {:?}", err); + Err(CmdError( + chart.name.clone(), + HelmCommand::DIFF, + CommandError::new(stderr_msg.clone(), Some(stderr_msg)), + )) } } + } + + pub fn upgrade(&self, chart: &ChartInfo, envs: &[(&str, &str)]) -> Result<(), HelmError> { + // Due to crash or error it is possible that the release is under an helm lock + // Try to un-stuck the situation first if needed + // We don't care if the rollback failed, as it is a best effort to remove the lock + // and to re-launch an upgrade just after + let unlock_ret = self.unlock_release(chart, envs); + info!("Helm lock status: {:?}", unlock_ret); + + let debug = false; + let timeout_string = format!("{}s", &chart.timeout_in_seconds); + + let mut args_string: Vec = vec![ + "upgrade".to_string(), + "--kubeconfig".to_string(), + self.kubernetes_config.to_str().unwrap_or_default().to_string(), + "--create-namespace".to_string(), + "--install".to_string(), + "--timeout".to_string(), + timeout_string.as_str().to_string(), + "--history-max".to_string(), + HELM_MAX_HISTORY.to_string(), + "--namespace".to_string(), + chart.get_namespace_string(), + ]; + + if debug { + args_string.push("-o".to_string()); + args_string.push("json".to_string()); + } + + // warn: don't add debug or json output won't work + if chart.atomic { + args_string.push("--atomic".to_string()) + } + if chart.force_upgrade { + args_string.push("--force".to_string()) + } + if chart.dry_run { + args_string.push("--dry-run".to_string()) + } + if chart.wait { + 
args_string.push("--wait".to_string()) + } + + // overrides and files overrides + for value in &chart.values { + args_string.push("--set".to_string()); + args_string.push(format!("{}={}", value.key, value.value)); + } + + for value_file in &chart.values_files { + args_string.push("-f".to_string()); + args_string.push(value_file.clone()); + } + for value_file in &chart.yaml_files_content { + let file_path = format!("{}/{}", chart.path, &value_file.filename); + let file_create = || -> Result<(), Error> { + let mut file = File::create(&file_path)?; + file.write_all(value_file.yaml_content.as_bytes())?; + Ok(()) + }; + + // no need to validate yaml as it will be done by helm + if let Err(e) = file_create() { + let safe_message = format!("Error while writing yaml content to file `{}`", &file_path); + let cmd_err = CommandError::new( + format!("{}\nContent\n{}\nError: {}", safe_message, value_file.yaml_content, e), + Some(safe_message), + ); + return Err(HelmError::CmdError(chart.name.clone(), HelmCommand::UPGRADE, cmd_err)); + }; + + args_string.push("-f".to_string()); + args_string.push(file_path); + } + + // add last elements + args_string.push(chart.name.clone()); + args_string.push(chart.path.clone()); + + let mut error_message: Vec = vec![]; + + let helm_ret = helm_exec_with_output( + &args_string.iter().map(|x| x.as_str()).collect::>(), + &self.get_all_envs(envs), + |line| { + info!("{}", line); + }, + |line| { + warn!("chart {}: {}", chart.name, line); + error_message.push(line); + }, + ); match helm_ret { - Ok(_) => { - if helm_error_during_deployment.message.is_some() { - OperationResult::Retry(helm_error_during_deployment) + // Ok is ok + Ok(_) => Ok(()), + Err(err) => { + error!("Helm error: {:?}", err); + + // Try do define/specify a bit more the message + let stderr_msg: String = error_message.into_iter().collect(); + let stderr_msg = format!("{}: {}", stderr_msg, err.message()); + let error = if stderr_msg.contains("another operation 
(install/upgrade/rollback) is in progress") { + HelmError::ReleaseLocked(chart.name.clone()) + } else if stderr_msg.contains("has been rolled back") { + HelmError::Rollbacked(chart.name.clone(), UPGRADE) + } else if stderr_msg.contains("timed out waiting") { + HelmError::Timeout(chart.name.clone(), UPGRADE, stderr_msg) } else { - OperationResult::Ok(()) - } - } - Err(e) => OperationResult::Retry(SimpleError::new(SimpleErrorKind::Other, Some(e.message()))), - } - }); - - match result { - Ok(_) => Ok(()), - Err(Operation { error, .. }) => { - return Err(CommandError::new( - error.message.unwrap_or("No error message".to_string()), - None, - )); - } - Err(retry::Error::Internal(e)) => return Err(CommandError::new(e, None)), - } -} - -pub fn clean_helm_lock

( - kubernetes_config: P, - namespace: &str, - release_name: &str, - timeout: i64, - envs: Vec<(&str, &str)>, -) -> Result<(), CommandError> -where - P: AsRef, -{ - let selector = format!("name={}", release_name); - let timeout_i64 = timeout; - - let result = retry::retry(Fixed::from_millis(3000).take(5), || { - // get secrets for this helm deployment - let result = match kubectl_exec_get_secrets(&kubernetes_config, namespace, &selector, envs.clone()) { - Ok(x) => x, - Err(e) => return OperationResult::Retry(e), - }; - - // get helm release name (secret) containing the lock and clean if possible - match helm_get_secret_lock_name(&result, timeout_i64.clone()) { - Ok(x) => return OperationResult::Ok(x), - Err(e) => match e.kind { - ParsingError => OperationResult::Retry(CommandError::new(e.message, None)), - IncorrectFormatDate => OperationResult::Retry(CommandError::new(e.message, None)), - NotYetExpired => { - if e.wait_before_release_lock.is_none() { - return OperationResult::Retry(CommandError::new_from_safe_message( - "Missing helm time to wait information, before releasing the lock".to_string(), - )); - }; - - let time_to_wait = e.wait_before_release_lock.unwrap() as u64; - // wait 2min max to avoid the customer to re-launch a job or exit - if time_to_wait < 120 { - info!("waiting {}s before retrying the deployment...", time_to_wait); - thread::sleep(time::Duration::from_secs(time_to_wait)); - } else { - return OperationResult::Err(CommandError::new(e.message, None)); - } - - // retrieve now the secret - match helm_get_secret_lock_name(&result, timeout_i64.clone()) { - Ok(x) => OperationResult::Ok(x), - Err(e) => OperationResult::Err(CommandError::new(e.message, None)), - } - } - }, - } - }); - - match result { - Err(err) => { - return match err { - retry::Error::Operation { .. 
} => Err(CommandError::new_from_safe_message(format!( - "internal error while trying to deploy helm chart {}", - release_name - ))), - retry::Error::Internal(err) => Err(CommandError::new_from_safe_message(err)), - } - } - Ok(x) => { - if let Err(e) = kubectl_exec_delete_secret(&kubernetes_config, namespace, x.as_str(), envs.clone()) { - return Err(e); - }; - Ok(()) - } - } -} - -pub enum HelmDeploymentErrors { - SimpleError, - HelmLockError, -} - -#[derive(Debug)] -pub enum HelmLockErrors { - ParsingError, - IncorrectFormatDate, - NotYetExpired, -} - -#[derive(Debug)] -pub struct HelmLockError { - kind: HelmLockErrors, - message: String, - wait_before_release_lock: Option, -} - -/// Get helm secret name containing the lock -pub fn helm_get_secret_lock_name(secrets_items: &Secrets, timeout: i64) -> Result { - match secrets_items.items.last() { - None => Err(HelmLockError { - kind: ParsingError, - message: "couldn't parse the list of secrets, it's certainly empty".to_string(), - wait_before_release_lock: None, - }), - Some(x) => { - let creation_time = match DateTime::parse_from_rfc3339(&x.metadata.creation_timestamp) { - Ok(x) => x, - Err(e) => { - return Err(HelmLockError { - kind: IncorrectFormatDate, - message: format!("incorrect format date input from secrets. {:?}", e), - wait_before_release_lock: None, - }) - } - }; - let now = Utc::now().timestamp(); - let max_timeout = creation_time.timestamp() + timeout; - - // not yet expired - if &now < &max_timeout { - let time_to_wait = &max_timeout - &now; - return Err(HelmLockError { - kind: NotYetExpired, - message: format!( - "helm lock has not yet expired, please wait {}s before retrying", - &time_to_wait - ), - wait_before_release_lock: Some(time_to_wait), - }); - } - - //expired - Ok(x.metadata.name.to_string()) - } - } -} - -pub fn helm_exec_uninstall_with_chart_info

( - kubernetes_config: P, - envs: &Vec<(&str, &str)>, - chart: &ChartInfo, -) -> Result<(), CommandError> -where - P: AsRef, -{ - helm_exec_with_output( - vec![ - "uninstall", - "--kubeconfig", - kubernetes_config.as_ref().to_str().unwrap(), - "--namespace", - chart.get_namespace_string().as_str(), - &chart.name, - ], - envs.clone(), - |line| info!("{}", line.as_str()), - |line| error!("{}", line.as_str()), - ) -} - -pub fn helm_exec_uninstall

( - kubernetes_config: P, - namespace: &str, - release_name: &str, - envs: Vec<(&str, &str)>, -) -> Result<(), CommandError> -where - P: AsRef, -{ - helm_exec_with_output( - vec![ - "uninstall", - "--kubeconfig", - kubernetes_config.as_ref().to_str().unwrap(), - "--namespace", - namespace, - release_name, - ], - envs, - |line| info!("{}", line.as_str()), - |line| error!("{}", line.as_str()), - ) -} - -pub fn helm_exec_history

( - kubernetes_config: P, - namespace: &str, - release_name: &str, - envs: &Vec<(&str, &str)>, -) -> Result, CommandError> -where - P: AsRef, -{ - let mut output_string = String::new(); - let _ = helm_exec_with_output( - // WARN: do not add argument --debug, otherwise JSON decoding will not work - vec![ - "history", - "--kubeconfig", - kubernetes_config.as_ref().to_str().unwrap(), - "--namespace", - namespace, - "-o", - "json", - release_name, - ], - envs.clone(), - |line| output_string = line, - |line| { - if line.contains("Error: release: not found") { - info!("{}", line) - } else { - error!("{}", line) - } - }, - ); - - // TODO better check, release not found - - let mut results = match serde_json::from_str::>(output_string.as_str()) { - Ok(x) => x, - Err(_) => vec![], - }; - - // unsort results by revision number - let _ = results.sort_by_key(|x| x.revision); - // there is no performance penalty to do it in 2 operations instead of one, but who really cares anyway - let _ = results.reverse(); - - Ok(results) -} - -pub fn helm_uninstall_list

( - kubernetes_config: P, - helm_list: Vec, - envs: Vec<(&str, &str)>, -) -> Result -where - P: AsRef, -{ - let mut output_vec: Vec = Vec::new(); - - for chart in helm_list { - match helm_exec_with_output( - vec![ - "uninstall", - "-n", - chart.namespace.as_str(), - chart.name.as_str(), - "--kubeconfig", - kubernetes_config.as_ref().to_str().unwrap(), - ], - envs.clone(), - |line| output_vec.push(line), - |line| error!("{}", line), - ) { - Ok(_) => info!( - "Helm uninstall succeed for {} on namespace {}", - chart.name, chart.namespace - ), - Err(_) => info!( - "Helm history found for release name {} on namespace {}", - chart.name, chart.namespace - ), - }; - } - - Ok(output_vec.join("\n")) -} - -pub fn helm_exec_upgrade_with_override_file

( - kubernetes_config: P, - namespace: &str, - release_name: &str, - chart_root_dir: P, - override_file: &str, - envs: Vec<(&str, &str)>, -) -> Result<(), CommandError> -where - P: AsRef, -{ - helm_exec_with_output( - vec![ - "upgrade", - "--kubeconfig", - kubernetes_config.as_ref().to_str().unwrap(), - "--create-namespace", - "--install", - "--history-max", - "50", - "--wait", - "--namespace", - namespace, - release_name, - chart_root_dir.as_ref().to_str().unwrap(), - "-f", - override_file, - ], - envs, - |line| info!("{}", line.as_str()), - |line| { - // don't crash errors if releases are not found - if line.contains("Error: release: not found") { - info!("{}", line) - } else { - error!("{}", line) - } - }, - ) -} - -pub fn helm_exec_with_upgrade_history_with_override

( - kubernetes_config: P, - namespace: &str, - release_name: &str, - chart_root_dir: P, - override_file: &str, - envs: Vec<(&str, &str)>, -) -> Result, CommandError> -where - P: AsRef, -{ - // do exec helm upgrade - info!( - "exec helm upgrade for namespace {} and chart {}", - namespace, - chart_root_dir.as_ref().to_str().unwrap() - ); - - let _ = helm_exec_upgrade_with_override_file( - kubernetes_config.as_ref(), - namespace, - release_name, - chart_root_dir.as_ref(), - override_file, - envs.clone(), - )?; - - // list helm history - info!( - "exec helm history for namespace {} and chart {}", - namespace, - chart_root_dir.as_ref().to_str().unwrap() - ); - - let helm_history_rows = helm_exec_history(kubernetes_config.as_ref(), namespace, release_name, &envs)?; - - // take the last deployment from helm history - or return none if there is no history - Ok(helm_history_rows - .first() - .map(|helm_history_row| helm_history_row.clone())) -} - -pub fn is_chart_deployed

( - kubernetes_config: P, - envs: Vec<(&str, &str)>, - namespace: Option<&str>, - chart_name: String, -) -> Result -where - P: AsRef, -{ - let deployed_charts = helm_list(kubernetes_config, envs, namespace)?; - - for chart in deployed_charts { - if chart.name == chart_name { - return Ok(true); - } - } - - Ok(false) -} - -pub fn helm_get_chart_version

( - kubernetes_config: P, - envs: Vec<(&str, &str)>, - namespace: Option<&str>, - chart_name: String, -) -> Option -where - P: AsRef, -{ - match helm_list(kubernetes_config, envs, namespace) { - Ok(deployed_charts) => { - for chart in deployed_charts { - if chart.name == chart_name { - return chart.version; - } - } - - None - } - Err(_) => None, - } -} - -/// List deployed helm charts -/// -/// # Arguments -/// -/// * `kubernetes_config` - kubernetes config path -/// * `envs` - environment variables required for kubernetes connection -/// * `namespace` - list charts from a kubernetes namespace or use None to select all namespaces -pub fn helm_list

( - kubernetes_config: P, - envs: Vec<(&str, &str)>, - namespace: Option<&str>, -) -> Result, CommandError> -where - P: AsRef, -{ - let mut output_vec: Vec = Vec::new(); - let mut helm_args = vec![ - "list", - "--kubeconfig", - kubernetes_config.as_ref().to_str().unwrap(), - "-o", - "json", - ]; - match namespace { - Some(ns) => helm_args.append(&mut vec!["-n", ns]), - None => helm_args.push("-A"), - } - - let _ = helm_exec_with_output(helm_args, envs, |line| output_vec.push(line), |line| error!("{}", line)); - - let output_string: String = output_vec.join(""); - let values = serde_json::from_str::>(output_string.as_str()); - let mut helms_charts: Vec = Vec::new(); - - match values { - Ok(all_helms) => { - for helm in all_helms { - let raw_version = helm.chart.replace(format!("{}-", helm.name).as_str(), ""); - let version = match Version::from_str(raw_version.as_str()) { - Ok(v) => Some(v), - Err(_) => None, + CmdError( + chart.name.clone(), + HelmCommand::UPGRADE, + CommandError::new(stderr_msg.clone(), Some(stderr_msg)), + ) }; - helms_charts.push(HelmChart::new(helm.name, helm.namespace, version)) + Err(error) } } - Err(e) => { - let message_safe = "Error while deserializing all helms names"; - return Err(CommandError::new( - format!("{}, error: {}", message_safe, e), - Some(message_safe.to_string()), - )); + } + + pub fn uninstall_chart_if_breaking_version( + &self, + chart: &ChartInfo, + envs: &[(&str, &str)], + ) -> Result<(), HelmError> { + // If there is a breaking version set for the current helm chart, + // then we compare this breaking version with the currently installed version if any. + // If current installed version is older than breaking change one, then we delete + // the chart before applying it. + if let Some(breaking_version) = &chart.last_breaking_version_requiring_restart { + if let Some(installed_version) = + self.get_chart_version(chart.name.clone(), Some(chart.get_namespace_string().as_str()), envs)? 
+ { + if installed_version.le(breaking_version) { + self.uninstall(&chart, envs)?; + } + } } - } - Ok(helms_charts) + Ok(()) + } } -pub fn helm_upgrade_diff_with_chart_info

( - kubernetes_config: P, - envs: &Vec<(String, String)>, - chart: &ChartInfo, -) -> Result<(), CommandError> -where - P: AsRef, -{ - let mut environment_variables = envs.clone(); - environment_variables.push(("HELM_NAMESPACE".to_string(), chart.get_namespace_string())); - - let mut args_string: Vec = vec![ - "diff", - "upgrade", - "--no-color", - "--allow-unreleased", - "--kubeconfig", - kubernetes_config.as_ref().to_str().unwrap(), - ] - .into_iter() - .map(|x| x.to_string()) - .collect(); - - // overrides and files overrides - for value in &chart.values { - args_string.push("--set".to_string()); - args_string.push(format!("{}={}", value.key, value.value)); - } - for value_file in &chart.values_files { - args_string.push("-f".to_string()); - args_string.push(value_file.clone()); - } - for value_file in &chart.yaml_files_content { - let file_path = format!("{}/{}", chart.path, &value_file.filename); - let file_create = || -> Result<(), Error> { - let mut file = File::create(&file_path)?; - file.write_all(value_file.yaml_content.as_bytes())?; - Ok(()) - }; - // no need to validate yaml as it will be done by helm - if let Err(e) = file_create() { - let safe_message = format!("Error while writing yaml content to file `{}`", &file_path); - return Err(CommandError::new( - format!( - "{}\nContent\n{}\nError: {}", - safe_message.to_string(), - value_file.yaml_content, - e - ), - Some(safe_message.to_string()), - )); - }; - - args_string.push("-f".to_string()); - args_string.push(file_path.clone()); - } - - // add last elements - args_string.push(chart.name.to_string()); - args_string.push(chart.path.to_string()); - - helm_exec_with_output( - args_string.iter().map(|x| x.as_str()).collect(), - environment_variables - .iter() - .map(|x| (x.0.as_str(), x.1.as_str())) - .collect(), - |line| info!("{}", line), - |line| error!("{}", line), - ) -} - -pub fn helm_exec(args: Vec<&str>, envs: Vec<(&str, &str)>) -> Result<(), CommandError> { - helm_exec_with_output( - args, - envs, 
- |line| { - span!(Level::INFO, "{}", "{}", line); - }, - |line_err| { - span!(Level::INFO, "{}", "{}", line_err); - }, - ) -} - -pub fn helm_exec_with_output( - args: Vec<&str>, - envs: Vec<(&str, &str)>, +fn helm_exec_with_output( + args: &[&str], + envs: &[(&str, &str)], stdout_output: F, stderr_output: X, ) -> Result<(), CommandError> @@ -853,69 +542,307 @@ where // Note: Helm CLI use spf13/cobra lib for the CLI; One function is mainly used to return an error if a command failed. // Helm returns an error each time a command does not succeed as they want. Which leads to handling error with status code 1 // It means that the command successfully ran, but it didn't terminate as expected - let mut cmd = QoveryCommand::new("helm", &args, &envs); + let mut cmd = QoveryCommand::new("helm", args, envs); match cmd.exec_with_timeout(Duration::max_value(), stdout_output, stderr_output) { Err(err) => Err(CommandError::new(format!("{:?}", err), None)), _ => Ok(()), } } +pub fn to_command_error(error: HelmError) -> CommandError { + CommandError::new_from_safe_message(error.to_string()) +} + +pub fn to_engine_error(event_details: &EventDetails, error: HelmError) -> EngineError { + EngineError::new_helm_error(event_details.clone(), error) +} + +#[cfg(feature = "test-with-kube")] #[cfg(test)] mod tests { - use crate::cmd::helm::helm_get_secret_lock_name; - use crate::cmd::structs::Secrets; - use chrono::{DateTime, NaiveDateTime, Utc}; + use crate::cloud_provider::helm::{ChartInfo, ChartSetValue}; + use crate::cmd::helm::{helm_exec_with_output, Helm, HelmError}; + use crate::cmd::utilities::QoveryCommand; + use std::sync::{Arc, Barrier}; + use std::thread; + use std::time::Duration; + + struct HelmTestCtx { + helm: Helm, + chart: ChartInfo, + } + + impl HelmTestCtx { + fn cleanup(&self) { + let ret = self.helm.uninstall(&self.chart, &vec![]); + assert!(ret.is_ok()) + } + + fn new(release_name: &str) -> HelmTestCtx { + let chart = ChartInfo::new_from_custom_namespace( + 
release_name.to_string(), + "tests/helm/simple_nginx".to_string(), + "default".to_string(), + 600, + vec![], + false, + None, + ); + let mut kube_config = dirs::home_dir().unwrap(); + kube_config.push(".kube/config"); + let helm = Helm::new(kube_config.to_str().unwrap(), &vec![]).unwrap(); + + let cleanup = HelmTestCtx { helm, chart }; + cleanup.cleanup(); + cleanup + } + } + + impl Drop for HelmTestCtx { + fn drop(&mut self) { + self.cleanup() + } + } #[test] - fn test_helm_lock_get_name() { - let json_content = r#" -{ - "apiVersion": "v1", - "items": [ - { - "apiVersion": "v1", - "data": { - "release": "coucou" - }, - "kind": "Secret", - "metadata": { - "creationTimestamp": "2021-09-02T23:20:36Z", - "labels": { - "modifiedAt": "1632324195", - "name": "cert-manager", - "owner": "helm", - "status": "superseded", - "version": "1" - }, - "name": "sh.helm.release.v1.cert-manager.v1", - "namespace": "cert-manager", - "resourceVersion": "7287406", - "uid": "173b76c4-4f48-4544-8928-64a9b8b376d5" - }, - "type": "helm.sh/release.v1" - } - ], - "kind": "List", - "metadata": { - "resourceVersion": "", - "selfLink": "" - } -} - "#; - let mut secrets = serde_json::from_str::(json_content).unwrap(); - - // expired lock should be ok - let res = helm_get_secret_lock_name(&secrets, 300).unwrap(); - assert_eq!(res, "sh.helm.release.v1.cert-manager.v1".to_string()); - - // lock is not expired yet - let time_in_future = NaiveDateTime::from_timestamp(Utc::now().timestamp() + 30, 0); - let time_in_future_datetime_format: DateTime = DateTime::from_utc(time_in_future, Utc); - secrets.items[0].metadata.creation_timestamp = time_in_future_datetime_format.to_rfc3339(); - let res = helm_get_secret_lock_name(&secrets, 300); - assert_eq!( - res.unwrap_err().message, - "helm lock has not yet expired, please wait 330s before retrying".to_string() - ) + fn check_version() { + let mut output = String::new(); + let _ = helm_exec_with_output(&vec!["version"], &vec![], |line| output.push_str(&line), 
|_line| {}); + assert!(output.contains("Version:\"v3.7.2\"")); + } + + #[test] + fn test_release_exist() { + let HelmTestCtx { ref helm, ref chart } = HelmTestCtx::new("test-release-exist"); + let ret = helm.check_release_exist(chart, &vec![]); + + assert!(matches!(ret, Err(HelmError::ReleaseDoesNotExist(test)) if test == chart.name)) + } + + #[test] + fn test_list_release() { + let HelmTestCtx { + ref helm, + ref mut chart, + } = HelmTestCtx::new("test-list-release"); + chart.custom_namespace = Some("hello-my-friend-this-is-a-test".to_string()); + + // no existing namespace should return an empty array + let ret = helm.list_release(Some("tsdfsfsdf"), &vec![]); + assert!(matches!(ret, Ok(vec) if vec.is_empty())); + + // install something + let ret = helm.upgrade(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + + // We should have at least one release in all the release + let ret = helm.list_release(None, &vec![]); + assert!(matches!(ret, Ok(vec) if !vec.is_empty())); + + // We should have at least one release in all the release + let ret = helm.list_release(Some(&chart.get_namespace_string()), &vec![]); + assert!(matches!(ret, Ok(vec) if vec.len() == 1)); + + // Install a second stuff + let HelmTestCtx { + ref helm, + ref mut chart, + } = HelmTestCtx::new("test-list-release-2"); + chart.custom_namespace = Some("hello-my-friend-this-is-a-test".to_string()); + let ret = helm.upgrade(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + + let ret = helm.list_release(Some(&chart.get_namespace_string()), &vec![]); + assert!(matches!(ret, Ok(vec) if vec.len() == 2)); + } + + #[test] + fn test_upgrade_diff() { + let HelmTestCtx { ref helm, ref chart } = HelmTestCtx::new("test-upgrade-diff"); + + let ret = helm.upgrade_diff(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + } + + #[test] + fn test_rollback() { + let HelmTestCtx { ref helm, ref chart } = HelmTestCtx::new("test-rollback"); + + // check release does not exist yet + let ret = helm.rollback(&chart, 
&vec![]); + assert!(matches!(ret, Err(HelmError::ReleaseDoesNotExist(test)) if test == chart.name)); + + // install it + let ret = helm.upgrade(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + + // First revision cannot be rollback + let ret = helm.rollback(&chart, &vec![]); + assert!(matches!(ret, Err(HelmError::CannotRollback(_)))); + + // 2nd upgrade + let ret = helm.upgrade(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + + // Rollback should be ok now + let ret = helm.rollback(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + } + + #[test] + fn test_upgrade() { + let HelmTestCtx { ref helm, ref chart } = HelmTestCtx::new("test-upgrade"); + + // check release does not exist yet + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Err(HelmError::ReleaseDoesNotExist(test)) if test == chart.name)); + + // install it + let ret = helm.upgrade(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + + // check now it exists + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Ok(_))); + } + + #[test] + fn test_upgrade_timeout() { + let HelmTestCtx { + ref helm, + ref mut chart, + } = HelmTestCtx::new("test-upgrade-timeout"); + chart.timeout_in_seconds = 1; + + // check release does not exist yet + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Err(HelmError::ReleaseDoesNotExist(test)) if test == chart.name)); + + // install it + let ret = helm.upgrade(&chart, &vec![]); + assert!(matches!(ret, Err(HelmError::Timeout(_, _, _)))); + + // Release should not exist if it fails + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Err(HelmError::ReleaseDoesNotExist(test)) if test == chart.name)); + } + + #[test] + fn test_upgrade_with_lock_during_install() { + // We want to check that we manage to install a chart even if a lock is present while it was the first installation + let HelmTestCtx { ref helm, ref chart } = 
HelmTestCtx::new("test-upgrade-with-lock-install"); + + // check release does not exist yet + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Err(HelmError::ReleaseDoesNotExist(test)) if test == chart.name)); + + // Spawn our task killer + let barrier = Arc::new(Barrier::new(2)); + std::thread::spawn({ + let barrier = barrier.clone(); + let chart_name = chart.name.clone(); + move || { + barrier.wait(); + thread::sleep(Duration::from_millis(3000)); + let mut cmd = QoveryCommand::new("pkill", &vec!["-9", "-f", &format!("helm.*{}", chart_name)], &vec![]); + let _ = cmd.exec(); + } + }); + + // install it + barrier.wait(); + let ret = helm.upgrade(&chart, &vec![]); + assert!(matches!(ret, Err(_))); + + // Release should be locked + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Ok(release) if release.is_locked())); + + // New installation should work even if a lock is present + let ret = helm.upgrade(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + + // Release should not be locked anymore + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Ok(release) if !release.is_locked())); + } + + #[test] + fn test_upgrade_with_lock_during_upgrade() { + // We want to check that we manage to install a chart even if a lock is present while it not the first installation + let HelmTestCtx { + ref helm, + ref mut chart, + } = HelmTestCtx::new("test-upgrade-with-lock-upgrade"); + + // check release does not exist yet + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Err(HelmError::ReleaseDoesNotExist(test)) if test == chart.name)); + + // First install + let ret = helm.upgrade(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + + // Spawn our task killer + let barrier = Arc::new(Barrier::new(2)); + std::thread::spawn({ + let barrier = barrier.clone(); + let chart_name = chart.name.clone(); + move || { + barrier.wait(); + thread::sleep(Duration::from_millis(3000)); + 
let mut cmd = QoveryCommand::new("pkill", &vec!["-9", "-f", &format!("helm.*{}", chart_name)], &vec![]); + let _ = cmd.exec(); + } + }); + + chart.values = vec![ChartSetValue { + key: "initialDelaySeconds".to_string(), + value: "6".to_string(), + }]; + barrier.wait(); + let ret = helm.upgrade(&chart, &vec![]); + assert!(matches!(ret, Err(_))); + + // Release should be locked + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Ok(release) if release.is_locked() && release.version == 2)); + + // New installation should work even if a lock is present + let ret = helm.upgrade(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + + // Release should not be locked anymore + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Ok(release) if !release.is_locked() && release.version == 4)); + } + + #[test] + fn test_uninstall() { + let HelmTestCtx { ref helm, ref chart } = HelmTestCtx::new("test-uninstall"); + + // check release does not exist yet + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Err(HelmError::ReleaseDoesNotExist(test)) if test == chart.name)); + + // deleting something that does not exist should not be an issue + let ret = helm.uninstall(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + + // install it + let ret = helm.upgrade(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + + // check now it exists + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Ok(_))); + + // Delete it + let ret = helm.uninstall(&chart, &vec![]); + assert!(matches!(ret, Ok(()))); + + // check release does not exist anymore + let ret = helm.check_release_exist(&chart, &vec![]); + assert!(matches!(ret, Err(HelmError::ReleaseDoesNotExist(test)) if test == chart.name)); } } diff --git a/src/errors/io.rs b/src/errors/io.rs index 139b5df7..cb77a248 100644 --- a/src/errors/io.rs +++ b/src/errors/io.rs @@ -76,6 +76,9 @@ pub enum Tag { CannotGetCluster, 
ObjectStorageCannotCreateBucket, ObjectStorageCannotPutFileIntoBucket, + NoClusterFound, + OnlyOneClusterExpected, + CloudProviderApiMissingInfo, } impl From for Tag { @@ -139,6 +142,9 @@ impl From for Tag { errors::Tag::UnsupportedZone => Tag::UnsupportedZone, errors::Tag::K8sNodeIsNotReadyWithTheRequestedVersion => Tag::K8sNodeIsNotReadyWithTheRequestedVersion, errors::Tag::K8sNodeIsNotReady => Tag::K8sNodeIsNotReady, + errors::Tag::NoClusterFound => Tag::NoClusterFound, + errors::Tag::OnlyOneClusterExpected => Tag::OnlyOneClusterExpected, + errors::Tag::CloudProviderApiMissingInfo => Tag::CloudProviderApiMissingInfo, } } } diff --git a/src/errors/mod.rs b/src/errors/mod.rs index d1648f7d..915dd366 100644 --- a/src/errors/mod.rs +++ b/src/errors/mod.rs @@ -3,6 +3,7 @@ pub mod io; extern crate url; use crate::cloud_provider::utilities::VersionsNumber; +use crate::cmd::helm::HelmError; use crate::error::{EngineError as LegacyEngineError, EngineErrorCause, EngineErrorScope}; use crate::events::EventDetails; use url::Url; @@ -88,6 +89,8 @@ pub enum Tag { Unknown, /// MissingRequiredEnvVariable: represents an error where a required env variable is not set. MissingRequiredEnvVariable, + /// NoClusterFound: represents an error where no cluster was found + NoClusterFound, /// ClusterHasNoWorkerNodes: represents an error where the current cluster doesn't have any worker nodes. ClusterHasNoWorkerNodes, /// CannotGetWorkspaceDirectory: represents an error while trying to get workspace directory. @@ -188,10 +191,14 @@ pub enum Tag { CannotGetSupportedVersions, /// CannotGetCluster: represents an error where we cannot get cluster. CannotGetCluster, + /// OnlyOneClusterExpected: represents an error where only one cluster was expected but several where found + OnlyOneClusterExpected, /// ObjectStorageCannotCreateBucket: represents an error while trying to create a new object storage bucket. 
ObjectStorageCannotCreateBucket, /// ObjectStorageCannotPutFileIntoBucket: represents an error while trying to put a file into an object storage bucket. ObjectStorageCannotPutFileIntoBucket, + /// CloudProviderApiMissingInfo: represents an error while expecting mandatory info + CloudProviderApiMissingInfo, } #[derive(Clone, Debug)] @@ -353,14 +360,18 @@ impl EngineError { /// Arguments: /// /// * `event_details`: Error linked event details. - pub fn new_cluster_has_no_worker_nodes(event_details: EventDetails) -> EngineError { + /// * `raw_error`: Raw error message. + pub fn new_cluster_has_no_worker_nodes( + event_details: EventDetails, + raw_error: Option, + ) -> EngineError { let message = "No worker nodes present, can't proceed with operation."; EngineError::new( event_details, Tag::ClusterHasNoWorkerNodes, message.to_string(), message.to_string(), - None, + raw_error, None, Some( "This can happen if there where a manual operations on the workers or the infrastructure is paused." @@ -369,6 +380,32 @@ impl EngineError { ) } + /// Missing API info from the Cloud provider itself + /// + /// + /// + /// Arguments: + /// + /// * `event_details`: Error linked event details. + /// * `raw_error`: Raw error message. + pub fn new_missing_api_info_from_cloud_provider_error( + event_details: EventDetails, + raw_error: Option, + ) -> EngineError { + let message = "Error, missing required information from the Cloud Provider API"; + EngineError::new( + event_details, + Tag::CloudProviderApiMissingInfo, + message.to_string(), + message.to_string(), + raw_error, + None, + Some( + "This can happen if the cloud provider is encountering issues. You should try again later".to_string(), + ), + ) + } + /// Creates new error for unsupported instance type. /// /// Cloud provider doesn't support the requested instance type. 
@@ -1467,6 +1504,29 @@ impl EngineError { ) } + /// Creates new error from a Helm error + /// + /// Arguments: + /// + /// * `event_details`: Error linked event details. + /// * `error`: Raw error message. + pub fn new_helm_error(event_details: EventDetails, error: HelmError) -> EngineError { + let cmd_error = match &error { + HelmError::CmdError(_, _, cmd_error) => Some(cmd_error.clone()), + _ => None, + }; + + EngineError::new( + event_details, + Tag::HelmChartUninstallError, + error.to_string(), + error.to_string(), + cmd_error, + None, + None, + ) + } + /// Creates new error while uninstalling Helm chart. /// /// Arguments: /// /// * `event_details`: Error linked event details. /// * `error`: Raw error message. @@ -1678,4 +1738,67 @@ impl EngineError { ) } + + /// Creates new error when cluster workers group information is missing. + /// + /// Arguments: + /// + /// * `event_details`: Error linked event details. + /// * `raw_error`: Raw error message. + pub fn new_missing_workers_group_info_error(event_details: EventDetails, raw_error: CommandError) -> EngineError { + let message = "Error, cannot get cluster workers group information."; + + EngineError::new( + event_details, + Tag::CannotGetCluster, + message.to_string(), + message.to_string(), + Some(raw_error), + None, + Some("Maybe there is a lag and cluster is not yet reported, please retry later.".to_string()), + ) + } + + /// No cluster found + /// + /// Arguments: + /// + /// * `event_details`: Error linked event details. + /// * `raw_error`: Raw error message.
+ pub fn new_no_cluster_found_error(event_details: EventDetails, raw_error: CommandError) -> EngineError { + let message = "Error, no cluster found."; + + EngineError::new( + event_details, + Tag::NoClusterFound, + message.to_string(), + message.to_string(), + Some(raw_error), + None, + Some("Maybe there is a lag and cluster is not yet reported, please retry later.".to_string()), + ) + } + + /// Too many clusters found, while expected only one + /// + /// Arguments: + /// + /// * `event_details`: Error linked event details. + /// * `raw_error`: Raw error message. + pub fn new_multiple_cluster_found_expected_one_error( + event_details: EventDetails, + raw_error: CommandError, + ) -> EngineError { + let message = "Too many clusters found with this name, while only one was expected"; + + EngineError::new( + event_details, + Tag::OnlyOneClusterExpected, + message.to_string(), + message.to_string(), + Some(raw_error), + None, + Some("Please contact Qovery support for investigation.".to_string()), + ) + } } diff --git a/tests/aws/aws_environment.rs b/tests/aws/aws_environment.rs index 3fe1da2b..cc5f133d 100644 --- a/tests/aws/aws_environment.rs +++ b/tests/aws/aws_environment.rs @@ -102,7 +102,7 @@ fn test_build_cache() { .as_str(), ); - let mut environment = test_utilities::common::working_minimal_environment( + let environment = test_utilities::common::working_minimal_environment( &context, secrets .DEFAULT_TEST_DOMAIN @@ -118,9 +118,9 @@ let app_build = app.to_build(); let _ = match local_docker.has_cache(&app_build) { Ok(CacheResult::Hit) => assert!(false), - Ok(CacheResult::Miss(parent_build)) => assert!(true), + Ok(CacheResult::Miss(_)) => assert!(true), Ok(CacheResult::MissWithoutParentBuild) => assert!(false), - Err(err) => assert!(false), + Err(_) => assert!(false), }; let _ = match ecr.pull(&image).unwrap() { @@ -147,9 +147,9 @@ fn test_build_cache() { let _ = match local_docker.has_cache(&build_result.build) { Ok(CacheResult::Hit) =>
assert!(true), - Ok(CacheResult::Miss(parent_build)) => assert!(false), + Ok(CacheResult::Miss(_)) => assert!(false), Ok(CacheResult::MissWithoutParentBuild) => assert!(false), - Err(err) => assert!(false), + Err(_) => assert!(false), }; let start_pull_time = SystemTime::now(); @@ -1111,6 +1111,10 @@ fn deploy_a_non_working_environment_with_a_working_failover_on_aws_eks() { fn deploy_2_non_working_environments_with_2_working_failovers_on_aws_eks() { init(); + let test_name = function_name!(); + let span = span!(Level::INFO, "test", name = test_name); + let _enter = span.enter(); + let logger = logger(); let secrets = FuncTestsSecrets::new(); diff --git a/tests/digitalocean/do_environment.rs b/tests/digitalocean/do_environment.rs index f87d8098..f18c9275 100644 --- a/tests/digitalocean/do_environment.rs +++ b/tests/digitalocean/do_environment.rs @@ -104,7 +104,7 @@ fn test_build_cache() { .expect("DIGITAL_OCEAN_TEST_CLUSTER_ID is not set"), ); - let mut environment = test_utilities::common::working_minimal_environment( + let environment = test_utilities::common::working_minimal_environment( &context, secrets .DEFAULT_TEST_DOMAIN @@ -120,9 +120,9 @@ fn test_build_cache() { let app_build = app.to_build(); let _ = match local_docker.has_cache(&app_build) { Ok(CacheResult::Hit) => assert!(false), - Ok(CacheResult::Miss(parent_build)) => assert!(true), + Ok(CacheResult::Miss(_)) => assert!(true), Ok(CacheResult::MissWithoutParentBuild) => assert!(false), - Err(err) => assert!(false), + Err(_) => assert!(false), }; let _ = match docr.pull(&image).unwrap() { @@ -149,9 +149,9 @@ fn test_build_cache() { let _ = match local_docker.has_cache(&build_result.build) { Ok(CacheResult::Hit) => assert!(true), - Ok(CacheResult::Miss(parent_build)) => assert!(false), + Ok(CacheResult::Miss(_)) => assert!(false), Ok(CacheResult::MissWithoutParentBuild) => assert!(false), - Err(err) => assert!(false), + Err(_) => assert!(false), }; let start_pull_time = SystemTime::now(); diff --git 
a/tests/helm/simple_nginx/.helmignore b/tests/helm/simple_nginx/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/tests/helm/simple_nginx/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/tests/helm/simple_nginx/Chart.yaml b/tests/helm/simple_nginx/Chart.yaml new file mode 100644 index 00000000..3464a394 --- /dev/null +++ b/tests/helm/simple_nginx/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: nginx +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. 
+appVersion: "1.16.0" diff --git a/tests/helm/simple_nginx/templates/_helpers.tpl b/tests/helm/simple_nginx/templates/_helpers.tpl new file mode 100644 index 00000000..7423a2c9 --- /dev/null +++ b/tests/helm/simple_nginx/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "toto.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "toto.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "toto.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "toto.labels" -}} +helm.sh/chart: {{ include "toto.chart" . }} +{{ include "toto.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "toto.selectorLabels" -}} +app.kubernetes.io/name: {{ include "toto.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "toto.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "toto.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/tests/helm/simple_nginx/templates/deployment.yaml b/tests/helm/simple_nginx/templates/deployment.yaml new file mode 100644 index 00000000..259e9faa --- /dev/null +++ b/tests/helm/simple_nginx/templates/deployment.yaml @@ -0,0 +1,62 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "toto.fullname" . }} + labels: + {{- include "toto.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "toto.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "toto.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "toto.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: 80 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + initialDelaySeconds: {{ .Values.initialDelaySeconds }} + httpGet: + path: / + port: http + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/tests/helm/simple_nginx/templates/hpa.yaml b/tests/helm/simple_nginx/templates/hpa.yaml new file mode 100644 index 00000000..d7c1529d --- /dev/null +++ b/tests/helm/simple_nginx/templates/hpa.yaml @@ -0,0 +1,28 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "toto.fullname" . }} + labels: + {{- include "toto.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "toto.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/tests/helm/simple_nginx/templates/ingress.yaml b/tests/helm/simple_nginx/templates/ingress.yaml new file mode 100644 index 00000000..c0309c25 --- /dev/null +++ b/tests/helm/simple_nginx/templates/ingress.yaml @@ -0,0 +1,61 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "toto.fullname" . 
-}} +{{- $svcPort := .Values.service.port -}} +{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} + {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} + {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} + {{- end }} +{{- end }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "toto.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . 
| quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} + pathType: {{ .pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/tests/helm/simple_nginx/templates/service.yaml b/tests/helm/simple_nginx/templates/service.yaml new file mode 100644 index 00000000..c57264e0 --- /dev/null +++ b/tests/helm/simple_nginx/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "toto.fullname" . }} + labels: + {{- include "toto.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "toto.selectorLabels" . | nindent 4 }} diff --git a/tests/helm/simple_nginx/templates/serviceaccount.yaml b/tests/helm/simple_nginx/templates/serviceaccount.yaml new file mode 100644 index 00000000..8e86f4e2 --- /dev/null +++ b/tests/helm/simple_nginx/templates/serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "toto.serviceAccountName" . }} + labels: + {{- include "toto.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/tests/helm/simple_nginx/templates/tests/test-connection.yaml b/tests/helm/simple_nginx/templates/tests/test-connection.yaml new file mode 100644 index 00000000..89ca584c --- /dev/null +++ b/tests/helm/simple_nginx/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "toto.fullname" . }}-test-connection" + labels: + {{- include "toto.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "toto.fullname" . }}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/tests/helm/simple_nginx/values.yaml b/tests/helm/simple_nginx/values.yaml new file mode 100644 index 00000000..8ab1b5c2 --- /dev/null +++ b/tests/helm/simple_nginx/values.yaml @@ -0,0 +1,83 @@ +# Default values for toto. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 +initialDelaySeconds: 5 + +image: + repository: nginx + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. 
+ # If not set and create is true, a name is generated using the fullname template + name: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +service: + type: ClusterIP + port: 80 + +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +nodeSelector: {} + +tolerations: [] + +affinity: {} diff --git a/tests/scaleway/scw_environment.rs b/tests/scaleway/scw_environment.rs index 405763cc..41063c19 100644 --- a/tests/scaleway/scw_environment.rs +++ b/tests/scaleway/scw_environment.rs @@ -109,7 +109,7 @@ fn test_build_cache() { .as_str(), ); - let mut environment = test_utilities::common::working_minimal_environment( + let environment = test_utilities::common::working_minimal_environment( &context, secrets .DEFAULT_TEST_DOMAIN @@ -125,9 +125,9 @@ fn test_build_cache() { let app_build = app.to_build(); let _ = match local_docker.has_cache(&app_build) { Ok(CacheResult::Hit) => assert!(false), - Ok(CacheResult::Miss(parent_build)) => assert!(true), + Ok(CacheResult::Miss(_)) => assert!(true), Ok(CacheResult::MissWithoutParentBuild) => assert!(false), - Err(err) => assert!(false), + Err(_) => assert!(false), }; let _ = match scr.pull(&image).unwrap() { @@ -154,9 +154,9 @@ fn test_build_cache() { let _ = match local_docker.has_cache(&build_result.build) { Ok(CacheResult::Hit) => assert!(true), - Ok(CacheResult::Miss(parent_build)) => assert!(false), + Ok(CacheResult::Miss(_)) => assert!(false), Ok(CacheResult::MissWithoutParentBuild) => assert!(false), - Err(err) => assert!(false), + Err(_) => assert!(false), }; let start_pull_time = SystemTime::now();