From 5390ace3bc39ddc3d4c35ac023b35ab5a942d67e Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Sat, 13 Apr 2024 22:57:54 -0700 Subject: [PATCH 1/5] Toolbox: Support GKE Workload Identity Federation for gsutil Previously gsutil would require backup credentials be specified in a Kubernetes secret, but this requires configuring a secret. This prevents a service account tied to the node or the cluster from being used. `GOOGLE_APPLICATION_CREDENTIALS` is configured by the Chart to specify the location of the backup credentials. If this file does not exist, we tell `gsutil` to obtain a token via the default service account by a config parameter in `.boto`. This enables backups to work with GKE Workload Identity Federation. Relates to https://gitlab.com/gitlab-org/charts/gitlab/-/issues/3434 Changelog: added --- .../charts/toolbox/templates/configmap.yaml | 21 ++++++++++++------- templates/_checkConfig_toolbox.tpl | 2 +- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/charts/gitlab/charts/toolbox/templates/configmap.yaml b/charts/gitlab/charts/toolbox/templates/configmap.yaml index 435371b158..adf7712b40 100644 --- a/charts/gitlab/charts/toolbox/templates/configmap.yaml +++ b/charts/gitlab/charts/toolbox/templates/configmap.yaml @@ -150,11 +150,18 @@ data: fi {{- end }} configure-gsutil: | - # The following script is used to configure gsutil when creating backups - # It provides inputs to the `gsutil config -e` prompt as follows: - # 1) Path to service account JSON key file - # 2) Do not set permissions for key file - # 3) GCP Project ID - # 4) Decline anonymous usage statistics - printf "$GOOGLE_APPLICATION_CREDENTIALS\nN\n{{ .Values.backups.objectStorage.config.gcpProject }}\nN\n" | gsutil config -e + if [ -e "$GOOGLE_APPLICATION_CREDENTIALS" ]; then + # The following script is used to configure gsutil when creating backups + # It provides inputs to the `gsutil config -e` prompt as follows: + # 1) Path to service account JSON key file + # 2) Do not set 
permissions for key file + # 3) GCP Project ID + # 4) Decline anonymous usage statistics + printf "$GOOGLE_APPLICATION_CREDENTIALS\nN\n{{ .Values.backups.objectStorage.config.gcpProject }}\nN\n" | gsutil config -e + else + # If the backup config is not configured, assume that + # Application Default Credentials are used with a service account. + # This enables GKE Workload Identity Federation to work. + printf '[GoogleCompute]\nservice_account = default\n' > ~/.boto + fi {{- end }} diff --git a/templates/_checkConfig_toolbox.tpl b/templates/_checkConfig_toolbox.tpl index e8969bb574..e57b91b13a 100644 --- a/templates/_checkConfig_toolbox.tpl +++ b/templates/_checkConfig_toolbox.tpl @@ -3,7 +3,7 @@ Ensure that a valid object storage config secret is provided. */}} {{- define "gitlab.toolbox.backups.objectStorage.config.secret" -}} {{- if .Values.gitlab.toolbox.enabled -}} -{{- if or .Values.gitlab.toolbox.backups.objectStorage.config (not (or .Values.global.minio.enabled .Values.global.appConfig.object_store.enabled)) (eq .Values.gitlab.toolbox.backups.objectStorage.backend "gcs") }} +{{- if or .Values.gitlab.toolbox.backups.objectStorage.config (not (or .Values.global.minio.enabled .Values.global.appConfig.object_store.enabled)) }} {{- if not .Values.gitlab.toolbox.backups.objectStorage.config.secret -}} toolbox: A valid object storage config secret is needed for backups. -- GitLab From cf1ecb188b1c96c9b25a7f0f1907de1dd4928154 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Wed, 17 Apr 2024 10:09:25 -0700 Subject: [PATCH 2/5] Toolbox: Simplify condition for defining backup config secret Previously the backup config secret would be configured if Minio were disabled AND consolidated object storage settings were enabled, but the latter doesn't really make sense since it's not used for backups. As discussed earlier in https://gitlab.com/gitlab-org/charts/gitlab/-/merge_requests/1516#note_401877959, drop that consideration to simplify the logic. 
--- templates/_checkConfig_toolbox.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/_checkConfig_toolbox.tpl b/templates/_checkConfig_toolbox.tpl index e57b91b13a..c65632beab 100644 --- a/templates/_checkConfig_toolbox.tpl +++ b/templates/_checkConfig_toolbox.tpl @@ -3,7 +3,7 @@ Ensure that a valid object storage config secret is provided. */}} {{- define "gitlab.toolbox.backups.objectStorage.config.secret" -}} {{- if .Values.gitlab.toolbox.enabled -}} -{{- if or .Values.gitlab.toolbox.backups.objectStorage.config (not (or .Values.global.minio.enabled .Values.global.appConfig.object_store.enabled)) }} +{{- if or .Values.gitlab.toolbox.backups.objectStorage.config (not .Values.global.minio.enabled) }} {{- if not .Values.gitlab.toolbox.backups.objectStorage.config.secret -}} toolbox: A valid object storage config secret is needed for backups. -- GitLab From 3622db09f088919d6aba33fe7a7c4af6ba77d976 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Thu, 18 Apr 2024 00:36:03 -0700 Subject: [PATCH 3/5] Toolbox: Allow GCS backend to be used without a secret In the case of GKE Workload Identity, no secrets are needed for the backups. 
--- spec/integration/check_config/toolbox_spec.rb | 19 +++++++++++++++++++ templates/_checkConfig_toolbox.tpl | 8 +++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/spec/integration/check_config/toolbox_spec.rb b/spec/integration/check_config/toolbox_spec.rb index ba9b6cc536..0217a0509c 100644 --- a/spec/integration/check_config/toolbox_spec.rb +++ b/spec/integration/check_config/toolbox_spec.rb @@ -64,6 +64,25 @@ describe 'checkConfig toolbox' do include_examples 'config validation', success_description: 'when toolbox has a valid object storage backup secret configured', error_description: 'when toolbox does not have a valid object storage backup secret configured' + + context 'with Google Cloud Storage backend' do + let(:success_values) do + YAML.safe_load(%( + gitlab: + toolbox: + enabled: true + backups: + objectStorage: + backend: gcs + config: + # secret: s3cmd-config + key: config + )).merge(default_required_values) + end + + include_examples 'config validation', + success_description: 'when toolbox uses GCS for backup with no secret configured' + end end describe 'gitlab.toolbox.enabled (set to false)' do diff --git a/templates/_checkConfig_toolbox.tpl b/templates/_checkConfig_toolbox.tpl index c65632beab..3bde44d493 100644 --- a/templates/_checkConfig_toolbox.tpl +++ b/templates/_checkConfig_toolbox.tpl @@ -1,10 +1,12 @@ {{/* -Ensure that a valid object storage config secret is provided. +Ensure that a valid object storage config secret is provided. Make +an exception for the Google Cloud Storage (GCS) backend, since with GKE Workload Identity +no secrets have to be configured. 
*/}} {{- define "gitlab.toolbox.backups.objectStorage.config.secret" -}} {{- if .Values.gitlab.toolbox.enabled -}} -{{- if or .Values.gitlab.toolbox.backups.objectStorage.config (not .Values.global.minio.enabled) }} -{{- if not .Values.gitlab.toolbox.backups.objectStorage.config.secret -}} +{{- if or .Values.gitlab.toolbox.backups.objectStorage.config (not .Values.global.minio.enabled) -}} +{{- if and (not (eq .Values.gitlab.toolbox.backups.objectStorage.backend "gcs")) (not .Values.gitlab.toolbox.backups.objectStorage.config.secret) -}} toolbox: A valid object storage config secret is needed for backups. Please configure it via `gitlab.toolbox.backups.objectStorage.config.secret`. -- GitLab From 773529115f003d0dd70d3123b3981a8be7b2a4e7 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Mon, 29 Apr 2024 12:15:38 -0700 Subject: [PATCH 4/5] Add docs on how to use GKE Workload Identity This is needed for both object storage and backups. --- .../gke-workload-identity.md | 31 ++++++++++++++++ doc/advanced/external-object-storage/index.md | 14 ++++++- doc/backup-restore/index.md | 37 +++++++++++++++---- 3 files changed, 72 insertions(+), 10 deletions(-) create mode 100644 doc/advanced/external-object-storage/gke-workload-identity.md diff --git a/doc/advanced/external-object-storage/gke-workload-identity.md b/doc/advanced/external-object-storage/gke-workload-identity.md new file mode 100644 index 0000000000..f0190fa5d1 --- /dev/null +++ b/doc/advanced/external-object-storage/gke-workload-identity.md @@ -0,0 +1,31 @@ +--- +stage: Systems +group: Distribution +info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://handbook.gitlab.com/handbook/product/ux/technical-writing/#assignments +--- + +# Workload Identity Federation for GKE using the GitLab chart + +The default configuration for external object storage in the charts uses +secret keys. 
[Workload Identity Federation for GKE](https://cloud.google.com/kubernetes-engine/docs/concepts/workload-identity) +makes it possible to grant access to object storage to the Kubernetes cluster using short-lived +tokens. If you have an existing GKE cluster, read the [Google documentation on how to update the node pool to use Workload Identity Federation](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity#option_2_node_pool_modification). + +## Troubleshooting + +You can check whether Workload Identity is configured properly by +querying the metadata endpoint inside the toolbox pod. The service +account associated with the cluster should be returned: + +```shell +$ curl -H "Metadata-Flavor: Google" http://169.254.169.254/computeMetadata/v1/instance/service-accounts/default/email +example@your-example-project.iam.gserviceaccount.com +``` + +This account should also be able to access the following scopes: + +```shell +$ curl -H "Metadata-Flavor: Google" http://169.254.169.254/computeMetadata/v1/instance/service-accounts/default/scopes +https://www.googleapis.com/auth/cloud-platform +https://www.googleapis.com/auth/userinfo.email +``` diff --git a/doc/advanced/external-object-storage/index.md b/doc/advanced/external-object-storage/index.md index f710fe8c80..e7027903bd 100644 --- a/doc/advanced/external-object-storage/index.md +++ b/doc/advanced/external-object-storage/index.md @@ -206,7 +206,7 @@ are supported backends. You can configure the backend type by setting `global.ap to `s3` for AWS S3, `gcs` for Google Cloud Storage, or `azure` for Azure Blob Storage. You must also provide a connection configuration through the `gitlab.toolbox.backups.objectStorage.config` key. -When using Google Cloud Storage, the GCP project must be set with the `global.appConfig.backups.objectStorage.config.gcpProject` value. 
+When using Google Cloud Storage with a secret, the GCP project must be set with the `gitlab.toolbox.backups.objectStorage.config.gcpProject` value. For S3-compatible storage: @@ -217,7 +217,7 @@ For S3-compatible storage: --set gitlab.toolbox.backups.objectStorage.config.key=config ``` -For Google Cloud Storage (GCS): +For Google Cloud Storage (GCS) with a secret: ```shell --set global.appConfig.backups.bucket=gitlab-backup-storage @@ -228,6 +228,16 @@ For Google Cloud Storage (GCS): --set gitlab.toolbox.backups.objectStorage.config.key=config ``` +For Google Cloud Storage (GCS) with [Workload Identity Federation for GKE](gke-workload-identity.md), only the backend and buckets need to be set. +Make sure `gitlab.toolbox.backups.objectStorage.config.secret` and `gitlab.toolbox.backups.objectStorage.config.key` are not set, +so that the cluster uses [Google's Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials): + +```shell +--set global.appConfig.backups.bucket=gitlab-backup-storage +--set global.appConfig.backups.tmpBucket=gitlab-tmp-storage +--set gitlab.toolbox.backups.objectStorage.backend=gcs +``` + For Azure Blob Storage: ```shell diff --git a/doc/backup-restore/index.md b/doc/backup-restore/index.md index d684b2aa1c..a3538bf6a8 100644 --- a/doc/backup-restore/index.md +++ b/doc/backup-restore/index.md @@ -54,8 +54,28 @@ when restoring a backup. ### Backups to Google Cloud Storage (GCS) -To backup to GCS you must set `gitlab.toolbox.backups.objectStorage.backend` to `gcs`. This ensures that the Toolbox uses the `gsutil` CLI when storing and retrieving -objects. Additionally you must set `gitlab.toolbox.backups.objectStorage.config.gcpProject` to the project ID of the GCP project that contains your storage buckets. +To back up to GCS, you must first set `gitlab.toolbox.backups.objectStorage.backend` to `gcs`. This ensures +that the Toolbox uses the `gsutil` CLI when storing and retrieving +objects. 
+ +In addition, two bucket locations need to be configured, one for storing +the backups, and one temporary bucket that is used when restoring a +backup. + +```shell +--set global.appConfig.backups.bucket=gitlab-backup-storage +--set global.appConfig.backups.tmpBucket=gitlab-tmp-storage +``` + +The backup utility needs access to these buckets. There are two ways to grant access: + +- Specifying credentials in a Kubernetes secret. +- Configuring [Workload Identity Federation for GKE](https://cloud.google.com/kubernetes-engine/docs/concepts/workload-identity). + +#### GCS credentials + +First, set `gitlab.toolbox.backups.objectStorage.config.gcpProject` to the project ID of the GCP project that contains your storage buckets. + You must create a Kubernetes secret with the contents of an active service account JSON key where the service account has the `storage.admin` role for the buckets you will use for backup. Below is an example of using the `gcloud` and `kubectl` to create the secret. @@ -77,13 +97,14 @@ helm install gitlab gitlab/gitlab \ --set gitlab.toolbox.backups.objectStorage.backend=gcs ``` -In addition, two bucket locations need to be configured, one for storing the backups, and one temporary bucket that is used -when restoring a backup. +#### Configuring Workload Identity Federation for GKE -```shell ---set global.appConfig.backups.bucket=gitlab-backup-storage ---set global.appConfig.backups.tmpBucket=gitlab-tmp-storage -``` +See the [documentation on Workload Identity Federation for GKE using the GitLab chart](../advanced/external-object-storage/gke-workload-identity.md). + +When creating an IAM allow policy that references the Kubernetes ServiceAccount, grant the `roles/storage.objectAdmin` role. + +For backups, ensure that Google's Application Default Credentials are used by making sure that +`gitlab.toolbox.backups.objectStorage.config.secret` and `gitlab.toolbox.backups.objectStorage.config.key` are NOT set. 
### Backups to Azure blob storage -- GitLab From 8c528b4c475f5a0e9d34cdc0c215c451af90d179 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Wed, 1 May 2024 11:47:08 -0700 Subject: [PATCH 5/5] Add a note about having the iam.gke.io/gcp-service-account annotation This is a key issue in troubleshooting Workload Identity for GKE. --- doc/advanced/external-object-storage/gke-workload-identity.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/advanced/external-object-storage/gke-workload-identity.md b/doc/advanced/external-object-storage/gke-workload-identity.md index f0190fa5d1..1359489740 100644 --- a/doc/advanced/external-object-storage/gke-workload-identity.md +++ b/doc/advanced/external-object-storage/gke-workload-identity.md @@ -13,6 +13,9 @@ tokens. If you have an existing GKE cluster, read the [Google documentation on h ## Troubleshooting +Ensure that the [Kubernetes ServiceAccount is linked to the IAM service account](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity#kubernetes-sa-to-iam) +via the `iam.gke.io/gcp-service-account` annotation. + You can check whether Workload Identity is configured properly by querying the metadata endpoint inside the toolbox pod. The service account associated with the cluster should be returned: -- GitLab