From 7e85341f2c3d1bd1915869725b9b91144c79d83c Mon Sep 17 00:00:00 2001 From: c_fons Date: Tue, 14 Oct 2025 17:01:19 +0100 Subject: [PATCH 1/6] Add checksumming endpoint to data_management API EE: true Changelog: added --- ee/lib/api/admin/data_management.rb | 97 +++++++++-------- .../api/admin/data_management_spec.rb | 101 ++++++++++++++++++ 2 files changed, 155 insertions(+), 43 deletions(-) diff --git a/ee/lib/api/admin/data_management.rb b/ee/lib/api/admin/data_management.rb index 0053a57e05fb2e..2cd394c52ff5d3 100644 --- a/ee/lib/api/admin/data_management.rb +++ b/ee/lib/api/admin/data_management.rb @@ -61,49 +61,6 @@ def find_models_from_record_identifier_array(identifier_array, relation) resource :admin do resource :data_management do route_param :model_name, type: String, desc: 'The name of the model being managed' do - # Example request: - # GET /admin/data_management/:model_name - desc 'Get a list of model data' do - summary 'Retrieve all records of the requested model' - detail 'This feature is experimental.' - success code: 200, model: Entities::Admin::Model - failure [ - { code: 400, message: '400 Bad request' }, - { code: 401, message: '401 Unauthorized' }, - { code: 403, message: '403 Forbidden' }, - { code: 404, message: '404 Model Not Found' } - ] - is_array true - tags %w[data_management] - end - params do - use :pagination - requires :model_name, type: String, values: AVAILABLE_MODEL_NAMES - optional :identifiers, types: [Array[Integer], Array[String]], desc: 'The record identifiers to filter by' - optional :checksum_state, - type: String, - desc: 'The checksum status of the records to filter by', - values: VERIFICATION_STATES - end - get do - model_class = Gitlab::Geo::ModelMapper.find_from_name(params[:model_name]) - not_found!(params[:model_name]) unless model_class - - relation = model_class.respond_to?(:with_state_details) ? model_class.with_state_details : model_class - if params[:identifiers]&.compact.present? 
- relation = find_models_from_record_identifier_array(params[:identifiers], relation) - end - - if params[:checksum_state].present? - bad_request!("#{model_class} is not a verifiable model.") unless verifiable?(model_class) - relation = relation.with_verification_state("verification_#{params[:checksum_state]}") - end - - relation = relation.order_by_primary_key - - present paginate(relation.all, without_count: true), with: Entities::Admin::Model - end - route_param :record_identifier, types: [Integer, String], desc: 'The identifier of the model being requested' do @@ -171,6 +128,60 @@ def find_models_from_record_identifier_array(identifier_array, relation) present model, with: Entities::Admin::Model end end + + # Example request: + # GET /admin/data_management/:model_name + desc 'Get a list of model data' do + summary 'Retrieve all records of the requested model' + detail 'This feature is experimental.' + success code: 200, model: Entities::Admin::Model + failure [ + { code: 400, message: '400 Bad request' }, + { code: 401, message: '401 Unauthorized' }, + { code: 403, message: '403 Forbidden' }, + { code: 404, message: '404 Model Not Found' } + ] + is_array true + tags %w[data_management] + end + params do + use :pagination + requires :model_name, type: String, values: AVAILABLE_MODEL_NAMES + optional :identifiers, types: [Array[Integer], Array[String]], desc: 'The record identifiers to filter by' + optional :checksum_state, + type: String, + desc: 'The checksum status of the records to filter by', + values: VERIFICATION_STATES + end + get do + model_class = Gitlab::Geo::ModelMapper.find_from_name(params[:model_name]) + not_found!(params[:model_name]) unless model_class + + relation = model_class.respond_to?(:with_state_details) ? model_class.with_state_details : model_class + if params[:identifiers]&.compact.present? + relation = find_models_from_record_identifier_array(params[:identifiers], relation) + end + + if params[:checksum_state].present? 
+ bad_request!("#{model_class} is not a verifiable model.") unless verifiable?(model_class) + relation = relation.with_verification_state("verification_#{params[:checksum_state]}") + end + + relation = relation.order_by_primary_key + + present paginate(relation.all, without_count: true), with: Entities::Admin::Model + end + + put 'checksum' do + bad_request!('Endpoint only available on primary site.') unless ::Gitlab::Geo.primary? + + available_names = Gitlab::Geo::ModelMapper.available_model_names + not_found!(params[:model_name]) unless available_names.include?(params[:model_name]) + + result = ::Geo::BulkPrimaryVerificationService.new(params[:model_name]).async_execute + + present result + end end end end diff --git a/ee/spec/requests/api/admin/data_management_spec.rb b/ee/spec/requests/api/admin/data_management_spec.rb index 272a2d1ba27316..84796bc4981745 100644 --- a/ee/spec/requests/api/admin/data_management_spec.rb +++ b/ee/spec/requests/api/admin/data_management_spec.rb @@ -375,6 +375,107 @@ def create_record_for_given_state(state) end end + describe 'PUT /admin/data_management/:model_name/checksum' do + context 'with feature flag enabled' do + let_it_be(:node) { create(:geo_node) } + let_it_be(:api_path) { "/admin/data_management/merge_request_diff/checksum" } + + before do + stub_current_geo_node(node) + stub_primary_site + end + + context 'when authenticated as admin' do + context 'when not on primary site' do + before do + allow(Gitlab::Geo).to receive(:primary?).and_return(false) + end + + it 'returns 400 bad request' do + put api(api_path, admin, admin_mode: true) + + expect(response).to have_gitlab_http_status(:bad_request) + expect(json_response['message']).to eq('400 Bad request - Endpoint only available on primary site.') + end + end + + context 'with valid model name' do + it 'returns service result' do + expect(::Geo::BulkPrimaryVerificationService).to receive(:new).with('merge_request_diff').and_call_original + + put api(api_path, admin, 
admin_mode: true) + + expect(response).to have_gitlab_http_status(:ok) + expect(json_response).to include('status' => 'success') + end + end + + context 'with invalid model names' do + # Edge cases - invalid inputs + it 'returns 404 for non-existent model name' do + put api('/admin/data_management/non_existent_model/checksum', admin, admin_mode: true) + + expect(response).to have_gitlab_http_status(:not_found) + end + + it 'returns 404 for empty model name' do + put api('/admin/data_management/checksum', admin, admin_mode: true) + + expect(response).to have_gitlab_http_status(:not_found) + end + end + + context 'with URL encoding' do + # Edge cases - URL encoded characters + it 'handles URL encoded model names' do + put api('/admin/data_management/lfs%5Fobject/checksum', admin, admin_mode: true) + + expect(response).to have_gitlab_http_status(:ok) + end + + it 'handles URL encoded special characters' do + put api('/admin/data_management/lfs%40object/checksum', admin, admin_mode: true) + + expect(response).to have_gitlab_http_status(:not_found) + end + end + end + + context 'when not authenticated as admin' do + # Security boundary tests + it 'denies access for regular users' do + put api(api_path, user) + + expect(response).to have_gitlab_http_status(:forbidden) + end + + it 'denies access for unauthenticated requests' do + put api(api_path) + + expect(response).to have_gitlab_http_status(:unauthorized) + end + + it 'denies access for admin without admin mode' do + put api(api_path, admin) + + expect(response).to have_gitlab_http_status(:forbidden) + end + end + end + + context 'with feature flag disabled' do + before do + Feature.disable(:geo_primary_verification_view) + end + + it 'returns 404' do + get api("/admin/data_management/terraform_state_version/checksum", admin, admin_mode: true) + + expect(response).to have_gitlab_http_status(:not_found) + end + end + end + describe 'GET /admin/data_management/:model_name/:record_identifier' do context 'with feature 
flag enabled' do context 'with valid model name' do -- GitLab From 184464a050fddcfd357e054c0454a654ada07a5e Mon Sep 17 00:00:00 2001 From: c_fons Date: Tue, 14 Oct 2025 17:23:03 +0100 Subject: [PATCH 2/6] Add endpoint documentation --- doc/api/admin/data_management.md | 30 +++++++++++++++++++++++++++++ ee/lib/api/admin/data_management.rb | 3 ++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/doc/api/admin/data_management.md b/doc/api/admin/data_management.md index 11d300f3fc5e54..67a3586f1929d7 100644 --- a/doc/api/admin/data_management.md +++ b/doc/api/admin/data_management.md @@ -118,6 +118,36 @@ Example response: ] ``` +## Recalculate the checksum of all model record + +```plaintext +PUT /admin/data_management/:model_name/checksum +``` + +| Attribute | Type | Required | Description | +|---------------------|-------------------|----------|---------------------------------------------------------------------------------------------| +| `model_name` | string | Yes | The name of the requested model. Must belong to the `:model_name` list above. | + +If successful, returns [`200`](../rest/troubleshooting.md#status-codes) and a JSON response containing information about the updates: + +| Attribute | Type | Description | +|-----------|--------|----------------------------------------------------------------------| +| `message` | string | A information message about the background update starting. | +| `payload` | JSON | Contains the status of the background processing as being "pending". | +| `status` | string | Always "success". 
| + +```json +{ + "status": "success", + "message": "Batch update job has been successfully enqueued.", + "payload": { + "status": "pending", + "http_status": "ok", + "reason": null + } +} +``` + ## Get information about a specific model record ```plaintext diff --git a/ee/lib/api/admin/data_management.rb b/ee/lib/api/admin/data_management.rb index 2cd394c52ff5d3..fbe629f175b268 100644 --- a/ee/lib/api/admin/data_management.rb +++ b/ee/lib/api/admin/data_management.rb @@ -171,7 +171,8 @@ def find_models_from_record_identifier_array(identifier_array, relation) present paginate(relation.all, without_count: true), with: Entities::Admin::Model end - + # Example request: + # PUT /admin/data_management/:model_name/checksum put 'checksum' do bad_request!('Endpoint only available on primary site.') unless ::Gitlab::Geo.primary? -- GitLab From 06adc2925f0f804a03cf7fb2207007a599fb10eb Mon Sep 17 00:00:00 2001 From: c_fons Date: Wed, 15 Oct 2025 10:53:51 +0100 Subject: [PATCH 3/6] Add parameters and better result presentation --- doc/api/admin/data_management.md | 16 +++---- ee/lib/api/admin/data_management.rb | 42 +++++++++++++++---- .../api/admin/data_management_spec.rb | 20 +++++++-- 3 files changed, 55 insertions(+), 23 deletions(-) diff --git a/doc/api/admin/data_management.md b/doc/api/admin/data_management.md index 67a3586f1929d7..bde0b57e105232 100644 --- a/doc/api/admin/data_management.md +++ b/doc/api/admin/data_management.md @@ -130,21 +130,15 @@ PUT /admin/data_management/:model_name/checksum If successful, returns [`200`](../rest/troubleshooting.md#status-codes) and a JSON response containing information about the updates: -| Attribute | Type | Description | -|-----------|--------|----------------------------------------------------------------------| -| `message` | string | A information message about the background update starting. | -| `payload` | JSON | Contains the status of the background processing as being "pending". 
| -| `status` | string | Always "success". | +| Attribute | Type | Description | +|-----------|--------|--------------------------------------------------------------| +| `message` | string | A information message about the background update enqueuing. | +| `status` | string | Can be "success" or "error". | ```json { "status": "success", - "message": "Batch update job has been successfully enqueued.", - "payload": { - "status": "pending", - "http_status": "ok", - "reason": null - } + "message": "Batch update job has been successfully enqueued." } ``` diff --git a/ee/lib/api/admin/data_management.rb b/ee/lib/api/admin/data_management.rb index fbe629f175b268..155f0e7ff22bd4 100644 --- a/ee/lib/api/admin/data_management.rb +++ b/ee/lib/api/admin/data_management.rb @@ -56,6 +56,14 @@ def find_models_from_record_identifier_array(identifier_array, relation) rescue ArgumentError, TypeError => e bad_request!(e) end + + def find_verifiable_model_class + model_class = Gitlab::Geo::ModelMapper.find_from_name(params[:model_name]) + not_found!(params[:model_name]) unless model_class + bad_request!("#{model_class} is not a verifiable model.") unless verifiable?(model_class) + + model_class + end end resource :admin do @@ -115,10 +123,7 @@ def find_models_from_record_identifier_array(identifier_array, relation) put 'checksum' do bad_request!('Endpoint only available on primary site.') unless ::Gitlab::Geo.primary? 
- model_class = Gitlab::Geo::ModelMapper.find_from_name(params[:model_name]) - not_found!(params[:model_name]) unless model_class - bad_request!("#{model_class} is not a verifiable model.") unless verifiable?(model_class) - + model_class = find_verifiable_model_class model = find_model_from_record_identifier(params[:record_identifier], model_class) not_found!(params[:record_identifier]) unless model @@ -171,15 +176,34 @@ def find_models_from_record_identifier_array(identifier_array, relation) present paginate(relation.all, without_count: true), with: Entities::Admin::Model end + # Example request: # PUT /admin/data_management/:model_name/checksum + desc 'Recalculate the checksum of a all records for a model' do + summary 'Marks all records from a given model for checksum recalculation' + detail 'This feature is experimental.' + success code: 200, model: Entities::Admin::Model + failure [ + { code: 400, message: '400 Bad request' }, + { code: 401, message: '401 Unauthorized' }, + { code: 403, message: '403 Forbidden' }, + { code: 404, message: '404 Model Not Found' } + ] + tags %w[data_management] + end + params do + requires :model_name, type: String, values: AVAILABLE_MODEL_NAMES + end put 'checksum' do bad_request!('Endpoint only available on primary site.') unless ::Gitlab::Geo.primary? - - available_names = Gitlab::Geo::ModelMapper.available_model_names - not_found!(params[:model_name]) unless available_names.include?(params[:model_name]) - - result = ::Geo::BulkPrimaryVerificationService.new(params[:model_name]).async_execute + find_verifiable_model_class + + service_result = ::Geo::BulkPrimaryVerificationService.new(params[:model_name]).async_execute + result = if service_result.success? 
+ { status: 'success', message: service_result.message } + else + { status: 'error', message: service_result.message } + end present result end diff --git a/ee/spec/requests/api/admin/data_management_spec.rb b/ee/spec/requests/api/admin/data_management_spec.rb index 84796bc4981745..e549d7ff1cf12f 100644 --- a/ee/spec/requests/api/admin/data_management_spec.rb +++ b/ee/spec/requests/api/admin/data_management_spec.rb @@ -408,14 +408,28 @@ def create_record_for_given_state(state) expect(response).to have_gitlab_http_status(:ok) expect(json_response).to include('status' => 'success') end + + context 'when service returns an error' do + before do + allow(::Geo::BulkPrimaryVerificationService).to receive_message_chain(:new, :async_execute) + .and_return(ServiceResponse.error(message: 'Error')) + end + + it 'returns error message' do + put api(api_path, admin, admin_mode: true) + + expect(response).to have_gitlab_http_status(:ok) + expect(json_response).to include('status' => 'error') + end + end end context 'with invalid model names' do # Edge cases - invalid inputs - it 'returns 404 for non-existent model name' do + it 'returns 400 for non-existent model name' do put api('/admin/data_management/non_existent_model/checksum', admin, admin_mode: true) - expect(response).to have_gitlab_http_status(:not_found) + expect(response).to have_gitlab_http_status(:bad_request) end it 'returns 404 for empty model name' do @@ -436,7 +450,7 @@ def create_record_for_given_state(state) it 'handles URL encoded special characters' do put api('/admin/data_management/lfs%40object/checksum', admin, admin_mode: true) - expect(response).to have_gitlab_http_status(:not_found) + expect(response).to have_gitlab_http_status(:bad_request) end end end -- GitLab From 8064bfe792d0db6a7612527b42aae42a57c6021b Mon Sep 17 00:00:00 2001 From: c_fons Date: Wed, 15 Oct 2025 10:58:31 +0100 Subject: [PATCH 4/6] Improve endpoint doc --- doc/api/admin/data_management.md | 12 ++++++------ 1 file changed, 6 
insertions(+), 6 deletions(-) diff --git a/doc/api/admin/data_management.md b/doc/api/admin/data_management.md index bde0b57e105232..91f0569043cc74 100644 --- a/doc/api/admin/data_management.md +++ b/doc/api/admin/data_management.md @@ -118,7 +118,7 @@ Example response: ] ``` -## Recalculate the checksum of all model record +## Recalculate the checksum of all model records ```plaintext PUT /admin/data_management/:model_name/checksum @@ -128,12 +128,12 @@ PUT /admin/data_management/:model_name/checksum |---------------------|-------------------|----------|---------------------------------------------------------------------------------------------| | `model_name` | string | Yes | The name of the requested model. Must belong to the `:model_name` list above. | -If successful, returns [`200`](../rest/troubleshooting.md#status-codes) and a JSON response containing information about the updates: +This endpoint marks all records from the model for checksum recalculation. It enqueues a background job to do so. If successful, returns [`200`](../rest/troubleshooting.md#status-codes) and a JSON response containing the following information: -| Attribute | Type | Description | -|-----------|--------|--------------------------------------------------------------| -| `message` | string | A information message about the background update enqueuing. | -| `status` | string | Can be "success" or "error". | +| Attribute | Type | Description | +|-----------|--------|------------------------------------------------------| +| `message` | string | An informational message about the success or error. | +| `status` | string | Can be "success" or "error". 
| ```json { -- GitLab From 8f9c9c008ca15abe5d19c979c5bcff4ede0c182e Mon Sep 17 00:00:00 2001 From: c_fons Date: Wed, 15 Oct 2025 11:11:24 +0100 Subject: [PATCH 5/6] Fix typo in FF off test --- ee/spec/requests/api/admin/data_management_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/spec/requests/api/admin/data_management_spec.rb b/ee/spec/requests/api/admin/data_management_spec.rb index e549d7ff1cf12f..a67030562b2a01 100644 --- a/ee/spec/requests/api/admin/data_management_spec.rb +++ b/ee/spec/requests/api/admin/data_management_spec.rb @@ -483,7 +483,7 @@ def create_record_for_given_state(state) end it 'returns 404' do - get api("/admin/data_management/terraform_state_version/checksum", admin, admin_mode: true) + put api("/admin/data_management/terraform_state_version/checksum", admin, admin_mode: true) expect(response).to have_gitlab_http_status(:not_found) end -- GitLab From 27c5bdb683fe0b9120ae7c4e48b73c02c46a0b9a Mon Sep 17 00:00:00 2001 From: Chloe Fons Date: Wed, 22 Oct 2025 08:04:19 +0100 Subject: [PATCH 6/6] Apply 1 suggestion(s) to 1 file(s) Co-authored-by: Fred de Gier --- ee/lib/api/admin/data_management.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/lib/api/admin/data_management.rb b/ee/lib/api/admin/data_management.rb index 155f0e7ff22bd4..eb8485dc237763 100644 --- a/ee/lib/api/admin/data_management.rb +++ b/ee/lib/api/admin/data_management.rb @@ -68,7 +68,7 @@ def find_verifiable_model_class resource :admin do resource :data_management do - route_param :model_name, type: String, desc: 'The name of the model being managed' do + route_param :model_name, type: String, desc: 'The name of the model being requested' do route_param :record_identifier, types: [Integer, String], desc: 'The identifier of the model being requested' do -- GitLab