From 8c2715e9a03c9922cd45356ce3d33a793bc69030 Mon Sep 17 00:00:00 2001 From: Ian Norton Date: Mon, 11 Jul 2022 19:16:00 +0100 Subject: [PATCH 1/2] Fixes #76 Just save a zip locally and use zipfile instead of attempting to make a seekable stream or use Range Clean up logging and detection of artifact archive size Also adapt to CA bundle errors when gitlab stores things in google's S3 services. --- emulator/gitlabemu/gitlab_client_api.py | 78 +++++++++++++------ emulator/gitlabemu/stream_response.py | 48 ------------ emulator/gitlabemu/tests/mocked_gitlab.py | 11 ++- emulator/gitlabemu/tests/test_gitlab_from.py | 3 +- .../gitlabemu/tests/test_stream_response.py | 24 ------ emulator/setup.py | 2 +- 6 files changed, 66 insertions(+), 100 deletions(-) delete mode 100644 emulator/gitlabemu/stream_response.py delete mode 100644 emulator/gitlabemu/tests/test_stream_response.py diff --git a/emulator/gitlabemu/gitlab_client_api.py b/emulator/gitlabemu/gitlab_client_api.py index e0ce56c..d708c04 100644 --- a/emulator/gitlabemu/gitlab_client_api.py +++ b/emulator/gitlabemu/gitlab_client_api.py @@ -1,15 +1,16 @@ +import contextlib import os import shutil +import time import zipfile import requests -from typing import Optional, List, cast, Set, Tuple -from typing.io import IO +import tempfile +from typing import Optional, List, Set, Tuple from urllib.parse import urlparse from gitlab import Gitlab, GitlabGetError from gitlab.v4.objects import ProjectPipelineJob, Project from urllib3.exceptions import InsecureRequestWarning -from . import stream_response from .helpers import die, note, make_path_slug, get_git_remote_urls from .userconfig import get_user_config_context @@ -96,6 +97,8 @@ def gitlab_api(alias: str, secure=True) -> Gitlab: die(f"Could not find a configured token for {alias} or GITLAB_PRIVATE_TOKEN not set") client = Gitlab(url=server, private_token=token, ssl_verify=secure) + if secure: + gitlab_session_head(client.session, server) return client @@ -156,18 +159,24 @@ def get_pipeline(fromline, secure: Optional[bool] = True): return gitlab, project, pipeline -def gitlab_session_get(gitlab, geturl, **kwargs): - """Get using requests and retry TLS errors""" +@contextlib.contextmanager +def ca_bundle_error(func: callable): try: - return gitlab.session.get(geturl, **kwargs) + yield func() except requests.exceptions.SSLError: # pragma: no cover # validation was requested but cert was invalid, # tty again without the gitlab-supplied CA cert and try the system ca certs - if "REQUESTS_CA_BUNDLE" in os.environ: - note(f"warning: Encountered TLS/SSL error getting {geturl}), retrying with system ca certs") - del os.environ["REQUESTS_CA_BUNDLE"] - return gitlab.session.get(geturl, **kwargs) - raise + if "REQUESTS_CA_BUNDLE" not in os.environ: + raise + note(f"warning: Encountered TLS/SSL error, retrying with only system ca certs") + del os.environ["REQUESTS_CA_BUNDLE"] + yield func() + + +def gitlab_session_head(session, geturl, **kwargs): + """HEAD using requests to try different CA options""" + with ca_bundle_error(lambda: session.head(geturl, **kwargs)) as resp: + return resp def do_gitlab_fetch(from_pipeline: str, @@ -193,24 +202,45 @@ def do_gitlab_fetch(from_pipeline: str, slug = make_path_slug(fetch_job.name) outdir = os.path.join(export_to, slug) os.makedirs(outdir, exist_ok=True) - - note(f"{mode} {fetch_job.name} artifacts from {from_pipeline}..") - artifact_url = f"{gitlab.api_url}/projects/{project.id}/jobs/{fetch_job.id}/artifacts" reldir = os.path.relpath(outdir, os.getcwd()) - # stream it into zipfile + headers = {} if gitlab.private_token: headers = {"PRIVATE-TOKEN": gitlab.private_token} - resp = gitlab_session_get(gitlab, artifact_url, headers=headers, stream=True) - if resp.status_code == 404: - note(f"Job {fetch_job.name} has no artifacts") + + note(f"{mode} {fetch_job.name} artifacts from {from_pipeline}..") + archive_artifact = [x for x in fetch_job.artifacts if x["file_type"] == "archive"] + if archive_artifact: + artifact_compressed_size = archive_artifact[0]["size"] + artifact_url = f"{gitlab.api_url}/projects/{project.id}/jobs/{fetch_job.id}/artifacts" + note(f"Get {artifact_url} ({int(artifact_compressed_size/1024)} kb) ..") + temp_zip_dir = tempfile.mkdtemp(dir=os.getcwd(), prefix=".temp-gle-download") + try: + started_fetch = time.time() + with ca_bundle_error( + lambda: gitlab.session.get(artifact_url, headers=headers, stream=True)) as resp: + resp.raise_for_status() + temp_zip_file = os.path.join(temp_zip_dir, "artifacts.zip") + with open(temp_zip_file, "wb") as zf: + for chunk in resp.iter_content(chunk_size=1024 * 1024): + if chunk: + zf.write(chunk) + with open(temp_zip_file, "rb") as compressed: + with zipfile.ZipFile(compressed) as zf: + for item in zf.infolist(): + note(f"Saving {reldir}/{item.filename} ..") + zf.extract(item, path=outdir) + completed_fetch = time.time() + duration = completed_fetch - started_fetch + fetch_unpack_rate = artifact_compressed_size / duration + note("Fetched/Unpacked at {} kb/s ({} kb)".format( + int(fetch_unpack_rate / 1024.0), + int(artifact_compressed_size / 1024.0) + )) + finally: + shutil.rmtree(temp_zip_dir) else: - resp.raise_for_status() - seekable = cast(IO, stream_response.ResponseStream(resp.iter_content(4096))) - with zipfile.ZipFile(seekable) as zf: - for item in zf.infolist(): - note(f"Saving {reldir}/{item.filename} ..") - zf.extract(item, path=outdir) + note(f"Job {fetch_job.name} has no artifacts") if export_to: # also get the trace and junit reports diff --git a/emulator/gitlabemu/stream_response.py b/emulator/gitlabemu/stream_response.py deleted file mode 100644 index 924db01..0000000 --- a/emulator/gitlabemu/stream_response.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -stream a requests response like a semi-seekable file - -Found under "unlicense" at https://gist.github.com/obskyr/b9d4b4223e7eaf4eedcd9defabb34f13 -""" -from io import BytesIO, SEEK_SET, SEEK_END - - -class ResponseStream(object): - def __init__(self, request_iterator): - self._bytes = BytesIO() - self._iterator = request_iterator - - def seekable(self): - return True - - def _load_all(self): - self._bytes.seek(0, SEEK_END) - for chunk in self._iterator: - self._bytes.write(chunk) - - def _load_until(self, goal_position): - current_position = self._bytes.seek(0, SEEK_END) - while current_position < goal_position: - try: - current_position += self._bytes.write(next(self._iterator)) - except StopIteration: - break - - def tell(self): - return self._bytes.tell() - - def read(self, size=None): - left_off_at = self._bytes.tell() - if size is None: - self._load_all() - else: - goal_position = left_off_at + size - self._load_until(goal_position) - - self._bytes.seek(left_off_at) - return self._bytes.read(size) - - def seek(self, position, whence=SEEK_SET): - if whence == SEEK_END: - self._load_all() - else: - self._bytes.seek(position, whence) diff --git a/emulator/gitlabemu/tests/mocked_gitlab.py b/emulator/gitlabemu/tests/mocked_gitlab.py index d65ab29..63970b2 100644 --- a/emulator/gitlabemu/tests/mocked_gitlab.py +++ b/emulator/gitlabemu/tests/mocked_gitlab.py @@ -320,12 +320,17 @@ class Job(MockedIDResource): return f"{self.server.web_url}/-/jobs/{self.id}" def json(self) -> dict: - return { + data = { "id": self.id, "name": self.name, "pipeline": self.pipeline.json(), "status": self.status } + if self.archive_artifact: + data["artifacts"] = [ + self.archive_artifact.json() + ] + return data @property def archive_artifact(self) -> Optional[Artifact]: @@ -378,10 +383,12 @@ class Job(MockedIDResource): class MockServer: def __init__(self, mocker: Mocker, hostname: str): - server = Server(url=f"https://{hostname}", mocker=mocker) + url = f"https://{hostname}" + server = Server(url=url, mocker=mocker) self._server = server self.hostname = hostname self.next_id = random.randint(3, 888) + server.mocker.head(url, text="") def get_id(self): claim = self.next_id diff --git a/emulator/gitlabemu/tests/test_gitlab_from.py b/emulator/gitlabemu/tests/test_gitlab_from.py index cecb56d..4d9fb87 100644 --- a/emulator/gitlabemu/tests/test_gitlab_from.py +++ b/emulator/gitlabemu/tests/test_gitlab_from.py @@ -40,7 +40,8 @@ def test_no_token_or_config(capfd): # should fail to connect with pytest.raises(ConnectionError) as err: run(["--from", "nosuch.gitlab/grp/proj/1234"]) - assert err.value.request.url == 'https://myserver.nosuch/api/v4/projects/grp%2Fproj' + assert err.value.request.url == "https://myserver.nosuch/" + assert err.value.request.method == "HEAD" @pytest.fixture(scope="function", autouse=True) diff --git a/emulator/gitlabemu/tests/test_stream_response.py b/emulator/gitlabemu/tests/test_stream_response.py deleted file mode 100644 index bbb83b1..0000000 --- a/emulator/gitlabemu/tests/test_stream_response.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Tests for stream_response.py""" - -import requests -from ..stream_response import ResponseStream - - -def test_seekable_response(): - resp = requests.get("https://rfc-editor.org/rfc/rfc2549.txt") - resp.raise_for_status() - seekable = ResponseStream(resp.iter_content(512)) - - assert seekable.seekable() - - chunk = seekable.read(512) - assert len(chunk) == 512 - assert seekable.tell() == 512 - - seekable.seek(0) - chunk2 = seekable.read(512) - assert chunk == chunk2 - - seekable.seek(0) - full = seekable.read() - assert chunk in full diff --git a/emulator/setup.py b/emulator/setup.py index 7d6d07c..be0e42f 100644 --- a/emulator/setup.py +++ b/emulator/setup.py @@ -1,6 +1,6 @@ from distutils.core import setup -VERSION = "1.0.2" +VERSION = "1.0.3" requirements = [ "pyyaml>=5.1", -- GitLab From 26985e9c292677c1d5c19e26d59cdf58e681812c Mon Sep 17 00:00:00 2001 From: Ian Norton Date: Tue, 12 Jul 2022 13:15:17 +0100 Subject: [PATCH 2/2] Fix bare pipeline numbers in --from --- emulator/gitlabemu/gitlab_client_api.py | 18 +++++++++++++----- emulator/gitlabemu/tests/test_gitlab_from.py | 12 +++++++++++- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/emulator/gitlabemu/gitlab_client_api.py b/emulator/gitlabemu/gitlab_client_api.py index d708c04..4539265 100644 --- a/emulator/gitlabemu/gitlab_client_api.py +++ b/emulator/gitlabemu/gitlab_client_api.py @@ -140,14 +140,22 @@ def get_pipeline(fromline, secure: Optional[bool] = True): """Get a pipeline""" pipeline = None ident = parse_gitlab_from_arg(fromline) - if not ident.server: - raise PipelineInvalid(fromline) if not secure: note("TLS server validation disabled by --insecure") requests.packages.urllib3.disable_warnings(InsecureRequestWarning) - gitlab = gitlab_api(ident.server, secure=secure) - # get project - project = gitlab.projects.get(ident.project) + + if not ident.server: + cwd = os.getcwd() + gitlab, project, remotename = get_gitlab_project_client(cwd, secure) + + else: + gitlab = gitlab_api(ident.server, secure=secure) + # get project + project = gitlab.projects.get(ident.project) + + if not project: + raise PipelineInvalid(fromline) + # get pipeline if ident.pipeline: try: diff --git a/emulator/gitlabemu/tests/test_gitlab_from.py b/emulator/gitlabemu/tests/test_gitlab_from.py index 4d9fb87..93d07ff 100644 --- a/emulator/gitlabemu/tests/test_gitlab_from.py +++ b/emulator/gitlabemu/tests/test_gitlab_from.py @@ -2,6 +2,7 @@ import os import random import shutil +import subprocess import pytest import argparse @@ -83,7 +84,6 @@ def test_mock_list_pipelines(requests_mock: Mocker, capfd: pytest.CaptureFixture assert f"Cannot find pipeline '{unknown_path}'" in stderr - @pytest.mark.usefixtures("posix_only") def test_mock_download(requests_mock: Mocker, capfd: pytest.CaptureFixture): """Test downloading individual job artifacts""" @@ -117,6 +117,16 @@ def test_mock_from(requests_mock: Mocker, capfd: pytest.CaptureFixture): assert os.path.isfile("artifact.job1.txt") assert not os.path.isfile("artifact.job2.txt") + # test just the number for the current git repo + subprocess.check_call(["git", "init"]) + subprocess.check_call(["git", "remote", "add", "origin", f"https://{MOCK_HOST}/{project.path_with_namespace}.git"]) + run(["-k", "job2", "--from", str(pipeline.id)]) + _, stderr = capfd.readouterr() + assert "TLS server validation disabled" in stderr + assert "Download artifacts required by 'job2'" in stderr + assert os.path.isfile("artifact.job1.txt") + assert not os.path.isfile("artifact.job2.txt") + @pytest.mark.usefixtures("posix_only") def test_mock_export(requests_mock: Mocker, capfd: pytest.CaptureFixture): -- GitLab