From dd3337ffb4d179423ce175726823aa8f89bea374 Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Thu, 14 Apr 2022 18:39:10 +0100
Subject: [PATCH 01/15] #7 Static Types

---
 .gitlab-ci.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b2f171a..b68aa5c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -199,11 +199,11 @@ pypi:
     url: https://pypi.org
   resource_group: production
   when: manual
-  rules:
-    - if: $CI_COMMIT_TAG
+  # rules:
+  #   - if: $CI_COMMIT_TAG
   script:
     - !reference [.deploy, script]
-    - TWINE_USERNAME=__token__ TWINE_PASSWORD="$pypi_api_token" twine upload --skip-existing dist/*
+    - TWINE_USERNAME=__token__ TWINE_PASSWORD="pypi-$pypi_api_token" twine upload --skip-existing dist/*
   artifacts:
     name: release
     expire_in: 3 year
-- 
GitLab


From 2dddd70a429b806e1d377bc1a4b01f7dda988c73 Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Thu, 14 Apr 2022 19:42:14 +0100
Subject: [PATCH 02/15] #7 Static Types: test pypi upload

---
 .gitlab-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b68aa5c..d448b41 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -203,7 +203,7 @@ pypi:
   #   - if: $CI_COMMIT_TAG
   script:
     - !reference [.deploy, script]
-    - TWINE_USERNAME=__token__ TWINE_PASSWORD="pypi-$pypi_api_token" twine upload --skip-existing dist/*
+    - TWINE_USERNAME=__token__ TWINE_PASSWORD="$pypi_api_token" twine upload --skip-existing dist/*
   artifacts:
     name: release
     expire_in: 3 year
-- 
GitLab


From 6cedec062b7d7313f89c4e7ea83e8f10db489183 Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Thu, 14 Apr 2022 19:53:11 +0100
Subject: [PATCH 03/15] #7 Static Types: pypi upload on tag push

---
 .gitlab-ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index d448b41..b2f171a 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -199,8 +199,8 @@ pypi:
     url: https://pypi.org
   resource_group: production
   when: manual
-  # rules:
-  #   - if: $CI_COMMIT_TAG
+  rules:
+    - if: $CI_COMMIT_TAG
   script:
     - !reference [.deploy, script]
     - TWINE_USERNAME=__token__ TWINE_PASSWORD="$pypi_api_token" twine upload --skip-existing dist/*
-- 
GitLab


From ce52b8efdec6f548b8fcbfe704f713912f5b8a4a Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Fri, 15 Apr 2022 11:13:33 +0100
Subject: [PATCH 04/15] #7 Static Types: Coverage Report, Security Testing
 (SAST), Secret Detection, and Code Quality Enabled

---
 .gitlab-ci.yml | 44 ++++++++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b2f171a..99102da 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -22,8 +22,13 @@
 # https://hub.docker.com/r/library/python/tags/
 image: python:latest
 
-# include:
-#   - template: Security/Dependency-Scanning.gitlab-ci.yml
+include:
+  - template: Security/SAST.gitlab-ci.yml
+  - template: Security/Secret-Detection.gitlab-ci.yml
+  - template: Code-Quality.gitlab-ci.yml
+  # NOTE: Available with Ultimate subscription
+  # - template: Security/Dependency-Scanning.gitlab-ci.yml
+  # - template: Verify/Load-Performance-Testing.gitlab-ci.yml
 
 # # DOC: https://docs.gitlab.com/ee/user/application_security/dependency_scanning/
 # gemnasium-dependency_scanning:
@@ -112,7 +117,9 @@ dist-test-docs:
       - public
     reports:
       junit: report.xml
-      cobertura: coverage.xml
+      coverage_report:
+        coverage_format: cobertura
+        path: coverage.xml
 
 coverage:
   stage: test
@@ -148,15 +155,15 @@ coverage:
 .deploy:
   stage: deploy
   environment: testing
-  cache:
-    # inherit all global cache settings
-    <<: *global_cache
-  resource_group: testing
   retry: 2
+  when: manual
   needs:
     - job: dist-test-docs
       artifacts: true
-  when: on_success
+  cache:
+    # inherit all global cache settings
+    <<: *global_cache
+  resource_group: testing
   before_script:
     # - pip install setuptools wheel twine
     - pip install twine
@@ -174,8 +181,10 @@ gitlab-packages:
   environment:
     name: testing
     url: https://gitlab.com/api/v4/projects/target-core/packages/pypi
+  when: on_success
   script:
     - !reference [.deploy, script]
+  after_script:
     # - TWINE_USERNAME=gitlab-ci-token TWINE_PASSWORD=${CI_JOB_TOKEN} twine upload --skip-existing --repository gitlab dist/*
     - TWINE_USERNAME=gitlab-ci-token TWINE_PASSWORD=${CI_JOB_TOKEN} twine upload --skip-existing --repository-url ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi dist/*
 
@@ -185,11 +194,9 @@ test-pypi:
     name: staging
     url: https://test.pypi.org
   resource_group: staging
-  when: manual
   rules:
     - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
-  script:
-    - !reference [.deploy, script]
+  after_script:
     - TWINE_USERNAME=__token__ TWINE_PASSWORD="$test_pypi_api_token" twine upload --skip-existing --repository testpypi dist/*
 
 pypi:
@@ -198,15 +205,13 @@ pypi:
     name: production
     url: https://pypi.org
   resource_group: production
-  when: manual
   rules:
     - if: $CI_COMMIT_TAG
-  script:
-    - !reference [.deploy, script]
+  after_script:
     - TWINE_USERNAME=__token__ TWINE_PASSWORD="$pypi_api_token" twine upload --skip-existing dist/*
   artifacts:
     name: release
-    expire_in: 3 year
+    expire_in: 2 year
     paths:
       - report.xml
       - coverage.xml
@@ -238,9 +243,16 @@ pages:
     CACHE_REQUEST_TIMEOUT: 5
   dependencies:
     - dist-test-docs
-  script: [echo Pages website upload]
+  script: [echo Pages website Upload]
   artifacts:
     name: pages
     expire_in: 1 year
     paths:
       - public
+
+code_quality:
+  stage: .post
+  rules:
+    - if: $CODE_QUALITY_DISABLED
+      when: never
+    - if: $CI_COMMIT_TAG || $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
-- 
GitLab


From 8bf691a19b9c63632f7a1460b34160afc3ad3d8f Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Fri, 15 Apr 2022 11:32:05 +0100
Subject: [PATCH 05/15] #7 Static Types: Coverage Report config update

---
 .gitlab-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 99102da..ff5f368 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -106,7 +106,7 @@ dist-test-docs:
     - mv cov_html public/coverage
   # TODO: remove
   # coverage: '/Code coverage: \d+\.\d+/'
-  coverage: '/Code coverage: ^TOTAL.+?(\d+\%)$/'
+  # coverage: '/Code coverage: ^TOTAL.+?(\d+\%)$/'
   artifacts:
     when: always
     paths:
-- 
GitLab


From 95f64bf1f8d2f6d7037845122555a9a4028f3c42 Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Fri, 15 Apr 2022 13:40:32 +0100
Subject: [PATCH 06/15] #7 Static Types: get_logger

---
 .gitlab-ci.yml     | 33 +++++++++++++++++++++-----------
 README.md          |  3 +--
 docs/conf.py       | 10 ++++++++--
 docs/target.rst    | 13 +++++++++++--
 pyproject.toml     |  2 +-
 setup.cfg          | 10 +++++++++-
 target/__init__.py | 47 ++++++++++++++++++++++++++++++++++++++++++++--
 target/logger.py   |  4 ++--
 8 files changed, 99 insertions(+), 23 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index ff5f368..7120a08 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -128,6 +128,8 @@ coverage:
   retry: 2
   dependencies:
     - dist-test-docs
+  variables:
+    CODECOV_TOKEN: $codecov_token
   before_script:
     # - pip install codecov
     # - pip install tox
@@ -141,10 +143,9 @@ coverage:
       chmod +x ./codecov
   script:
     # NOTE: https://docs.gitlab.com/ee/ci/yaml/script.html
-    # ./codecov
-    #   --token $codecov_token
+    # ./codecov --token $codecov_token
     - >
-      CODECOV_TOKEN="$codecov_token" ./codecov
+      ./codecov
       --file "coverage.xml"
       --name "codecov-$CI_PROJECT_NAME"
       --branch "$CI_COMMIT_BRANCH"
@@ -155,9 +156,9 @@ coverage:
 
 .deploy:
   stage: deploy
+  when: manual
   environment: testing
   retry: 2
-  when: manual
   needs:
     - job: dist-test-docs
       artifacts: true
@@ -173,20 +174,25 @@ coverage:
   script:
     # - python setup.py sdist bdist_wheel
     - twine check dist/*
+  after_script:
     # Publish after build
     # - twine upload -u "__token__" -p "pypi-$test_pypi_api_token" --skip-existing $artifact
     # - twine upload -u "__token__" -p "pypi-$test_pypi_api_token" --skip-existing dist/*
+    # - TWINE_USERNAME=gitlab-ci-token TWINE_PASSWORD=${CI_JOB_TOKEN} twine upload --skip-existing --repository gitlab dist/*
+    # - TWINE_USERNAME=gitlab-ci-token TWINE_PASSWORD=${CI_JOB_TOKEN} twine upload --skip-existing --repository-url ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi dist/*
+    - twine upload --skip-existing dist/*
 
 gitlab-packages:
   # https://docs.gitlab.com/ee/user/packages/pypi_repository/
   extends: .deploy
   stage: test
+  when: on_success
   environment:
     name: testing
     url: https://gitlab.com/api/v4/projects/target-core/packages/pypi
-  when: on_success
-  script:
-    - !reference [.deploy, script]
+  variables:
+    TWINE_USERNAME: gitlab-ci-token
+    TWINE_PASSWORD: ${CI_JOB_TOKEN}
+    TWINE_REPOSITORY_URL: ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi
   after_script:
-    # - TWINE_USERNAME=gitlab-ci-token TWINE_PASSWORD=${CI_JOB_TOKEN} twine upload --skip-existing --repository gitlab dist/*
-    - TWINE_USERNAME=gitlab-ci-token TWINE_PASSWORD=${CI_JOB_TOKEN} twine upload --skip-existing --repository-url ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi dist/*
+    - !reference [.deploy, after_script]
 
 test-pypi:
   extends: .deploy
@@ -197,7 +203,10 @@ test-pypi:
   rules:
     - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
   after_script:
-    - TWINE_USERNAME=__token__ TWINE_PASSWORD="$test_pypi_api_token" twine upload --skip-existing --repository testpypi dist/*
+    - >
+      TWINE_USERNAME=__token__ TWINE_PASSWORD="$test_pypi_api_token"
+      TWINE_REPOSITORY=testpypi
+      twine upload --skip-existing dist/*
 
 pypi:
   extends: .deploy
@@ -208,7 +217,9 @@ pypi:
   rules:
     - if: $CI_COMMIT_TAG
   after_script:
-    - TWINE_USERNAME=__token__ TWINE_PASSWORD="$pypi_api_token" twine upload --skip-existing dist/*
+    - >
+      TWINE_USERNAME=__token__ TWINE_PASSWORD="$pypi_api_token"
+      twine upload --skip-existing dist/*
   artifacts:
     name: release
     expire_in: 2 year
diff --git a/README.md b/README.md
index d720f78..ec7478b 100644
--- a/README.md
+++ b/README.md
@@ -8,8 +8,7 @@
 [![PyPI version](https://badge.fury.io/py/target-core.svg)](https://badge.fury.io/py/target-core)
 [![PyPi project installs](https://img.shields.io/pypi/dm/target-core.svg?maxAge=2592000&label=installs&color=%2327B1FF)](https://pypi.org/project/target-core)
 
-[Singer](https://www.singer.io/) target that uploads loads data to S3 in JSONL format
-following the [Singer spec](https://github.com/singer-io/getting-started/blob/master/docs/SPEC.md).
+[**Singer**](https://www.singer.io/) target core provides safe tools to easily build new `targets` following the [*Singer spec*](https://github.com/singer-io/getting-started/blob/master/docs/SPEC.md) *convention* and *protocol*.
 
 ## How to use it
 
diff --git a/docs/conf.py b/docs/conf.py
index e854b3f..7fed373 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -22,7 +22,7 @@ copyright = '2022, Eddy ∆'
 author = 'Eddy ∆'
 
 # The full version, including alpha/beta/rc tags
-release = '0.0.0'
+release = '0.0.1'
 
 
 # -- General configuration ---------------------------------------------------
@@ -30,7 +30,13 @@ release = '0.0.1'
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = ['sphinx.ext.todo', 'sphinx.ext.viewcode', 'sphinx.ext.autodoc']
+extensions = [
+    'sphinx.ext.todo',
+    'sphinx.ext.viewcode',
+    'sphinx.ext.autodoc',
+    'sphinx.ext.napoleon', # Numpy doc style
+    'sphinx.ext.autosummary',
+]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
diff --git a/docs/target.rst b/docs/target.rst
index d6d4696..e7ec03c 100644
--- a/docs/target.rst
+++ b/docs/target.rst
@@ -1,5 +1,14 @@
-target package
-==============
+Welcome to Target Core package documentation!
+=============================================
+
+`Singer <https://www.singer.io/>`_ **target-core** provides safe tools to easily build new `targets`
+following the `Singer spec <https://github.com/singer-io/getting-started/blob/master/docs/SPEC.md>`_ *convention* and *protocol*.
+
+.. note::
+   This project is under active development.
+
+.. autosummary::
+   :toctree: generated
 
 Submodules
 ----------
diff --git a/pyproject.toml b/pyproject.toml
index a53d241..13ebca8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [tool.mypy]
 # TODO: unlock later
-ignore_errors = true
+# ignore_errors = true
 
 show_error_context = true
 ignore_missing_imports = true
diff --git a/setup.cfg b/setup.cfg
index c9e3df6..e7cddae 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -40,7 +40,11 @@ dist =
     wheel # build
 deploy =
     twine
-docs = sphinx-rtd-theme
+docs =
+    sphinx
+    sphinx-rtd-theme
+    sphinx-automodapi
+    numpydoc
 
 [options.packages.find]
 exclude =
@@ -76,6 +80,10 @@ max-complexity = 10
 builder = html
 warning-is-error = true
 # keep-going = true
+# project = 'Target Core'
+# version = attr: target.__version__
+# release = attr: target.__version__
+# source-dir = 'docs'
 
 [tox:tox]
 # requires = tox-pipenv
diff --git a/target/__init__.py b/target/__init__.py
index 15400fd..b661b2a 100644
--- a/target/__init__.py
+++ b/target/__init__.py
@@ -102,8 +102,23 @@ def float_to_decimal(value):
     return value
 
 
-def get_target_key(stream, config, date_time=None):
-    '''Creates and returns an S3 key for the stream'''
+def get_target_key(stream: str, config, date_time=None):
+    '''Creates and returns an S3 key for the stream
+
+    Parameters
+    ----------
+    stream : str
+        incoming stream name that is written in the file
+    config : dict
+        configuration dictionary
+    date_time : datetime
+        Date used in the path template
+
+    Returns
+    -------
+    out : ``str``
+        The formatted path.
+    '''
 
     # NOTE: Replace dynamic tokens
     key = config.get('path_template').format(stream=stream, date_time=date_time, uuid=uuid4())
@@ -113,6 +128,34 @@ def get_target_key(stream, config, date_time=None):
 
 
 def persist_lines(messages, config, save_records=save_jsonl_file):
+    '''Process the lines received from the Singer Tap.
+
+    This is the core of the messages processing.
+    Each line is processed according to its type, and the *RECORD* lines are saved using the function provided as an argument.
+    By default they are written as a *jsonl* in the *work_dir* working directory provided in the default `config` file.
+
+    Parameters
+    ----------
+    config : dict
+        configuration dictionary. 
+    date_time : datetime
+        Date (``datetime``) used in the path template
+
+    Raises
+    ------
+    json.decoder.JSONDecodeError
+        If the line structure is inconsistent or contains errors.
+
+    Returns
+    -------
+    out : list[dict, dict]
+        A `state` closure info.
+
+    See Also
+    --------
+    `Singer spec <https://github.com/singer-io/getting-started/blob/master/docs/SPEC.md>`_ *convention* and *protocol*.
+    '''
+
     state = None
     schemas = {}
     key_properties = {}
diff --git a/target/logger.py b/target/logger.py
index 30c250d..7aac87a 100644
--- a/target/logger.py
+++ b/target/logger.py
@@ -1,8 +1,8 @@
 from pathlib import Path
-from logging import config, getLogger
+from logging import config, getLogger, Logger
 
 
-def get_logger():
+def get_logger() -> Logger:
     '''Return a Logger instance appropriate for using in a Tap or a Target.'''
     # See
     # https://docs.python.org/3.5/library/logging.config.html#logging.config.fileConfig
-- 
GitLab


From b3f4664eb2e9443c994d1792a439de380d99e383 Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Fri, 15 Apr 2022 13:51:29 +0100
Subject: [PATCH 07/15] #7 Static Types: lint

---
 docs/conf.py       | 2 +-
 target/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 7fed373..0dd4167 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -34,7 +34,7 @@ extensions = [
     'sphinx.ext.todo',
     'sphinx.ext.viewcode',
     'sphinx.ext.autodoc',
-    'sphinx.ext.napoleon', # Numpy doc style
+    'sphinx.ext.napoleon',  # Numpy doc style
     'sphinx.ext.autosummary',
 ]
 
diff --git a/target/__init__.py b/target/__init__.py
index b661b2a..8d45513 100644
--- a/target/__init__.py
+++ b/target/__init__.py
@@ -137,7 +137,7 @@ def persist_lines(messages, config, save_records=save_jsonl_file):
     Parameters
     ----------
     config : dict
-        configuration dictionary. 
+        configuration dictionary.
     date_time : datetime
         Date (``datetime``) used in the path template
 
-- 
GitLab


From 4c7495f1365050d2d1f6949063d901e7ddbd4fad Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Fri, 15 Apr 2022 13:56:48 +0100
Subject: [PATCH 08/15] #7 Static Types: static config

---
 .gitlab-ci.yml | 5 ++++-
 pyproject.toml | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7120a08..0782d40 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -86,6 +86,9 @@ lint-static:
 
 dist-test-docs:
   stage: build
+  needs:
+    - job: lint-static
+      artifacts: false
   environment: testing
   resource_group: testing
   retry: 2
@@ -171,10 +174,10 @@ coverage:
   script:
     # - python setup.py sdist bdist_wheel
     - twine check dist/*
+  after_script:
     # Publish after build
     # - twine upload -u "__token__" -p "pypi-$test_pypi_api_token" --skip-existing $artifact
     # - twine upload -u "__token__" -p "pypi-$test_pypi_api_token" --skip-existing dist/*
-  after_script:
     # - TWINE_USERNAME=gitlab-ci-token TWINE_PASSWORD=${CI_JOB_TOKEN} twine upload --skip-existing --repository gitlab dist/*
     # - TWINE_USERNAME=gitlab-ci-token TWINE_PASSWORD=${CI_JOB_TOKEN} twine upload --skip-existing --repository-url ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi dist/*
     - twine upload --skip-existing dist/*
diff --git a/pyproject.toml b/pyproject.toml
index 13ebca8..a53d241 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [tool.mypy]
 # TODO: unlock later
-# ignore_errors = true
+ignore_errors = true
 
 show_error_context = true
 ignore_missing_imports = true
-- 
GitLab


From 8fab913ca9169db52511bf7d6343e1f19b9ec8ab Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Fri, 15 Apr 2022 14:05:54 +0100
Subject: [PATCH 09/15] #7 Static Types: CI update

---
 .gitlab-ci.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 0782d40..b1825d4 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -24,8 +24,8 @@ image: python:latest
 
 include:
   - template: Security/SAST.gitlab-ci.yml
-  - template: Security/Secret-Detection.gitlab-ci.yml
   - template: Code-Quality.gitlab-ci.yml
+  # - template: Security/Secret-Detection.gitlab-ci.yml
   # NOTE: Available with Ultimate subscription
   # - template: Security/Dependency-Scanning.gitlab-ci.yml
   # - template: Verify/Load-Performance-Testing.gitlab-ci.yml
@@ -99,11 +99,15 @@ dist-test-docs:
   #     policy: pull
   # dependencies:
   #   - build
+  variables:
+    # Use slow compression for artifacts, resulting in smaller archives
+    ARTIFACT_COMPRESSION_LEVEL: "slowest"
+    TOX_PARALLEL_NO_SPINNER: 1
   before_script:
     - pip install tox
   script:
     # - python setup.py test
-    - TOX_PARALLEL_NO_SPINNER=1 tox --develop --parallel
+    - tox --develop --parallel
   after_script:
     - mv .tox/docs_out/ public/
     - mv cov_html public/coverage
-- 
GitLab


From d434d503dab6c0778405741d11bc68d11aba2e35 Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Fri, 15 Apr 2022 14:24:56 +0100
Subject: [PATCH 10/15] #7 Static Types: CI before_script

---
 .gitlab-ci.yml | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b1825d4..ee7d5ce 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -80,6 +80,7 @@ lint-static:
   resource_group: testing
   retry: 2
   before_script:
+    - !reference [before_script]
     - pip install tox
   script:
     - tox --develop --parallel -e lint,static
@@ -104,6 +105,7 @@ dist-test-docs:
     ARTIFACT_COMPRESSION_LEVEL: "slowest"
     TOX_PARALLEL_NO_SPINNER: 1
   before_script:
+    - !reference [before_script]
     - pip install tox
   script:
     # - python setup.py test
@@ -185,20 +187,18 @@ test-pypi:
 
 test-pypi:
   extends: .deploy
+  rules:
+    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
   environment:
     name: staging
     url: https://test.pypi.org
   resource_group: staging
-  rules:
-    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
-  after_script:
-    - >
-      TWINE_USERNAME=__token__ TWINE_PASSWORD="$test_pypi_api_token"
-      TWINE_REPOSITORY=testpypi
-      twine upload --skip-existing dist/*
+  variables:
+    TWINE_USERNAME: __token__
+    TWINE_PASSWORD: $test_pypi_api_token
+    TWINE_REPOSITORY_URL: testpypi
 
 pypi:
   extends: .deploy
+  rules:
+    - if: $CI_COMMIT_TAG
   environment:
     name: production
     url: https://pypi.org
   resource_group: production
-  rules:
-    - if: $CI_COMMIT_TAG
-  after_script:
-    - >
-      TWINE_USERNAME=__token__ TWINE_PASSWORD="$pypi_api_token"
-      twine upload --skip-existing dist/*
+  variables:
+    TWINE_USERNAME: __token__
+    TWINE_PASSWORD: $pypi_api_token
   artifacts:
     name: release
     expire_in: 2 year
-- 
GitLab


From 34d525200e84f60fa768a5253e823760d3cd48f4 Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Fri, 15 Apr 2022 16:48:11 +0100
Subject: [PATCH 11/15] #7 Static Types: Mypy Static test covered

---
 .gitlab-ci.yml       | 13 +++++--------
 pyproject.toml       |  2 +-
 target/__init__.py   | 42 ++++++++++++++++++++++--------------------
 target/file.py       | 23 +++++++++++++----------
 target/s3.py         | 12 ++++++------
 tests/test_target.py |  1 +
 6 files changed, 48 insertions(+), 45 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index ee7d5ce..76289fa 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -64,10 +64,6 @@ cache: &global_cache
 
 before_script:
   # - python --version # For debugging
-  # - pip install virtualenv
-  # - virtualenv venv
-  # - source venv/bin/activate
-  - python --version # For debugging
   # - pip install --upgrade pip virtualenv
   # - virtualenv venv
   - python -m venv venv
@@ -80,7 +76,6 @@ lint-static:
   resource_group: testing
   retry: 2
   before_script:
-    - !reference [before_script]
     - pip install tox
   script:
     - tox --develop --parallel -e lint,static
@@ -105,7 +100,6 @@ dist-test-docs:
     ARTIFACT_COMPRESSION_LEVEL: "slowest"
     TOX_PARALLEL_NO_SPINNER: 1
   before_script:
-    - !reference [before_script]
     - pip install tox
   script:
     # - python setup.py test
@@ -132,16 +126,18 @@ dist-test-docs:
 
 coverage:
   stage: test
+  needs:
+    - job: dist-test-docs
+      artifacts: true
   environment: testing
   resource_group: testing
   retry: 2
-  dependencies:
-    - dist-test-docs
   variables:
     CODECOV_TOKEN: $codecov_token
   before_script:
     # - pip install codecov
     # - pip install tox
+    - echo -e "\e[0Ksection_start:`date +%s`:my_first_section[collapsed=true]\r\e[0KCodecov installation"
    - |
      echo Install codecov
      curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --no-default-keyring --keyring trustedkeys.gpg --import # One-time step
      curl -Os https://uploader.codecov.io/latest/linux/codecov
      curl -Os https://uploader.codecov.io/latest/linux/codecov.SHA256SUM
      curl -Os https://uploader.codecov.io/latest/linux/codecov.SHA256SUM.sig
      gpgv codecov.SHA256SUM.sig codecov.SHA256SUM
      shasum -a 256 -c codecov.SHA256SUM
      chmod +x ./codecov
+    - echo -e "\e[0Ksection_end:`date +%s`:my_first_section\r\e[0K"
   script:
     # NOTE: https://docs.gitlab.com/ee/ci/yaml/script.html
     # ./codecov --token $codecov_token
diff --git a/pyproject.toml b/pyproject.toml
index a53d241..13ebca8 
100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [tool.mypy]
 # TODO: unlock later
-ignore_errors = true
+# ignore_errors = true
 
 show_error_context = true
 ignore_missing_imports = true
diff --git a/target/__init__.py b/target/__init__.py
index 8d45513..4322fc3 100644
--- a/target/__init__.py
+++ b/target/__init__.py
@@ -2,6 +2,7 @@
 
 __version__ = '0.0.1'
 
+from typing import Any, Callable, Dict, TextIO, Tuple, Optional
 import argparse
 import json
 from pathlib import Path
@@ -29,7 +30,7 @@ CONFIG_PARAMS = {
 }
 
 
-def add_metadata_columns_to_schema(schema_message):
+def add_metadata_columns_to_schema(schema_message: Dict) -> Dict:
     '''Metadata _sdc columns according to the stitch documentation at
     https://www.stitchdata.com/docs/data-structure/integration-schemas#sdc-columns
 
@@ -47,7 +48,7 @@ def add_metadata_columns_to_schema(schema_message):
     return schema_message
 
 
-def add_metadata_values_to_record(record_message, schema_message, timestamp):
+def add_metadata_values_to_record(record_message: Dict, schema_message: Dict, timestamp: datetime.datetime) -> Dict:
     '''Populate metadata _sdc columns from incoming record message
     The location of the required attributes are fixed in the stream
     '''
@@ -64,7 +65,7 @@ def add_metadata_values_to_record(record_message, schema_message, timestamp):
     return record_message['record']
 
 
-def remove_metadata_values_from_record(record_message):
+def remove_metadata_values_from_record(record_message: Dict) -> Dict:
     '''Removes every metadata _sdc column from a given record message
     '''
     for key in {
@@ -82,7 +83,7 @@ def remove_metadata_values_from_record(record_message):
     return record_message['record']
 
 
-def emit_state(state):
+def emit_state(state: Optional[Any]) -> None:
     if state is not None:
         line = json.dumps(state)
         LOGGER.debug('Emitting state {}'.format(line))
@@ -90,7 +91,7 @@ def emit_state(state):
     sys.stdout.flush()
 
 
-def float_to_decimal(value):
+def float_to_decimal(value: Any) -> Any:
     '''Walk the given data structure and turn all instances of float into double.'''
 
     if isinstance(value, float):
@@ -102,7 +103,7 @@ def float_to_decimal(value):
     return value
 
 
-def get_target_key(stream: str, config, date_time=None):
+def get_target_key(stream: str, config: Dict[str, Any], date_time: datetime.datetime = None) -> str:
     '''Creates and returns an S3 key for the stream
 
     Parameters
@@ -121,13 +122,13 @@ def get_target_key(stream: str, config, date_time=None):
     '''
 
     # NOTE: Replace dynamic tokens
-    key = config.get('path_template').format(stream=stream, date_time=date_time, uuid=uuid4())
+    key = config['path_template'].format(stream=stream, date_time=date_time, uuid=uuid4())
 
     prefix = config.get('key_prefix', '')
     return str(Path(key).parent / f'{prefix}{Path(key).name}') if prefix else key
 
 
-def persist_lines(messages, config, save_records=save_jsonl_file):
+def persist_lines(messages: TextIO, config: Dict, save_records: Callable = save_jsonl_file) -> Tuple[Optional[Any], Dict[Any, Any]]:
     '''Process the lines received from the Singer Tap.
 
     This is the core of the messages processing.
@@ -159,8 +160,8 @@ def persist_lines(messages, config, save_records=save_jsonl_file):
     state = None
     schemas = {}
     key_properties = {}
-    validators = {}
-    file_data = {}
+    validators: Dict = {}
+    file_data: Dict = {}
 
     # NOTE: Use the system specific temp directory if no custom work_dir provided
     work_dir = Path(config.get('work_dir', gettempdir())).expanduser()
@@ -168,7 +169,7 @@ def persist_lines(messages, config, save_records=save_jsonl_file):
     # NOTE: Create work_dir if not exists
     work_dir.mkdir(parents=True, exist_ok=True)
 
-    timezone = datetime.timezone(datetime.timedelta(hours=config.get('timezone_offset'))) if config.get('timezone_offset') is not None else None
+    timezone = datetime.timezone(datetime.timedelta(hours=config['timezone_offset'])) if config.get('timezone_offset') is not None else None
     now = datetime.datetime.now(timezone)
 
     for line in messages:
@@ -195,9 +196,7 @@ def persist_lines(messages, config, save_records=save_jsonl_file):
             record_to_load = add_metadata_values_to_record(m, {}, now) if config.get('add_metadata_columns') else remove_metadata_values_from_record(m)
             file_data[stream]['file_data'].append(record_to_load)
 
-            # NOTE: write the lines into the temporary file when received data over 64Mb default memory buffer
-            if sys.getsizeof(file_data[stream]['file_data']) > config.get('memory_buffer'):
-                save_records(file_data[stream], config)
+            save_records(file_data[stream], config)
 
             state = None
 
@@ -235,20 +234,23 @@ def persist_lines(messages, config, save_records=save_jsonl_file):
         else:
             LOGGER.warning('Unknown line type "{}" in line "{}"'.format(m['type'], m))
 
-    for _, file_info in file_data.items():
-        save_records(file_info, config)
-
     return state, file_data
 
 
-def main():
+def main() -> None:
     parser = argparse.ArgumentParser()
     parser.add_argument('-c', '--config', help='Config file', required=True)
     args = parser.parse_args()
+    config = config_compression(config_file(args.config))
 
-    state, _ = persist_lines(
+    state, file_data = persist_lines(
         sys.stdin,
-        config_compression(config_file(args.config)))
+        config,
+        save_jsonl_file)
+
+    config['memory_buffer'] = 0
+    for _, file_info in file_data.items():
+        save_jsonl_file(file_info, config)
 
     emit_state(state)
     LOGGER.debug('Exiting normally')
diff --git a/target/file.py b/target/file.py
index 27c7988..16f1308 100644
--- a/target/file.py
+++ b/target/file.py
@@ -1,3 +1,5 @@
+from typing import Dict, Any
+import sys
 import gzip
 import lzma
 import json
@@ -8,12 +10,12 @@ from target.logger import get_logger
 LOGGER = get_logger()
 
 
-def config_file(config_path, datetime_format={
-        'date_time_format': '%FT%T.%f'}):
+def config_file(config_path: str, datetime_format: Dict = {
+        'date_time_format': '%FT%T.%f'}) -> Dict:
 
-    path_template_default = '{stream}-{date_time:%s}.json' % datetime_format['date_time_format']
+    path_template_default: str = '{stream}-{date_time:%s}.json' % datetime_format['date_time_format']
 
-    config = {
+    config: Dict[str, Any] = {
         'path_template': path_template_default,
         'memory_buffer': 64e6
     }
@@ -28,8 +30,8 @@ def config_file(config_path, datetime_format={
     return config
 
 
-def config_compression(config_default):
-    config = {
+def config_compression(config_default: Dict) -> Dict:
+    config: Dict[str, Any] = {
         'compression': 'none'
     }
     config.update(config_default)
@@ -56,10 +58,11 @@ def config_compression(config_default):
     return config
 
 
-def save_jsonl_file(file_data, config):
-    if any(file_data['file_data']):
-        with config.get('open_func')(file_data['file_name'], 'at', encoding='utf-8') as output_file:
+def 
+def save_jsonl_file(file_data: Dict, config: Dict[str, Any]) -> None:
+    # NOTE: write the lines into the temporary file when received data over 64Mb default memory buffer
+    if sys.getsizeof(file_data['file_data']) > config.get('memory_buffer', 0) and any(file_data['file_data']):
+        with config['open_func'](file_data['file_name'], 'at', encoding='utf-8') as output_file:
             output_file.writelines((json.dumps(record) + '\n' for record in file_data['file_data']))
 
         del file_data['file_data'][:]
-        LOGGER.debug("'{}' file saved using open_func '{}'".format(file_data['file_name'], config.get('open_func').__name__))
+        LOGGER.debug("'{}' file saved using open_func '{}'".format(file_data['file_name'], config['open_func'].__name__))
diff --git a/target/s3.py b/target/s3.py
index 2a62aa9..05558e3 100644
--- a/target/s3.py
+++ b/target/s3.py
@@ -2,14 +2,14 @@ import os
 
 import backoff
 import boto3
+from typing import Callable, Dict, Any
 from botocore.exceptions import ClientError
 
 from target.logger import get_logger
 
-
 LOGGER = get_logger()
 
 
-def retry_pattern():
+def retry_pattern() -> Callable:
     return backoff.on_exception(
         backoff.expo,
         ClientError,
@@ -18,12 +18,12 @@ def retry_pattern():
         factor=10)
 
 
-def log_backoff_attempt(details):
+def log_backoff_attempt(details: Dict) -> None:
     LOGGER.info("Error detected communicating with Amazon, triggering backoff: %d try", details.get("tries"))
 
 
 @retry_pattern()
-def create_client(config):
+def create_client(config: Dict) -> Any:
     LOGGER.info("Attempting to create AWS session")
 
     # Get the required parameters from config file and/or environment variables
@@ -51,8 +51,8 @@ def create_client(config):
 
 # pylint: disable=too-many-arguments
 @retry_pattern()
-def upload_file(s3_client, filename, bucket, s3_key,
-                encryption_type=None, encryption_key=None):
+def upload_file(s3_client: Any, filename: str, bucket: str, s3_key: str,
+                encryption_type: str = None, encryption_key: str = None) -> None:
 
     if encryption_type is None or encryption_type.lower() == "none":
         # No encryption config (defaults to settings on the bucket):
diff --git a/tests/test_target.py b/tests/test_target.py
index 7ed49f1..6be0ba2 100644
--- a/tests/test_target.py
+++ b/tests/test_target.py
@@ -309,6 +309,7 @@ def test_get_target_key(config):
 
 def test_persist_lines(caplog, config, input_data, input_multi_stream_data, invalid_row_data, invalid_order_data, state, file_metadata):
     '''TEST : simple persist_lines call'''
+    config['memory_buffer'] = 0
     output_state, output_file_metadata = persist_lines(input_multi_stream_data, config)
 
     file_paths = set(path for path in Path(config['work_dir']).iterdir())
-- 
GitLab


From b12f9b3dd809950f2e445039cc2662bef6d1f83c Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Fri, 15 Apr 2022 16:57:50 +0100
Subject: [PATCH 12/15] #7 Static Types: Gitlab Pages website pushed on PR
 merge

---
 .gitlab-ci.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 76289fa..2f9d173 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -189,7 +189,6 @@ coverage:
 gitlab-packages:
   # https://docs.gitlab.com/ee/user/packages/pypi_repository/
   extends: .deploy
-  stage: test
   when: on_success
   environment:
     name: testing
@@ -198,8 +197,8 @@ gitlab-packages:
     TWINE_USERNAME: gitlab-ci-token
     TWINE_PASSWORD: ${CI_JOB_TOKEN}
     TWINE_REPOSITORY_URL: ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi
-  after_script:
-    - !reference [.deploy, after_script]
+  script:
+    - !reference [.deploy, script]
 
 test-pypi:
   extends: .deploy
@@ -241,7 +240,8 @@ pypi:
 pages:
   stage: deploy
   rules:
-    - if: $CI_COMMIT_TAG
+    # - if: $CI_COMMIT_TAG
+    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
   environment:
     name: production
     url: https://omegax.gitlab.com/target-core/index.html
-- 
GitLab


From 9260407c1db4dc1e350cb01e4d7e2096d72325ef Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Fri, 15 Apr 2022 17:11:10 +0100
Subject: [PATCH 13/15] #7 Static Types: Gitlab Pages website dependency

---
 .gitlab-ci.yml | 3 +++
 docs/conf.py   | 6 +++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 2f9d173..e88bc13 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -259,6 +259,9 @@ pages:
     CACHE_REQUEST_TIMEOUT: 5
   dependencies:
     - dist-test-docs
+  needs:
+    - job: dist-test-docs
+      artifacts: true
   script: [echo Pages website Upload]
   artifacts:
     name: pages
diff --git a/docs/conf.py b/docs/conf.py
index 0dd4167..492c6ae 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -10,9 +10,9 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-import os
-import sys
-sys.path.insert(0, os.path.abspath('..'))
+from os.path import abspath
+from sys import path
+path.insert(0, abspath('..'))
 
 
 # -- Project information -----------------------------------------------------
-- 
GitLab


From fa6b69425c3e4cabd362deefcfb3006d9f6f4b38 Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Fri, 15 Apr 2022 17:23:00 +0100
Subject: [PATCH 14/15] #7 Static Types: Stricter

---
 pyproject.toml     | 6 +++---
 target/__init__.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 13ebca8..bfed952 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,13 +30,13 @@ disallow_untyped_defs = true
 warn_redundant_casts = true
 warn_unused_configs = true
 warn_unused_ignores = true
+disallow_untyped_calls = true
+no_implicit_reexport = true
+strict_equality = true
 
 # The following need to have changes made to be able to enable them:
 # disallow_any_generics = true
-# disallow_untyped_calls = true
 # no_implicit_optional = true
-# no_implicit_reexport = true
-# strict_equality = true
 # warn_return_any = true
 
 [[tool.mypy.overrides]] # Overrides for currently untyped modules
diff --git a/target/__init__.py b/target/__init__.py
index 4322fc3..dabc48d 100644
--- a/target/__init__.py
+++ b/target/__init__.py
@@ -103,7 +103,7 @@ def float_to_decimal(value: Any) -> Any:
     return value
 
 
-def get_target_key(stream: str, config: Dict[str, Any], date_time: datetime.datetime = None) -> str:
+def get_target_key(stream: str, config: Dict[str, Any], date_time: datetime.datetime) -> str:
     '''Creates and returns an S3 key for the stream
 
     Parameters
-- 
GitLab


From 7b0ed151f4d858ae8b94356a61da451b5510bbad Mon Sep 17 00:00:00 2001
From: Eddy Delta
Date: Fri, 15 Apr 2022 17:25:36 +0100
Subject: [PATCH 15/15] #7 Static Types: Cleaning

---
 pyproject.toml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index bfed952..3f7c2b3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,9 +6,6 @@ requires = [
 build-backend = "setuptools.build_meta"
 
 [tool.mypy]
-# TODO: unlock later
-# ignore_errors = true
-
 show_error_context = true
 ignore_missing_imports = true
-- 
GitLab