From be9f36dbdc229e387b4a303621660188214f47ed Mon Sep 17 00:00:00 2001 From: jo Date: Wed, 15 Jun 2022 15:56:52 +0200 Subject: [PATCH] feat(analyzer): move compute_md5 to shared library --- .../pipeline/analyze_metadata.py | 17 +---------------- .../tests/pipeline/analyze_metadata_test.py | 6 +----- shared/libretime_shared/files.py | 17 +++++++++++++++++ shared/tests/files_test.py | 10 ++++++++++ 4 files changed, 29 insertions(+), 21 deletions(-) create mode 100644 shared/libretime_shared/files.py create mode 100644 shared/tests/files_test.py diff --git a/analyzer/libretime_analyzer/pipeline/analyze_metadata.py b/analyzer/libretime_analyzer/pipeline/analyze_metadata.py index 648b57ffa..c3df59dca 100644 --- a/analyzer/libretime_analyzer/pipeline/analyze_metadata.py +++ b/analyzer/libretime_analyzer/pipeline/analyze_metadata.py @@ -1,9 +1,9 @@ -import hashlib from datetime import timedelta from pathlib import Path from typing import Any, Dict import mutagen +from libretime_shared.files import compute_md5 from loguru import logger @@ -108,18 +108,3 @@ def analyze_metadata(filepath_: str, metadata: Dict[str, Any]): continue return metadata - - -def compute_md5(filepath: Path) -> str: - """ - Compute a file md5sum. - """ - with filepath.open("rb") as file: - buffer = hashlib.md5() # nosec - while True: - blob = file.read(8192) - if not blob: - break - buffer.update(blob) - - return buffer.hexdigest() diff --git a/analyzer/tests/pipeline/analyze_metadata_test.py b/analyzer/tests/pipeline/analyze_metadata_test.py index b1ac6c4ca..f0598d672 100644 --- a/analyzer/tests/pipeline/analyze_metadata_test.py +++ b/analyzer/tests/pipeline/analyze_metadata_test.py @@ -2,7 +2,7 @@ from pathlib import Path import pytest -from libretime_analyzer.pipeline.analyze_metadata import analyze_metadata, compute_md5 +from libretime_analyzer.pipeline.analyze_metadata import analyze_metadata from ..fixtures import FILE_INVALID_DRM, FILE_INVALID_TXT, FILES_TAGGED @@ -48,7 +48,3 @@ def test_analyze_metadata_unparsable_file(): "hidden": False, "md5": "4d5e4b1c8e8febbd31fa9ce7f088beae", } - - -def test_compute_md5(): - assert compute_md5(FILE_INVALID_TXT) == "4d5e4b1c8e8febbd31fa9ce7f088beae" diff --git a/shared/libretime_shared/files.py b/shared/libretime_shared/files.py new file mode 100644 index 000000000..51f207977 --- /dev/null +++ b/shared/libretime_shared/files.py @@ -0,0 +1,17 @@ +import hashlib +from pathlib import Path + + +def compute_md5(filepath: Path) -> str: + """ + Compute a file md5sum. + """ + with filepath.open("rb") as file: + buffer = hashlib.md5() # nosec + while True: + blob = file.read(8192) + if not blob: + break + buffer.update(blob) + + return buffer.hexdigest() diff --git a/shared/tests/files_test.py b/shared/tests/files_test.py new file mode 100644 index 000000000..afbc462f6 --- /dev/null +++ b/shared/tests/files_test.py @@ -0,0 +1,10 @@ +from pathlib import Path + +from libretime_shared.files import compute_md5 + + +def test_compute_md5(tmp_path: Path) -> None: + tmp_file = tmp_path / "somefile.txt" + tmp_file.write_text("some test") + + assert compute_md5(tmp_file) == "f1b75ac7689ff88e1ecc40c84b115785"