From 834d74f29ac39d57ac6277e4c9fe8aefa56efbf5 Mon Sep 17 00:00:00 2001 From: Christoph Ladurner Date: Wed, 1 Oct 2025 23:05:55 +0200 Subject: [PATCH 1/4] fix(chore): DeprecationWarning stdlib * datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC). * the decision was made to move to utc aware timestamps BREAKING CHANGE: change of mapping --- invenio_stats/__init__.py | 4 ++-- invenio_stats/aggregations.py | 20 ++++++++++++------- invenio_stats/bookmark.py | 5 +++-- invenio_stats/cli.py | 1 + invenio_stats/contrib/event_builders.py | 5 +++-- .../file_download/os-v2/file-download-v1.json | 4 ++-- .../record_view/os-v2/record-view-v1.json | 2 +- invenio_stats/processors.py | 8 ++++---- invenio_stats/tasks.py | 11 +++++++--- tests/conftest.py | 12 +++++------ tests/contrib/test_event_builders.py | 13 ++++++------ tests/helpers.py | 7 ++++--- tests/test_aggregations.py | 5 +++-- tests/test_prefixing.py | 1 + tests/test_processors.py | 2 +- 15 files changed, 59 insertions(+), 41 deletions(-) diff --git a/invenio_stats/__init__.py b/invenio_stats/__init__.py index e42c725..1d09d0c 100644 --- a/invenio_stats/__init__.py +++ b/invenio_stats/__init__.py @@ -3,7 +3,7 @@ # This file is part of Invenio. # Copyright (C) 2017-2024 CERN. # Copyright (C) 2022-2023 TU Wien. -# Copyright (C) 2024 Graz University of Technology. +# Copyright (C) 2024-2025 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. 
@@ -171,7 +171,7 @@ def register_events(): from invenio_stats.proxies import current_stats event = { - "timestamp": datetime.datetime.utcnow().isoformat(), + "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(), "mydata": "somedata" } diff --git a/invenio_stats/aggregations.py b/invenio_stats/aggregations.py index 1525f1e..eec9c98 100644 --- a/invenio_stats/aggregations.py +++ b/invenio_stats/aggregations.py @@ -3,6 +3,7 @@ # This file is part of Invenio. # Copyright (C) 2017-2019 CERN. # Copyright (C) 2022 TU Wien. +# Copyright (C) 2025 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -10,7 +11,7 @@ """Aggregation classes.""" import math -from datetime import datetime +from datetime import datetime, timezone from dateutil import parser from dateutil.relativedelta import relativedelta @@ -168,7 +169,7 @@ def _get_oldest_event_timestamp(self): # indexed but the indices have not been refreshed yet. if len(result) == 0: return None - return parser.parse(result[0]["timestamp"]) + return parser.parse(result[0]["timestamp"]).replace(tzinfo=timezone.utc) def _split_date_range(self, lower_limit, upper_limit): """Return dict of rounded dates in range, split by aggregation interval. 
@@ -259,7 +260,9 @@ def agg_iter(self, dt, previous_bookmark): "value_as_string", None ) if last_update_aggr and previous_bookmark: - last_date = datetime.fromisoformat(last_update_aggr.rstrip("Z")) + last_date = datetime.fromisoformat( + last_update_aggr.rstrip("Z") + ).replace(tzinfo=timezone.utc) if last_date < previous_bookmark: continue @@ -267,7 +270,9 @@ def agg_iter(self, dt, previous_bookmark): aggregation_data["timestamp"] = interval_date.isoformat() aggregation_data[self.field] = aggregation["key"] aggregation_data["count"] = aggregation["doc_count"] - aggregation_data["updated_timestamp"] = datetime.utcnow().isoformat() + aggregation_data["updated_timestamp"] = datetime.now( + timezone.utc + ).isoformat() if self.metric_fields: for f in self.metric_fields: @@ -293,9 +298,10 @@ def agg_iter(self, dt, previous_bookmark): } def _upper_limit(self, end_date): + max_ = datetime.max.replace(tzinfo=timezone.utc) return min( - end_date or datetime.max, # ignore if `None` - datetime.utcnow(), + end_date or max_, # ignore if `None` + datetime.now(timezone.utc), ) def run(self, start_date=None, end_date=None, update_bookmark=True): @@ -317,7 +323,7 @@ def run(self, start_date=None, end_date=None, update_bookmark=True): # Let's get the timestamp before we start the aggregation. # This will be used for the next iteration. Some events might be processed twice if not end_date: - end_date = datetime.utcnow().isoformat() + end_date = datetime.now(timezone.utc).isoformat() results = [] for dt_key, dt in sorted(dates.items()): diff --git a/invenio_stats/bookmark.py b/invenio_stats/bookmark.py index d615722..cae96e5 100644 --- a/invenio_stats/bookmark.py +++ b/invenio_stats/bookmark.py @@ -3,13 +3,14 @@ # This file is part of Invenio. # Copyright (C) 2017-2019 CERN. # Copyright (C) 2022 TU Wien. +# Copyright (C) 2025 Graz University of Technology. 
# # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. """BookMark used by aggregations.""" from collections import OrderedDict -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from functools import wraps from invenio_search.engine import dsl, search @@ -107,7 +108,7 @@ def get_bookmark(self, refresh_time=60): # This means that some events might be processed twice if refresh_time: my_date -= timedelta(seconds=refresh_time) - return my_date + return my_date.replace(tzinfo=timezone.utc) @_ensure_index_exists def list_bookmarks(self, start_date=None, end_date=None, limit=None): diff --git a/invenio_stats/cli.py b/invenio_stats/cli.py index bd49086..7a91536 100644 --- a/invenio_stats/cli.py +++ b/invenio_stats/cli.py @@ -3,6 +3,7 @@ # This file is part of Invenio. # Copyright (C) 2018 CERN. # Copyright (C) 2022 TU Wien. +# Copyright (C) 2025 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. diff --git a/invenio_stats/contrib/event_builders.py b/invenio_stats/contrib/event_builders.py index 667b8bd..10644ed 100644 --- a/invenio_stats/contrib/event_builders.py +++ b/invenio_stats/contrib/event_builders.py @@ -3,6 +3,7 @@ # This file is part of Invenio. # Copyright (C) 2017-2018 CERN. # Copyright (C) 2022 TU Wien. +# Copyright (C) 2025 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. 
@@ -21,7 +22,7 @@ def file_download_event_builder(event, sender_app, obj=None, **kwargs): event.update( { # When: - "timestamp": datetime.datetime.utcnow().isoformat(), + "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(), # What: "bucket_id": str(obj.bucket_id), "file_id": str(obj.file_id), @@ -52,7 +53,7 @@ def record_view_event_builder(event, sender_app, pid=None, record=None, **kwargs event.update( { # When: - "timestamp": datetime.datetime.utcnow().isoformat(), + "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(), # What: "record_id": str(record.id), "pid_type": pid.pid_type, diff --git a/invenio_stats/contrib/file_download/os-v2/file-download-v1.json b/invenio_stats/contrib/file_download/os-v2/file-download-v1.json index 6ce8e22..2f9caba 100644 --- a/invenio_stats/contrib/file_download/os-v2/file-download-v1.json +++ b/invenio_stats/contrib/file_download/os-v2/file-download-v1.json @@ -12,7 +12,7 @@ "match_mapping_type": "date", "mapping": { "type": "date", - "format": "strict_date_hour_minute_second" + "format": "strict_date_optional_time" } } } @@ -23,7 +23,7 @@ "properties": { "timestamp": { "type": "date", - "format": "strict_date_hour_minute_second" + "format": "strict_date_optional_time" }, "bucket_id": { "type": "keyword" diff --git a/invenio_stats/contrib/record_view/os-v2/record-view-v1.json b/invenio_stats/contrib/record_view/os-v2/record-view-v1.json index 6a1aec0..90589f9 100644 --- a/invenio_stats/contrib/record_view/os-v2/record-view-v1.json +++ b/invenio_stats/contrib/record_view/os-v2/record-view-v1.json @@ -12,7 +12,7 @@ "properties": { "timestamp": { "type": "date", - "format": "strict_date_hour_minute_second" + "format": "strict_date_optional_time" }, "record_id": { "type": "keyword" diff --git a/invenio_stats/processors.py b/invenio_stats/processors.py index 7deac8a..f0bfab1 100644 --- a/invenio_stats/processors.py +++ b/invenio_stats/processors.py @@ -3,6 +3,7 @@ # This file is part of Invenio. 
# Copyright (C) 2017-2024 CERN. # Copyright (C) 2022 TU Wien. +# Copyright (C) 2025 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -10,7 +11,7 @@ """Events indexer.""" import hashlib -from datetime import datetime +from datetime import datetime, timezone from functools import partial from time import mktime @@ -21,7 +22,6 @@ from invenio_search import current_search_client from invenio_search.engine import search from invenio_search.utils import prefix_index -from pytz import utc from .utils import get_anonymization_salt, get_geoip @@ -202,10 +202,10 @@ def actionsiter(self): # This is to improve search engine performances. ts = ts.replace(microsecond=0) msg["timestamp"] = ts.isoformat() - msg["updated_timestamp"] = datetime.utcnow().isoformat() + msg["updated_timestamp"] = datetime.now(timezone.utc).isoformat() # apply timestamp windowing in order to group events too close in time if self.double_click_window > 0: - timestamp = mktime(utc.localize(ts).utctimetuple()) + timestamp = mktime(ts.utctimetuple()) ts = ts.fromtimestamp( timestamp // self.double_click_window * self.double_click_window ) diff --git a/invenio_stats/tasks.py b/invenio_stats/tasks.py index 4eb4f89..8042bed 100644 --- a/invenio_stats/tasks.py +++ b/invenio_stats/tasks.py @@ -3,13 +3,14 @@ # This file is part of Invenio. # Copyright (C) 2016-2018 CERN. # Copyright (C) 2022 TU Wien. +# Copyright (C) 2025 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. 
"""Celery background tasks.""" -from datetime import timedelta +from datetime import timedelta, timezone from celery import shared_task from dateutil.parser import parse as dateutil_parse @@ -50,8 +51,12 @@ def aggregate_events( aggregations, start_date=None, end_date=None, update_bookmark=True ): """Aggregate indexed events.""" - start_date = dateutil_parse(start_date) if start_date else None - end_date = dateutil_parse(end_date) if end_date else None + start_date = ( + dateutil_parse(start_date).replace(tzinfo=timezone.utc) if start_date else None + ) + end_date = ( + dateutil_parse(end_date).replace(tzinfo=timezone.utc) if end_date else None + ) results = [] for aggr_name in aggregations: aggr_cfg = current_stats.aggregations[aggr_name] diff --git a/tests/conftest.py b/tests/conftest.py index 05cbdb8..fbe0854 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -332,19 +332,19 @@ def request_headers(): def mock_datetime(): """Mock datetime.datetime. - Use set_utcnow to set the current utcnow time. + Use set_now to set the current now time. """ class NewDate(datetime.datetime): - _utcnow = (2017, 1, 1) + _now = (2017, 1, 1) @classmethod - def set_utcnow(cls, value): - cls._utcnow = value + def set_now(cls, value): + cls._now = value @classmethod - def utcnow(cls): - return cls(*cls._utcnow) + def now(cls, tzinfo): + return cls(*cls._now, tzinfo=tzinfo) yield NewDate diff --git a/tests/contrib/test_event_builders.py b/tests/contrib/test_event_builders.py index fdadf27..5444362 100644 --- a/tests/contrib/test_event_builders.py +++ b/tests/contrib/test_event_builders.py @@ -2,13 +2,14 @@ # # This file is part of Invenio. # Copyright (C) 2017-2018 CERN. +# Copyright (C) 2025 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. 
"""Test event builders.""" -import datetime +from datetime import datetime, timezone from unittest.mock import patch from invenio_stats.contrib.event_builders import ( @@ -18,10 +19,10 @@ from invenio_stats.utils import get_user -class NewDate(datetime.datetime): +class NewDate(datetime): @classmethod - def utcnow(cls): - return cls(2017, 1, 1) + def now(cls, tzinfo): + return cls(2017, 1, 1, tzinfo=tzinfo) headers = { @@ -42,7 +43,7 @@ def test_file_download_event_builder(app, mock_user_ctx, sequential_ids, objects file_download_event_builder(event, app, file_obj) assert event == { # When: - "timestamp": NewDate.utcnow().isoformat(), + "timestamp": NewDate.now(tzinfo=timezone.utc).isoformat(), # What: "bucket_id": str(file_obj.bucket_id), "file_id": str(file_obj.file_id), @@ -62,7 +63,7 @@ def test_record_view_event_builder(app, mock_user_ctx, record, pid): record_view_event_builder(event, app, pid, record) assert event == { # When: - "timestamp": NewDate.utcnow().isoformat(), + "timestamp": NewDate.now(tzinfo=timezone.utc).isoformat(), # What: "record_id": str(record.id), "pid_type": pid.pid_type, diff --git a/tests/helpers.py b/tests/helpers.py index 41a1939..62e9a66 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -2,6 +2,7 @@ # # This file is part of Invenio. # Copyright (C) 2018 CERN. +# Copyright (C) 2025 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. 
@@ -23,14 +24,14 @@ def get_queue_size(queue_name): def mock_date(*date_parts): - """Mocked 'datetime.utcnow()'.""" + """Mocked 'datetime.now()'.""" class MockDate(datetime.datetime): """datetime.datetime mock.""" @classmethod - def utcnow(cls): + def now(cls, tzinfo=datetime.timezone.utc): """Override to return 'current_date'.""" - return cls(*date_parts) + return cls(*date_parts, tzinfo=tzinfo) return MockDate diff --git a/tests/test_aggregations.py b/tests/test_aggregations.py index 2cfba1f..9a0b9fa 100644 --- a/tests/test_aggregations.py +++ b/tests/test_aggregations.py @@ -2,6 +2,7 @@ # # This file is part of Invenio. # Copyright (C) 2017-2018 CERN. +# Copyright (C) 2025 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -58,8 +59,8 @@ def test_get_bookmark(app, search_clear, indexed_events): stat_agg.run() current_search.flush_and_refresh(index="*") assert stat_agg.bookmark_api.get_bookmark() == datetime.datetime( - 2017, 1, 7, 11, 9, 9 # Note that the bookmark is one minute older - ) + 2017, 1, 7, 11, 9, 9, tzinfo=datetime.timezone.utc + ) # Note that the bookmark is one minute older def test_overwriting_aggregations(app, search_clear, mock_event_queue): diff --git a/tests/test_prefixing.py b/tests/test_prefixing.py index 3523f09..e5a1841 100644 --- a/tests/test_prefixing.py +++ b/tests/test_prefixing.py @@ -2,6 +2,7 @@ # # This file is part of Invenio. # Copyright (C) 2019 CERN. +# Copyright (C) 2025 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. 
diff --git a/tests/test_processors.py b/tests/test_processors.py index 3018f7c..0bf19a0 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -313,7 +313,7 @@ def bulk(client, generator, *args, **kwargs): event = build_file_unique_id(event) event = test_preprocessor1(event) event = test_preprocessor2(event) - event["updated_timestamp"] = "2017-06-02T12:00:00" + event["updated_timestamp"] = "2017-06-02T12:00:00+00:00" _id = hash_id("2017-01-01T00:00:00", event) expected_docs.append( { From 638eea8a23f48fcaa5a035a454ef43e7f1c6e6c0 Mon Sep 17 00:00:00 2001 From: Christoph Ladurner Date: Tue, 27 Jan 2026 21:31:52 +0100 Subject: [PATCH 2/4] chore(black): update formatting to >= 26.0 --- tests/test_events.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_events.py b/tests/test_events.py index d8931ae..b5d7286 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -7,6 +7,7 @@ # under the terms of the MIT License; see LICENSE file for more details. """Events tests.""" + from invenio_queues.proxies import current_queues from invenio_stats.proxies import current_stats From 39f9ef0e16337a87de83c317f4a67ea77bb2c3cd Mon Sep 17 00:00:00 2001 From: Christoph Ladurner Date: Tue, 27 Jan 2026 21:33:04 +0100 Subject: [PATCH 3/4] chore(setup): bump dependencies --- setup.cfg | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/setup.cfg b/setup.cfg index 0d2ecfe..426106e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,7 +3,7 @@ # This file is part of Invenio. # Copyright (C) 2017-2018 CERN. # Copyright (C) 2022 TU Wien. -# Copyright (C) 2024-2025 Graz University of Technology. +# Copyright (C) 2024-2026 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. 
@@ -30,7 +30,7 @@ zip_safe = False install_requires = counter-robots>=2018.6 invenio-base>=2.0.0,<3.0.0 - invenio-cache>=2.0.0,<3.0.0 + invenio-cache>=3.0.0,<4.0.0 invenio-celery>=2.0.0,<3.0.0 invenio-queues>=1.0.0a2 maxminddb-geolite2>=2018.703 @@ -40,14 +40,14 @@ install_requires = [options.extras_require] tests = - pytest-black-ng>=0.4.0 - invenio-accounts>=6.0.0,<7.0.0 - invenio-app>=2.0.0,<3.0.0 - invenio-db[postgresql]>=2.0.0,<3.0.0 - invenio-files-rest>=3.0.0,<4.0.0 - invenio-records>=3.0.0,<4.0.0 - invenio-records-ui>=2.0.0,<3.0.0 - pytest-invenio>=3.1.0,<4.0.0 + pytest-black>=0.6.0 + invenio-accounts>=7.0.0,<8.0.0 + invenio-app>=3.0.0,<4.0.0 + invenio-db[postgresql]>=2.2.0,<3.0.0 + invenio-files-rest>=4.0.0,<5.0.0 + invenio-records>=4.0.0,<5.0.0 + invenio-records-ui>=3.0.0,<4.0.0 + pytest-invenio>=4.0.0,<5.0.0 Sphinx>=5 elasticsearch7 = invenio-search[elasticsearch7]>=3.0.0,<4.0.0 From 01a0fa608d0789a6da8b81aabd86bd9aa7e8ec41 Mon Sep 17 00:00:00 2001 From: Christoph Ladurner Date: Thu, 29 Jan 2026 10:24:20 +0100 Subject: [PATCH 4/4] release: v6.0.0 --- CHANGES.rst | 11 ++++++++++- invenio_stats/__init__.py | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index dde8791..e11e8e5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,7 @@ .. This file is part of Invenio. Copyright (C) 2017-2025 CERN. - Copyright (C) 2024 Graz University of Technology. + Copyright (C) 2024-2026 Graz University of Technology. Invenio is free software; you can redistribute it and/or modify it under the terms of the MIT License; see LICENSE file for more details. 
@@ -10,6 +10,15 @@ Changes ======= +Version v6.0.0 (released 2026-01-29) + +- chore(setup): bump dependencies +- chore(black): update formatting to >= 26.0 +- fix(chore): DeprecationWarning stdlib +- fix: DeprecationWarning warn use warning +- tests: extend support to Python 3.14 +- i18n: push translations + Version 5.1.1 (release 2025-06-09) - tests: fix issues with CI diff --git a/invenio_stats/__init__.py b/invenio_stats/__init__.py index 1d09d0c..c3e557c 100644 --- a/invenio_stats/__init__.py +++ b/invenio_stats/__init__.py @@ -3,7 +3,7 @@ # This file is part of Invenio. # Copyright (C) 2017-2024 CERN. # Copyright (C) 2022-2023 TU Wien. -# Copyright (C) 2024-2025 Graz University of Technology. +# Copyright (C) 2024-2026 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -441,7 +441,7 @@ def register_queries(): from .ext import InvenioStats from .proxies import current_stats -__version__ = "5.1.1" +__version__ = "6.0.0" __all__ = ( "__version__",