Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
..
This file is part of Invenio.
Copyright (C) 2017-2025 CERN.
Copyright (C) 2024 Graz University of Technology.
Copyright (C) 2024-2026 Graz University of Technology.

Invenio is free software; you can redistribute it and/or modify it
under the terms of the MIT License; see LICENSE file for more details.
Expand All @@ -10,6 +10,15 @@
Changes
=======

Version v6.0.0 (released 2026-01-29)

- chore(setup): bump dependencies
- chore(black): update formatting to >= 26.0
- fix(chore): DeprecationWarning stdlib
- fix: DeprecationWarning warn use warning
- tests: extend support to Python 3.14
- i18n: push translations

Version 5.1.1 (released 2025-06-09)

- tests: fix issues with CI
Expand Down
6 changes: 3 additions & 3 deletions invenio_stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# This file is part of Invenio.
# Copyright (C) 2017-2024 CERN.
# Copyright (C) 2022-2023 TU Wien.
# Copyright (C) 2024 Graz University of Technology.
# Copyright (C) 2024-2026 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
Expand Down Expand Up @@ -171,7 +171,7 @@ def register_events():
from invenio_stats.proxies import current_stats

event = {
"timestamp": datetime.datetime.utcnow().isoformat(),
"timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
"mydata": "somedata"
}

Expand Down Expand Up @@ -441,7 +441,7 @@ def register_queries():
from .ext import InvenioStats
from .proxies import current_stats

__version__ = "5.1.1"
__version__ = "6.0.0"

__all__ = (
"__version__",
Expand Down
20 changes: 13 additions & 7 deletions invenio_stats/aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
# This file is part of Invenio.
# Copyright (C) 2017-2019 CERN.
# Copyright (C) 2022 TU Wien.
# Copyright (C) 2025 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Aggregation classes."""

import math
from datetime import datetime
from datetime import datetime, timezone

from dateutil import parser
from dateutil.relativedelta import relativedelta
Expand Down Expand Up @@ -168,7 +169,7 @@ def _get_oldest_event_timestamp(self):
# indexed but the indices have not been refreshed yet.
if len(result) == 0:
return None
return parser.parse(result[0]["timestamp"])
return parser.parse(result[0]["timestamp"]).replace(tzinfo=timezone.utc)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More as a note to self: I checked the implications of using pytz.UTC.localize vs. replace(tzinfo=timezone.utc), and replace() is fine for us. UTC doesn't have daylight saving time transitions, so localize() would not really differ. The transition to storing timestamps with UTC will also fix cases where time springing forward or back causes events that are 1h apart to seem to have occurred at the same time, for example.


def _split_date_range(self, lower_limit, upper_limit):
"""Return dict of rounded dates in range, split by aggregation interval.
Expand Down Expand Up @@ -259,15 +260,19 @@ def agg_iter(self, dt, previous_bookmark):
"value_as_string", None
)
if last_update_aggr and previous_bookmark:
last_date = datetime.fromisoformat(last_update_aggr.rstrip("Z"))
last_date = datetime.fromisoformat(
last_update_aggr.rstrip("Z")
).replace(tzinfo=timezone.utc)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Concurs with my understanding that the previous stored date / date at this time, despite not including timezone information, was considered to be UTC already. So being explicit and using replace is fine.

if last_date < previous_bookmark:
continue

aggregation_data = {}
aggregation_data["timestamp"] = interval_date.isoformat()
aggregation_data[self.field] = aggregation["key"]
aggregation_data["count"] = aggregation["doc_count"]
aggregation_data["updated_timestamp"] = datetime.utcnow().isoformat()
aggregation_data["updated_timestamp"] = datetime.now(
timezone.utc
).isoformat()

if self.metric_fields:
for f in self.metric_fields:
Expand All @@ -293,9 +298,10 @@ def agg_iter(self, dt, previous_bookmark):
}

def _upper_limit(self, end_date):
max_ = datetime.max.replace(tzinfo=timezone.utc)
return min(
end_date or datetime.max, # ignore if `None`
datetime.utcnow(),
end_date or max_, # ignore if `None`
datetime.now(timezone.utc),
)

def run(self, start_date=None, end_date=None, update_bookmark=True):
Expand All @@ -317,7 +323,7 @@ def run(self, start_date=None, end_date=None, update_bookmark=True):
# Let's get the timestamp before we start the aggregation.
# This will be used for the next iteration. Some events might be processed twice
if not end_date:
end_date = datetime.utcnow().isoformat()
end_date = datetime.now(timezone.utc).isoformat()

results = []
for dt_key, dt in sorted(dates.items()):
Expand Down
5 changes: 3 additions & 2 deletions invenio_stats/bookmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
# This file is part of Invenio.
# Copyright (C) 2017-2019 CERN.
# Copyright (C) 2022 TU Wien.
# Copyright (C) 2025 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""BookMark used by aggregations."""

from collections import OrderedDict
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from functools import wraps

from invenio_search.engine import dsl, search
Expand Down Expand Up @@ -107,7 +108,7 @@ def get_bookmark(self, refresh_time=60):
# This means that some events might be processed twice
if refresh_time:
my_date -= timedelta(seconds=refresh_time)
return my_date
return my_date.replace(tzinfo=timezone.utc)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A priori, this should be fine. But if, going forward, bookmark.date is stored with the UTC timezone, then this should not be necessary (maybe part of the migration script could update the bookmark date too, to be sure).


@_ensure_index_exists
def list_bookmarks(self, start_date=None, end_date=None, limit=None):
Expand Down
1 change: 1 addition & 0 deletions invenio_stats/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# This file is part of Invenio.
# Copyright (C) 2018 CERN.
# Copyright (C) 2022 TU Wien.
# Copyright (C) 2025 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
Expand Down
5 changes: 3 additions & 2 deletions invenio_stats/contrib/event_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# This file is part of Invenio.
# Copyright (C) 2017-2018 CERN.
# Copyright (C) 2022 TU Wien.
# Copyright (C) 2025 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
Expand All @@ -21,7 +22,7 @@ def file_download_event_builder(event, sender_app, obj=None, **kwargs):
event.update(
{
# When:
"timestamp": datetime.datetime.utcnow().isoformat(),
"timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A migration of the OpenSearch mapping(s) will have to be done before this code runs, as you've mentioned. Document the steps in the docs-invenio-rdm release notes for v14 (that were used for the demo site, if I recall correctly) and that transition should be good.

# What:
"bucket_id": str(obj.bucket_id),
"file_id": str(obj.file_id),
Expand Down Expand Up @@ -52,7 +53,7 @@ def record_view_event_builder(event, sender_app, pid=None, record=None, **kwargs
event.update(
{
# When:
"timestamp": datetime.datetime.utcnow().isoformat(),
"timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
# What:
"record_id": str(record.id),
"pid_type": pid.pid_type,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"match_mapping_type": "date",
"mapping": {
"type": "date",
"format": "strict_date_hour_minute_second"
"format": "strict_date_optional_time"

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we are on the same page: this will allow "2026-01-14" or "2026-01-14T01:13:44.123456789-04:00", whereas previously only "2026-01-14T01:13:44" was allowed. A migration will be needed to convert those. Then, and in the future, because all documents are saved with the same timezone (UTC), we should be good when aggregating.

It is possible to set "strict_date_hour_minute_second||strict_date_optional_time" to keep both formats, but aggregation will be affected during the transition period, and it becomes harder to reason about how dates are stored and what that will do. So "strict_date_optional_time" is fine by me.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't respond to "A migration will be needed to convert those." As I understand the whole event generation and aggregation process, we only have to update the templates used to create the events for file download and record view, i.e. file-download-v1.json and record-view-v1.json, because strict_date_hour_minute_second is too restrictive, whereas strict_date_optional_time allows the +00:00 timezone suffix that is now provided by the rest of the system.

The aggregation, by contrast, has always allowed the +00:00 suffix because it already uses strict_date_optional_time (see the two linked references). As I understand the whole thing, this means that events created before the last aggregation step can be processed, and the new ones with +00:00 can be processed too.

}
}
}
Expand All @@ -23,7 +23,7 @@
"properties": {
"timestamp": {
"type": "date",
"format": "strict_date_hour_minute_second"
"format": "strict_date_optional_time"
},
"bucket_id": {
"type": "keyword"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"properties": {
"timestamp": {
"type": "date",
"format": "strict_date_hour_minute_second"
"format": "strict_date_optional_time"
},
"record_id": {
"type": "keyword"
Expand Down
8 changes: 4 additions & 4 deletions invenio_stats/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
# This file is part of Invenio.
# Copyright (C) 2017-2024 CERN.
# Copyright (C) 2022 TU Wien.
# Copyright (C) 2025 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Events indexer."""

import hashlib
from datetime import datetime
from datetime import datetime, timezone
from functools import partial
from time import mktime

Expand All @@ -21,7 +22,6 @@
from invenio_search import current_search_client
from invenio_search.engine import search
from invenio_search.utils import prefix_index
from pytz import utc

from .utils import get_anonymization_salt, get_geoip

Expand Down Expand Up @@ -202,10 +202,10 @@ def actionsiter(self):
# This is to improve search engine performances.
ts = ts.replace(microsecond=0)
msg["timestamp"] = ts.isoformat()
msg["updated_timestamp"] = datetime.utcnow().isoformat()
msg["updated_timestamp"] = datetime.now(timezone.utc).isoformat()
# apply timestamp windowing in order to group events too close in time
if self.double_click_window > 0:
timestamp = mktime(utc.localize(ts).utctimetuple())
timestamp = mktime(ts.utctimetuple())
ts = ts.fromtimestamp(
timestamp // self.double_click_window * self.double_click_window
)
Expand Down
11 changes: 8 additions & 3 deletions invenio_stats/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
# This file is part of Invenio.
# Copyright (C) 2016-2018 CERN.
# Copyright (C) 2022 TU Wien.
# Copyright (C) 2025 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Celery background tasks."""

from datetime import timedelta
from datetime import timedelta, timezone

from celery import shared_task
from dateutil.parser import parse as dateutil_parse
Expand Down Expand Up @@ -50,8 +51,12 @@ def aggregate_events(
aggregations, start_date=None, end_date=None, update_bookmark=True
):
"""Aggregate indexed events."""
start_date = dateutil_parse(start_date) if start_date else None
end_date = dateutil_parse(end_date) if end_date else None
start_date = (
dateutil_parse(start_date).replace(tzinfo=timezone.utc) if start_date else None
)
end_date = (
dateutil_parse(end_date).replace(tzinfo=timezone.utc) if end_date else None
)
results = []
for aggr_name in aggregations:
aggr_cfg = current_stats.aggregations[aggr_name]
Expand Down
20 changes: 10 additions & 10 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# This file is part of Invenio.
# Copyright (C) 2017-2018 CERN.
# Copyright (C) 2022 TU Wien.
# Copyright (C) 2024-2025 Graz University of Technology.
# Copyright (C) 2024-2026 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
Expand All @@ -30,7 +30,7 @@ zip_safe = False
install_requires =
counter-robots>=2018.6
invenio-base>=2.0.0,<3.0.0
invenio-cache>=2.0.0,<3.0.0
invenio-cache>=3.0.0,<4.0.0
invenio-celery>=2.0.0,<3.0.0
invenio-queues>=1.0.0a2
maxminddb-geolite2>=2018.703
Expand All @@ -40,14 +40,14 @@ install_requires =

[options.extras_require]
tests =
pytest-black-ng>=0.4.0
invenio-accounts>=6.0.0,<7.0.0
invenio-app>=2.0.0,<3.0.0
invenio-db[postgresql]>=2.0.0,<3.0.0
invenio-files-rest>=3.0.0,<4.0.0
invenio-records>=3.0.0,<4.0.0
invenio-records-ui>=2.0.0,<3.0.0
pytest-invenio>=3.1.0,<4.0.0
pytest-black>=0.6.0
invenio-accounts>=7.0.0,<8.0.0
invenio-app>=3.0.0,<4.0.0
invenio-db[postgresql]>=2.2.0,<3.0.0
invenio-files-rest>=4.0.0,<5.0.0
invenio-records>=4.0.0,<5.0.0
invenio-records-ui>=3.0.0,<4.0.0
pytest-invenio>=4.0.0,<5.0.0
Sphinx>=5
elasticsearch7 =
invenio-search[elasticsearch7]>=3.0.0,<4.0.0
Expand Down
12 changes: 6 additions & 6 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,19 +332,19 @@ def request_headers():
def mock_datetime():
"""Mock datetime.datetime.

Use set_utcnow to set the current utcnow time.
Use set_now to set the current now time.
"""

class NewDate(datetime.datetime):
_utcnow = (2017, 1, 1)
_now = (2017, 1, 1)

@classmethod
def set_utcnow(cls, value):
cls._utcnow = value
def set_now(cls, value):
cls._now = value

@classmethod
def utcnow(cls):
return cls(*cls._utcnow)
def now(cls, tzinfo):
return cls(*cls._now, tzinfo=tzinfo)

yield NewDate

Expand Down
13 changes: 7 additions & 6 deletions tests/contrib/test_event_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
#
# This file is part of Invenio.
# Copyright (C) 2017-2018 CERN.
# Copyright (C) 2025 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Test event builders."""

import datetime
from datetime import datetime, timezone
from unittest.mock import patch

from invenio_stats.contrib.event_builders import (
Expand All @@ -18,10 +19,10 @@
from invenio_stats.utils import get_user


class NewDate(datetime.datetime):
class NewDate(datetime):

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like to use https://time-machine.readthedocs.io/en/latest/ for these kinds of tests. (it's fine as-is, just mentioning if not aware)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I try to avoid third-party dependencies as much as possible and do whatever is possible with the stdlib.

@classmethod
def utcnow(cls):
return cls(2017, 1, 1)
def now(cls, tzinfo):
return cls(2017, 1, 1, tzinfo=tzinfo)


headers = {
Expand All @@ -42,7 +43,7 @@ def test_file_download_event_builder(app, mock_user_ctx, sequential_ids, objects
file_download_event_builder(event, app, file_obj)
assert event == {
# When:
"timestamp": NewDate.utcnow().isoformat(),
"timestamp": NewDate.now(tzinfo=timezone.utc).isoformat(),
# What:
"bucket_id": str(file_obj.bucket_id),
"file_id": str(file_obj.file_id),
Expand All @@ -62,7 +63,7 @@ def test_record_view_event_builder(app, mock_user_ctx, record, pid):
record_view_event_builder(event, app, pid, record)
assert event == {
# When:
"timestamp": NewDate.utcnow().isoformat(),
"timestamp": NewDate.now(tzinfo=timezone.utc).isoformat(),
# What:
"record_id": str(record.id),
"pid_type": pid.pid_type,
Expand Down
Loading
Loading