-
Notifications
You must be signed in to change notification settings - Fork 29
wip: utcnow #163
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
wip: utcnow #163
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,14 +3,15 @@ | |
| # This file is part of Invenio. | ||
| # Copyright (C) 2017-2019 CERN. | ||
| # Copyright (C) 2022 TU Wien. | ||
| # Copyright (C) 2025 Graz University of Technology. | ||
| # | ||
| # Invenio is free software; you can redistribute it and/or modify it | ||
| # under the terms of the MIT License; see LICENSE file for more details. | ||
|
|
||
| """Aggregation classes.""" | ||
|
|
||
| import math | ||
| from datetime import datetime | ||
| from datetime import datetime, timezone | ||
|
|
||
| from dateutil import parser | ||
| from dateutil.relativedelta import relativedelta | ||
|
|
@@ -168,7 +169,7 @@ def _get_oldest_event_timestamp(self): | |
| # indexed but the indices have not been refreshed yet. | ||
| if len(result) == 0: | ||
| return None | ||
| return parser.parse(result[0]["timestamp"]) | ||
| return parser.parse(result[0]["timestamp"]).replace(tzinfo=timezone.utc) | ||
|
|
||
| def _split_date_range(self, lower_limit, upper_limit): | ||
| """Return dict of rounded dates in range, split by aggregation interval. | ||
|
|
@@ -259,15 +260,19 @@ def agg_iter(self, dt, previous_bookmark): | |
| "value_as_string", None | ||
| ) | ||
| if last_update_aggr and previous_bookmark: | ||
| last_date = datetime.fromisoformat(last_update_aggr.rstrip("Z")) | ||
| last_date = datetime.fromisoformat( | ||
| last_update_aggr.rstrip("Z") | ||
| ).replace(tzinfo=timezone.utc) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 Concurs with my understanding that the previous stored date / date at this time, despite not including timezone information, was considered to be UTC already. So being explicit and using |
||
| if last_date < previous_bookmark: | ||
| continue | ||
|
|
||
| aggregation_data = {} | ||
| aggregation_data["timestamp"] = interval_date.isoformat() | ||
| aggregation_data[self.field] = aggregation["key"] | ||
| aggregation_data["count"] = aggregation["doc_count"] | ||
| aggregation_data["updated_timestamp"] = datetime.utcnow().isoformat() | ||
| aggregation_data["updated_timestamp"] = datetime.now( | ||
| timezone.utc | ||
| ).isoformat() | ||
|
|
||
| if self.metric_fields: | ||
| for f in self.metric_fields: | ||
|
|
@@ -293,9 +298,10 @@ def agg_iter(self, dt, previous_bookmark): | |
| } | ||
|
|
||
| def _upper_limit(self, end_date): | ||
| max_ = datetime.max.replace(tzinfo=timezone.utc) | ||
| return min( | ||
| end_date or datetime.max, # ignore if `None` | ||
| datetime.utcnow(), | ||
| end_date or max_, # ignore if `None` | ||
| datetime.now(timezone.utc), | ||
| ) | ||
|
|
||
| def run(self, start_date=None, end_date=None, update_bookmark=True): | ||
|
|
@@ -317,7 +323,7 @@ def run(self, start_date=None, end_date=None, update_bookmark=True): | |
| # Let's get the timestamp before we start the aggregation. | ||
| # This will be used for the next iteration. Some events might be processed twice | ||
| if not end_date: | ||
| end_date = datetime.utcnow().isoformat() | ||
| end_date = datetime.now(timezone.utc).isoformat() | ||
|
|
||
| results = [] | ||
| for dt_key, dt in sorted(dates.items()): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,13 +3,14 @@ | |
| # This file is part of Invenio. | ||
| # Copyright (C) 2017-2019 CERN. | ||
| # Copyright (C) 2022 TU Wien. | ||
| # Copyright (C) 2025 Graz University of Technology. | ||
| # | ||
| # Invenio is free software; you can redistribute it and/or modify it | ||
| # under the terms of the MIT License; see LICENSE file for more details. | ||
| """BookMark used by aggregations.""" | ||
|
|
||
| from collections import OrderedDict | ||
| from datetime import datetime, timedelta | ||
| from datetime import datetime, timedelta, timezone | ||
| from functools import wraps | ||
|
|
||
| from invenio_search.engine import dsl, search | ||
|
|
@@ -107,7 +108,7 @@ def get_bookmark(self, refresh_time=60): | |
| # This means that some events might be processed twice | ||
| if refresh_time: | ||
| my_date -= timedelta(seconds=refresh_time) | ||
| return my_date | ||
| return my_date.replace(tzinfo=timezone.utc) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Apriori this should be fine. But if going forward the bookmark.date is stored with UTC timezone, then this should not be necessary. (maybe part of the migration script updates the bookmark date too to be sure) |
||
|
|
||
| @_ensure_index_exists | ||
| def list_bookmarks(self, start_date=None, end_date=None, limit=None): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
| # This file is part of Invenio. | ||
| # Copyright (C) 2017-2018 CERN. | ||
| # Copyright (C) 2022 TU Wien. | ||
| # Copyright (C) 2025 Graz University of Technology. | ||
| # | ||
| # Invenio is free software; you can redistribute it and/or modify it | ||
| # under the terms of the MIT License; see LICENSE file for more details. | ||
|
|
@@ -21,7 +22,7 @@ def file_download_event_builder(event, sender_app, obj=None, **kwargs): | |
| event.update( | ||
| { | ||
| # When: | ||
| "timestamp": datetime.datetime.utcnow().isoformat(), | ||
| "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A migration of the OpenSearch mapping(s) will have to be done before this code runs like you've mentioned. Document the steps in the docs-invenio-rdm release notes for v14 (that were used for demo site if I recall correctly) and that transition should be good. |
||
| # What: | ||
| "bucket_id": str(obj.bucket_id), | ||
| "file_id": str(obj.file_id), | ||
|
|
@@ -52,7 +53,7 @@ def record_view_event_builder(event, sender_app, pid=None, record=None, **kwargs | |
| event.update( | ||
| { | ||
| # When: | ||
| "timestamp": datetime.datetime.utcnow().isoformat(), | ||
| "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(), | ||
| # What: | ||
| "record_id": str(record.id), | ||
| "pid_type": pid.pid_type, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,7 +12,7 @@ | |
| "match_mapping_type": "date", | ||
| "mapping": { | ||
| "type": "date", | ||
| "format": "strict_date_hour_minute_second" | ||
| "format": "strict_date_optional_time" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So we are on the same page, this will allow: It is possible to set
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i didn't respond on the the aggregation instead always allowed the |
||
| } | ||
| } | ||
| } | ||
|
|
@@ -23,7 +23,7 @@ | |
| "properties": { | ||
| "timestamp": { | ||
| "type": "date", | ||
| "format": "strict_date_hour_minute_second" | ||
| "format": "strict_date_optional_time" | ||
| }, | ||
| "bucket_id": { | ||
| "type": "keyword" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,13 +2,14 @@ | |
| # | ||
| # This file is part of Invenio. | ||
| # Copyright (C) 2017-2018 CERN. | ||
| # Copyright (C) 2025 Graz University of Technology. | ||
| # | ||
| # Invenio is free software; you can redistribute it and/or modify it | ||
| # under the terms of the MIT License; see LICENSE file for more details. | ||
|
|
||
| """Test event builders.""" | ||
|
|
||
| import datetime | ||
| from datetime import datetime, timezone | ||
| from unittest.mock import patch | ||
|
|
||
| from invenio_stats.contrib.event_builders import ( | ||
|
|
@@ -18,10 +19,10 @@ | |
| from invenio_stats.utils import get_user | ||
|
|
||
|
|
||
| class NewDate(datetime.datetime): | ||
| class NewDate(datetime): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like to use https://time-machine.readthedocs.io/en/latest/ for these kinds of tests. (it's fine as-is, just mentioning if not aware)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, i try to avoid third party as much as possible and do what ever is possible with the stdlib. |
||
| @classmethod | ||
| def utcnow(cls): | ||
| return cls(2017, 1, 1) | ||
| def now(cls, tzinfo): | ||
| return cls(2017, 1, 1, tzinfo=tzinfo) | ||
|
|
||
|
|
||
| headers = { | ||
|
|
@@ -42,7 +43,7 @@ def test_file_download_event_builder(app, mock_user_ctx, sequential_ids, objects | |
| file_download_event_builder(event, app, file_obj) | ||
| assert event == { | ||
| # When: | ||
| "timestamp": NewDate.utcnow().isoformat(), | ||
| "timestamp": NewDate.now(tzinfo=timezone.utc).isoformat(), | ||
| # What: | ||
| "bucket_id": str(file_obj.bucket_id), | ||
| "file_id": str(file_obj.file_id), | ||
|
|
@@ -62,7 +63,7 @@ def test_record_view_event_builder(app, mock_user_ctx, record, pid): | |
| record_view_event_builder(event, app, pid, record) | ||
| assert event == { | ||
| # When: | ||
| "timestamp": NewDate.utcnow().isoformat(), | ||
| "timestamp": NewDate.now(tzinfo=timezone.utc).isoformat(), | ||
| # What: | ||
| "record_id": str(record.id), | ||
| "pid_type": pid.pid_type, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
More as a note to self: I checked implications with respect to using
pytz.UTC.localizevsreplace(tzinfo=timezone.utc)andreplace()is fine for us. UTC doesn't have daylight time transitions solocalize()would not really differ. The transition to storing with UTC will also fix cases of time springing forward or back causing events that are 1h apart to seem to have occurred at same time for example.