Skip to content

Commit 82ea64b

Browse files
committed
Consolidate SQLAlchemy connection pool option handling
Pool configuration parameters are now passed directly within the database 'options' block and parsed by get_engine() rather than store_db_parameters(). This streamlines internal processing and formalizes connection pool settings within the pygeoapi configuration schema. Updated tests reflect this shift in responsibility.
1 parent 9838aa9 commit 82ea64b

4 files changed

Lines changed: 76 additions & 123 deletions

File tree

pygeoapi/process/manager/postgresql.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ def __init__(self, manager_def: dict):
9595
self.db_user,
9696
self._db_password,
9797
self.db_conn,
98-
self.db_pool_options,
9998
**self.db_options
10099
)
101100
self.table_output = self.output_dir is None
@@ -340,4 +339,4 @@ def get_table_model(
340339

341340
Base.metadata.create_all(engine, tables=[Jobs], checkfirst=True)
342341

343-
return Jobs
342+
return Jobs

pygeoapi/provider/sql.py

Lines changed: 38 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ def __init__(
134134
self.db_user,
135135
self._db_password,
136136
self.db_conn,
137-
self.db_pool_options,
138137
**self.db_options
139138
)
140139
self.table_model = get_table_model(
@@ -616,31 +615,32 @@ def store_db_parameters(
616615
connection_data.get('search_path') or
617616
options.pop('search_path', ['public'])
618617
)
619-
# Connection-pool tuning. These are popped out of ``options`` so they
620-
# are NOT passed to the DBAPI as connect_args, and are coerced to a
621-
# hashable form so get_engine() can stay functools.cache()-able.
622-
# Defaults keep SQLAlchemy's QueuePool sizing but, unlike SQLAlchemy's
623-
# default of -1, recycle connections after an hour so that pooled
624-
# connections cannot sit IDLE on the server indefinitely.
625-
pool_defaults = {
626-
'pool_size': 5,
627-
'max_overflow': 10,
628-
'pool_recycle': -1, # SQLAlchemy default; preserves current behaviour
629-
'pool_timeout': 30,
630-
'pool_pre_ping': True,
631-
}
632-
self.db_pool_options = tuple(sorted(
633-
(key, type(default)(options.pop(key, default)))
634-
for key, default in pool_defaults.items()
635-
))
636-
618+
# Connection-pool tuning keys (pool_size, max_overflow, pool_recycle,
619+
# pool_timeout, pool_pre_ping) are intentionally left in ``options`` and
620+
# flow through ``db_options`` to get_engine(), which separates them from
621+
# the DBAPI connect_args. Their types are validated by the config JSON
622+
# Schema, so no coercion is performed here.
637623
self.db_options = {
638624
k: v
639625
for k, v in options.items()
640626
if not isinstance(v, dict)
641627
}
642628

643629

630+
#: Connection-pool tuning keys recognised by get_engine(). These configure
631+
#: SQLAlchemy's QueuePool rather than the DBAPI, so get_engine() separates
632+
#: them from connect_args. The defaults reproduce pygeoapi's previous
633+
#: behaviour exactly: SQLAlchemy's own QueuePool defaults, except for
634+
#: pool_pre_ping, which was previously hardcoded to True.
635+
POOL_OPTION_DEFAULTS = {
636+
'pool_size': 5,
637+
'max_overflow': 10,
638+
'pool_recycle': -1, # SQLAlchemy default; never recycles connections
639+
'pool_timeout': 30,
640+
'pool_pre_ping': True,
641+
}
642+
643+
644644
@functools.cache
645645
def get_engine(
646646
driver_name: str,
@@ -650,7 +650,6 @@ def get_engine(
650650
user: str,
651651
password: str,
652652
conn_str: Optional[str] = None,
653-
pool_options: tuple[tuple[str, Any], ...] = (),
654653
**connect_args
655654
) -> Engine:
656655
"""
@@ -663,12 +662,11 @@ def get_engine(
663662
:param user: database user
664663
:param password: database password
665664
:param conn_str: optional connection URL
666-
:param pool_options: hashable tuple of (key, value) pairs controlling
667-
the connection pool (pool_size, max_overflow,
668-
pool_recycle, pool_timeout, pool_pre_ping). Passed
669-
as a tuple rather than a dict so this function can
670-
remain functools.cache()-able.
671-
:param connect_args: custom connection arguments to pass to create_engine()
665+
:param connect_args: keyword arguments forwarded from the provider's
666+
``options`` block. Connection-pool tuning keys (see
667+
POOL_OPTION_DEFAULTS) are extracted and applied to
668+
the engine's pool; any remaining keys are passed to
669+
the DBAPI as connect_args.
672670
673671
:returns: SQL Alchemy engine
674672
"""
@@ -682,15 +680,26 @@ def get_engine(
682680
database=database
683681
)
684682

683+
# Separate connection-pool tuning from DBAPI connect args. Pool keys are
684+
# applied to create_engine() directly; everything left in connect_args is
685+
# forwarded to the DBAPI. get_engine() stays functools.cache()-able
686+
# because connect_args values are hashable scalars, so engine sharing per
687+
# process is preserved; providers with differing pool config (or any
688+
# other option) correctly get distinct engines.
689+
pool_options = {
690+
key: connect_args.pop(key, default)
691+
for key, default in POOL_OPTION_DEFAULTS.items()
692+
}
693+
685694
engine = create_engine(
686695
conn_str,
687696
connect_args=connect_args,
688-
**dict(pool_options)
697+
**pool_options
689698
)
690699

691700
LOGGER.debug(
692701
f'Created engine for {repr(engine.url)} '
693-
f'with pool options {dict(pool_options)}.'
702+
f'with pool options {pool_options}.'
694703
)
695704

696705
return engine
@@ -927,4 +936,4 @@ def get(self, identifier, crs_transform_spec=None, **kwargs):
927936
else feature_id
928937
)
929938

930-
return feature
939+
return feature

pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -734,8 +734,23 @@ definitions:
734734
type: integer
735735
keepalives_interval:
736736
type: integer
737+
pool_size:
738+
type: integer
739+
description: persistent connections held open per worker process
740+
max_overflow:
741+
type: integer
742+
description: extra short-lived connections allowed above pool_size
743+
pool_recycle:
744+
type: integer
745+
description: recreate connections older than this many seconds (-1 never recycles)
746+
pool_timeout:
747+
type: integer
748+
description: seconds to wait for a connection from the pool before erroring
749+
pool_pre_ping:
750+
type: boolean
751+
description: test connections with a lightweight ping before use
737752
required:
738753
- server
739754
- logging
740755
- metadata
741-
- resources
756+
- resources
Lines changed: 21 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,99 +1,29 @@
11
# =================================================================
2-
# Tests for configurable SQLAlchemy connection-pool options on the
3-
# SQL provider. These exercise store_db_parameters() directly and do
4-
# not require a live database, so they run in standard CI.
2+
# Test that get_engine() separates SQLAlchemy connection-pool tuning
3+
# options from DBAPI connect_args. This is the contract introduced by
4+
# the configurable-pool change; it needs no live database.
55
# =================================================================
66

7-
import pytest
7+
from unittest import mock
88

9-
from pygeoapi.provider.sql import store_db_parameters
9+
from pygeoapi.provider import sql
1010

1111

12-
class _Dummy:
13-
"""Minimal stand-in for a provider/manager instance."""
14-
default_port = 5432
15-
16-
17-
CONN = {'host': 'h', 'dbname': 'd', 'user': 'u', 'password': 'p'}
18-
19-
20-
def test_pool_options_defaults_preserve_current_behaviour():
21-
obj = _Dummy()
22-
store_db_parameters(obj, dict(CONN), {})
23-
pool = dict(obj.db_pool_options)
24-
# Defaults must match pre-existing effective behaviour:
25-
# pool_pre_ping was hardcoded True; pool_recycle was unset (-1).
26-
assert pool['pool_size'] == 5
27-
assert pool['max_overflow'] == 10
28-
assert pool['pool_timeout'] == 30
29-
assert pool['pool_pre_ping'] is True
30-
assert pool['pool_recycle'] == -1
31-
32-
33-
def test_pool_options_are_overridable_and_typed():
34-
obj = _Dummy()
35-
store_db_parameters(
36-
obj, dict(CONN),
37-
{'pool_size': 2, 'max_overflow': 3, 'pool_recycle': 300},
12+
@mock.patch.object(sql, 'create_engine')
13+
def test_get_engine_separates_pool_options_from_connect_args(mock_create):
14+
sql.get_engine.cache_clear()
15+
sql.get_engine(
16+
'postgresql+psycopg2', 'h', 5432, 'd', 'u', 'p', None,
17+
pool_size=2, pool_recycle=300, connect_timeout=10,
3818
)
39-
pool = dict(obj.db_pool_options)
40-
assert pool['pool_size'] == 2 and isinstance(pool['pool_size'], int)
41-
assert pool['max_overflow'] == 3
42-
assert pool['pool_recycle'] == 300
43-
# untouched keys keep defaults
44-
assert pool['pool_timeout'] == 30
45-
assert pool['pool_pre_ping'] is True
46-
47-
48-
def test_pool_options_not_leaked_to_dbapi_connect_args():
49-
obj = _Dummy()
50-
store_db_parameters(
51-
obj, dict(CONN),
52-
{'connect_timeout': 10, 'pool_size': 2, 'pool_recycle': 300},
53-
)
54-
for k in ('pool_size', 'max_overflow', 'pool_recycle',
55-
'pool_timeout', 'pool_pre_ping'):
56-
assert k not in obj.db_options
57-
# genuine DBAPI connect args still pass through
58-
assert obj.db_options['connect_timeout'] == 10
59-
60-
61-
def test_dict_valued_options_still_filtered():
62-
obj = _Dummy()
63-
store_db_parameters(
64-
obj, dict(CONN),
65-
{'pool_size': 2, 'zoom': {'min': 0, 'max': 22}},
66-
)
67-
assert 'zoom' not in obj.db_options
68-
assert dict(obj.db_pool_options)['pool_size'] == 2
69-
70-
71-
def test_pool_options_hashable_and_deterministic():
72-
a, b = _Dummy(), _Dummy()
73-
store_db_parameters(a, dict(CONN), {'pool_size': 2})
74-
store_db_parameters(b, dict(CONN), {'pool_size': 2})
75-
# identical config -> identical key -> shared engine via functools.cache
76-
assert a.db_pool_options == b.db_pool_options
77-
assert hash(a.db_pool_options) == hash(b.db_pool_options)
78-
79-
c = _Dummy()
80-
store_db_parameters(c, dict(CONN), {'pool_size': 9})
81-
# differing pool config -> distinct key (separate engine, by design)
82-
assert c.db_pool_options != a.db_pool_options
83-
84-
85-
def test_pool_options_coexist_with_search_path():
86-
obj = _Dummy()
87-
store_db_parameters(
88-
obj, dict(CONN),
89-
{'search_path': ['published', 'public'], 'pool_size': 4},
90-
)
91-
assert obj.db_search_path == ('published', 'public')
92-
assert dict(obj.db_pool_options)['pool_size'] == 4
93-
9419

95-
@pytest.mark.parametrize('bad', [{'pool_size': 'two'}])
96-
def test_non_integer_pool_value_raises(bad):
97-
# type coercion surfaces bad config loudly rather than silently
98-
with pytest.raises(ValueError):
99-
store_db_parameters(_Dummy(), dict(CONN), bad)
20+
_, kwargs = mock_create.call_args
21+
# pool keys are applied to the engine (QueuePool), with overrides
22+
# honoured and unset pool keys falling back to the documented defaults
23+
assert kwargs['pool_size'] == 2
24+
assert kwargs['pool_recycle'] == 300
25+
assert kwargs['max_overflow'] == 10
26+
assert kwargs['pool_timeout'] == 30
27+
assert kwargs['pool_pre_ping'] is True
28+
# genuine DBAPI args are forwarded via connect_args; pool keys are not
29+
assert kwargs['connect_args'] == {'connect_timeout': 10}

0 commit comments

Comments
 (0)