Skip to content

Commit 82c9851

Browse files
GH1654 Pandas 3.0 support (#1741)
* GH1654 Pandas 3.0 support
* GH1654 PR Feedback
* GH1654 Improve testing
1 parent 4f6f533 commit 82c9851

4 files changed

Lines changed: 134 additions & 12 deletions

File tree

Lines changed: 49 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,62 @@
1-
from pandas._typing import (
2-
Dtype,
3-
Scalar,
4-
npt,
5-
)
1+
from typing import overload
2+
3+
import numpy as np
4+
5+
from pandas._typing import Scalar
66

77
from pandas.core.dtypes.base import ExtensionDtype
88
from pandas.core.dtypes.dtypes import (
99
register_extension_dtype as register_extension_dtype,
1010
)
1111

1212
class SparseDtype(ExtensionDtype):
13+
@overload
14+
def __init__(
15+
self,
16+
dtype: type[bool | np.bool_],
17+
fill_value: bool | None = None,
18+
) -> None: ...
19+
@overload
20+
def __init__(
21+
self,
22+
dtype: type[int | np.integer],
23+
fill_value: int | None = None,
24+
) -> None: ...
25+
@overload
26+
def __init__(
27+
self,
28+
dtype: type[float | np.floating],
29+
fill_value: float | None = None,
30+
) -> None: ...
31+
@overload
1332
def __init__(
1433
self,
15-
dtype: Dtype | npt.DTypeLike = ...,
34+
dtype: type[complex | np.complexfloating],
35+
fill_value: complex | None = None,
36+
) -> None: ...
37+
@overload
38+
def __init__(
39+
self,
40+
dtype: type[np.datetime64],
41+
fill_value: np.datetime64 | None = None,
42+
) -> None: ...
43+
@overload
44+
def __init__(
45+
self,
46+
dtype: type[np.timedelta64],
47+
fill_value: np.timedelta64 | None = None,
48+
) -> None: ...
49+
@overload
50+
def __init__(
51+
self,
52+
dtype: type[str | bytes] | str | np.dtype[np.generic] | ExtensionDtype = ...,
1653
fill_value: Scalar | None = None,
1754
) -> None: ...
1855
@property
56+
def subtype(
57+
self,
58+
) -> (
59+
np.dtype
60+
): ... # TODO: pandas-dev/pandas-stubs#1654 make the class Generic so we can embed the subtype more precisely
61+
@property
1962
def fill_value(self) -> Scalar | None: ...

pandas-stubs/core/series.pyi

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4206,6 +4206,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
42064206
def item(self) -> S1: ...
42074207
def kurt(
42084208
self,
4209+
*,
42094210
axis: AxisIndex | None = 0,
42104211
skipna: _bool = True,
42114212
numeric_only: _bool = False,
@@ -4232,8 +4233,9 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
42324233
fill_value: float | None = None,
42334234
axis: AxisIndex = ...,
42344235
) -> Series[_bool]: ...
4235-
def max(
4236+
def max( # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] # ty: ignore[invalid-method-override] # pyrefly: ignore[bad-override]
42364237
self,
4238+
*,
42374239
axis: AxisIndex | None = 0,
42384240
skipna: _bool = True,
42394241
level: None = None,
@@ -4243,6 +4245,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
42434245
@overload
42444246
def mean(
42454247
self: Series[Never],
4248+
*,
42464249
axis: AxisIndex | None = ...,
42474250
skipna: _bool = ...,
42484251
level: None = None,
@@ -4252,6 +4255,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
42524255
@overload
42534256
def mean(
42544257
self: Series[Timestamp],
4258+
*,
42554259
axis: AxisIndex | None = ...,
42564260
skipna: _bool = ...,
42574261
level: None = None,
@@ -4261,6 +4265,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
42614265
@overload
42624266
def mean(
42634267
self: SupportsGetItem[Scalar, SupportsTruedivInt[S2]],
4268+
*,
42644269
axis: AxisIndex | None = 0,
42654270
skipna: _bool = True,
42664271
level: None = None,
@@ -4270,6 +4275,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
42704275
@overload
42714276
def median(
42724277
self: Series[Never],
4278+
*,
42734279
axis: AxisIndex | None = 0,
42744280
skipna: _bool = True,
42754281
level: None = None,
@@ -4279,6 +4285,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
42794285
@overload
42804286
def median(
42814287
self: Series[complex],
4288+
*,
42824289
axis: AxisIndex | None = 0,
42834290
skipna: _bool = True,
42844291
level: None = None,
@@ -4288,6 +4295,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
42884295
@overload
42894296
def median(
42904297
self: SupportsGetItem[Scalar, SupportsTruedivInt[S2]],
4298+
*,
42914299
axis: AxisIndex | None = 0,
42924300
skipna: _bool = True,
42934301
level: None = None,
@@ -4297,14 +4305,16 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
42974305
@overload
42984306
def median(
42994307
self: Series[Timestamp],
4308+
*,
43004309
axis: AxisIndex | None = 0,
43014310
skipna: _bool = True,
43024311
level: None = None,
43034312
numeric_only: _bool = False,
43044313
**kwargs: Any,
43054314
) -> Timestamp: ...
4306-
def min(
4315+
def min( # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] # ty: ignore[invalid-method-override] # pyrefly: ignore[bad-override]
43074316
self,
4317+
*,
43084318
axis: AxisIndex | None = 0,
43094319
skipna: _bool = True,
43104320
level: None = None,
@@ -4336,6 +4346,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
43364346
) -> Series[S1]: ...
43374347
def prod(
43384348
self,
4349+
*,
43394350
axis: AxisIndex | None = 0,
43404351
skipna: _bool | None = True,
43414352
numeric_only: _bool = False,
@@ -4399,6 +4410,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
43994410
) -> Series[S1]: ...
44004411
def sem(
44014412
self,
4413+
*,
44024414
axis: AxisIndex | None = 0,
44034415
skipna: _bool | None = True,
44044416
ddof: int = 1,
@@ -4407,6 +4419,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
44074419
) -> Scalar: ...
44084420
def skew(
44094421
self,
4422+
*,
44104423
axis: AxisIndex | None = 0,
44114424
skipna: _bool | None = True,
44124425
numeric_only: _bool = False,
@@ -4415,6 +4428,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
44154428
@overload
44164429
def std(
44174430
self: Series[Never],
4431+
*,
44184432
axis: AxisIndex | None = 0,
44194433
skipna: _bool | None = True,
44204434
ddof: int = 1,
@@ -4424,6 +4438,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
44244438
@overload
44254439
def std(
44264440
self: Series[complex],
4441+
*,
44274442
axis: AxisIndex | None = 0,
44284443
skipna: _bool | None = True,
44294444
level: None = None,
@@ -4434,6 +4449,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
44344449
@overload
44354450
def std(
44364451
self: Series[Timestamp],
4452+
*,
44374453
axis: AxisIndex | None = 0,
44384454
skipna: _bool | None = True,
44394455
level: None = None,
@@ -4444,6 +4460,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
44444460
@overload
44454461
def std(
44464462
self: SupportsGetItem[Scalar, SupportsTruedivInt[S2]],
4463+
*,
44474464
axis: AxisIndex | None = 0,
44484465
skipna: _bool | None = True,
44494466
ddof: int = 1,
@@ -4452,6 +4469,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
44524469
) -> S2: ...
44534470
def sum(
44544471
self: SupportsGetItem[Scalar, _SupportsAdd[_T]],
4472+
*,
44554473
axis: AxisIndex | None = 0,
44564474
skipna: _bool | None = ...,
44574475
numeric_only: _bool = ...,
@@ -4623,6 +4641,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
46234641
@overload
46244642
def var(
46254643
self: Series[Never],
4644+
*,
46264645
axis: AxisIndex | None = 0,
46274646
skipna: _bool | None = True,
46284647
ddof: int = 1,
@@ -4632,6 +4651,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
46324651
@overload
46334652
def var(
46344653
self: Series[Timedelta] | Series[Timestamp],
4654+
*,
46354655
axis: AxisIndex | None = 0,
46364656
skipna: _bool | None = True,
46374657
ddof: int = 1,
@@ -4641,6 +4661,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
46414661
@overload
46424662
def var(
46434663
self: Series[complex],
4664+
*,
46444665
axis: AxisIndex | None = 0,
46454666
skipna: _bool | None = True,
46464667
ddof: int = 1,
@@ -4650,6 +4671,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
46504671
@overload
46514672
def var(
46524673
self: SupportsGetItem[Scalar, SupportsTruedivInt[S2]],
4674+
*,
46534675
axis: AxisIndex | None = 0,
46544676
skipna: _bool | None = True,
46554677
ddof: int = 1,

tests/series/test_series.py

Lines changed: 32 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -167,12 +167,18 @@ def test_types_any() -> None:
167167
check(assert_type(pd.Series([False, False]).any(bool_only=False), np.bool), np.bool)
168168
check(assert_type(pd.Series([np.nan]).any(skipna=False), np.bool), np.bool)
169169

170+
if TYPE_CHECKING_INVALID_USAGE:
171+
pd.Series([False, True]).any(0) # type: ignore[misc] # pyright: ignore[reportCallIssue]
172+
170173

171174
def test_types_all() -> None:
172175
check(assert_type(pd.Series([False, False]).all(), np.bool), np.bool)
173176
check(assert_type(pd.Series([False, False]).all(bool_only=False), np.bool), np.bool)
174177
check(assert_type(pd.Series([np.nan]).all(skipna=False), np.bool), np.bool)
175178

179+
if TYPE_CHECKING_INVALID_USAGE:
180+
pd.Series([False, True]).all(0) # type: ignore[misc] # pyright: ignore[reportCallIssue]
181+
176182

177183
def test_types_csv(tmp_path: Path) -> None:
178184
s = pd.Series(data=[1, 2, 3])
@@ -475,6 +481,9 @@ def test_types_median() -> None:
475481
check(assert_type(s.median(skipna=False), float), float)
476482
check(assert_type(s.median(numeric_only=False), float), float)
477483

484+
if TYPE_CHECKING_INVALID_USAGE:
485+
s.median(0) # type: ignore[misc] # pyright: ignore[reportCallIssue]
486+
478487

479488
def test_types_sum() -> None:
480489
s = pd.Series([1, 2, 3, np.nan])
@@ -533,13 +542,23 @@ def test_types_min() -> None:
533542
)
534543
check(assert_type(s.min(skipna=False), float), np.floating)
535544

545+
if TYPE_CHECKING_INVALID_USAGE:
546+
s.min(0) # type: ignore[misc] # pyright: ignore[reportCallIssue]
547+
536548

537549
def test_types_max() -> None:
538550
s = pd.Series([1, 2, 3, np.nan])
539-
s.max()
540-
s.max(axis=0)
541-
s.groupby(level=0).max()
542-
s.max(skipna=False)
551+
check(assert_type(s.max(), float), np.floating)
552+
check(assert_type(s.max(axis=0), float), np.floating)
553+
check(
554+
assert_type(s.groupby(level=0).max(), "pd.Series[float]"),
555+
pd.Series,
556+
np.floating,
557+
)
558+
check(assert_type(s.max(skipna=False), float), np.floating)
559+
560+
if TYPE_CHECKING_INVALID_USAGE:
561+
s.max(0) # type: ignore[misc] # pyright: ignore[reportCallIssue]
543562

544563

545564
def test_types_groupby_level() -> None:
@@ -942,6 +961,15 @@ def test_types_groupby_methods() -> None:
942961
np.integer,
943962
)
944963

964+
if TYPE_CHECKING_INVALID_USAGE:
965+
s.sum(0) # type: ignore[misc] # pyright: ignore[reportCallIssue]
966+
s.prod(0) # type: ignore[misc] # pyright: ignore[reportCallIssue]
967+
s.std(0) # type: ignore[misc] # pyright: ignore[reportCallIssue]
968+
s.var(0) # type: ignore[misc] # pyright: ignore[reportCallIssue]
969+
s.sem(0) # type: ignore[misc] # pyright: ignore[reportCallIssue]
970+
s.skew(0) # type: ignore[misc] # pyright: ignore[reportCallIssue]
971+
s.kurt(0) # type: ignore[misc] # pyright: ignore[reportCallIssue]
972+
945973

946974
def test_groupby_result() -> None:
947975
# GH 142

tests/test_dtypes.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -150,6 +150,35 @@ def test_sparse_dtype() -> None:
150150
check(assert_type(s_dt.fill_value, Scalar | None), int)
151151

152152

153+
def test_sparse_dtype_fill_value_subtype_compatibility() -> None:
154+
# int subtype: default fill_value is 0
155+
s_dt_int = pd.SparseDtype(int)
156+
check(assert_type(s_dt_int.subtype, np.dtype), np.dtypes.Int64DType)
157+
check(assert_type(s_dt_int.fill_value, Scalar | None), int)
158+
159+
# float subtype: default fill_value is np.nan
160+
s_dt_float = pd.SparseDtype(float)
161+
check(assert_type(s_dt_float.subtype, np.dtype), np.dtypes.Float64DType)
162+
check(assert_type(s_dt_float.fill_value, Scalar | None), float)
163+
164+
# bool subtype: default fill_value is False
165+
s_dt_bool = pd.SparseDtype(bool)
166+
check(assert_type(s_dt_bool.subtype, np.dtype), np.dtypes.BoolDType)
167+
check(assert_type(s_dt_bool.fill_value, Scalar | None), bool)
168+
169+
# datetime64 subtype: default fill_value is NaT
170+
s_dt_dt = pd.SparseDtype(np.datetime64)
171+
check(assert_type(s_dt_dt.subtype, np.dtype), np.dtypes.DateTime64DType)
172+
check(assert_type(s_dt_dt.fill_value, Scalar | None), np.datetime64)
173+
174+
# passing a fill_value incompatible with the subtype is both a static type error
175+
# and a runtime ValueError
176+
if TYPE_CHECKING_INVALID_USAGE:
177+
pd.SparseDtype(
178+
int, fill_value="hello" # type: ignore[arg-type] # pyright: ignore[reportCallIssue, reportArgumentType]
179+
)
180+
181+
153182
@pytest.mark.parametrize("storage", ["python", "pyarrow", None])
154183
@pytest.mark.parametrize("na_value", [pd.NA, float("nan")])
155184
def test_string_dtype(

0 commit comments

Comments
 (0)