Skip to content

Commit 98fafb2

Browse files
committed
fix: Include index coordinates in to_dataframe when name differs from dimension (Fixes #10851)
1 parent 3c6b050 commit 98fafb2

File tree

2 files changed

+68
-5
lines changed

2 files changed

+68
-5
lines changed

xarray/core/dataset.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7202,11 +7202,23 @@ def to_pandas(self) -> pd.Series | pd.DataFrame:
72027202

72037203
def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
72047204
from xarray.core.extension_array import PandasExtensionArray
7205-
7206-
# All and only non-index arrays (whether data or coordinates) should
7207-
# become columns in the output DataFrame. Excluding indexes rather
7208-
# than dims handles the case of a MultiIndex along a single dimension.
7209-
columns_in_order = [k for k in self.variables if k not in self.xindexes]
7205+
from xarray.core.indexes import PandasIndex, PandasMultiIndex
7206+
7207+
# All non-index variables become columns. The following indexes are excluded:
7208+
# 1. PandasMultiIndex components (A, B from a MultiIndex)
7209+
# 2. PandasIndex where name matches dim (e.g., 'x' indexing dim 'x')
7210+
# 3. Any index whose name matches a dimension
7211+
# This allows PandasIndex coords created via set_xindex with a different
7212+
# name (e.g., 'pf' indexing dim 'pos') to be included as columns.
7213+
indexes_to_exclude = set()
7214+
for name, idx in self.xindexes.items():
7215+
if (
7216+
isinstance(idx, PandasMultiIndex)
7217+
or (isinstance(idx, PandasIndex) and name == idx.dim)
7218+
or name in self.dims
7219+
):
7220+
indexes_to_exclude.add(name)
7221+
columns_in_order = [k for k in self.variables if k not in indexes_to_exclude]
72107222
non_extension_array_columns = [
72117223
k
72127224
for k in columns_in_order

xarray/tests/test_issue_10851.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
"""Test for issue #10851: Dataset Index not included in to_dataframe when name differs from dimension."""
2+
import numpy as np
3+
import pandas as pd
4+
5+
import xarray as xr
6+
7+
8+
class TestToDataFrameIndexColumn:
    """Check which index coordinates ``Dataset.to_dataframe`` emits as columns."""

    def test_to_dataframe_includes_index_with_different_name(self):
        """An indexed coordinate named differently from its dimension is a column."""
        pf_values = [1.0, 2.0, 4.2, 8.0, 10.0]
        dataset = xr.Dataset(
            data_vars={"temp": (["time", "pos"], np.array([[5, 10, 15, 20, 25]]))},
            coords={
                "pf": ("pos", pf_values),
                "time": ("time", [pd.to_datetime("2025-01-01")]),
            },
        )
        # "pf" indexes the "pos" dimension, so its name differs from the dim name.
        indexed = dataset.set_xindex("pf")

        frame = indexed.to_dataframe()

        for column in ("pf", "temp"):
            assert column in frame.columns
        np.testing.assert_array_equal(frame["pf"].values, pf_values)

    def test_to_dataframe_still_excludes_matching_dim_index(self):
        """An index coordinate named after its own dimension is not a column."""
        dataset = xr.Dataset(
            data_vars={"temp": (["x"], [1, 2, 3])},
            coords={"x": ("x", [10, 20, 30])},
        )

        frame = dataset.to_dataframe()

        assert "temp" in frame.columns
        assert "x" not in frame.columns

    def test_to_dataframe_roundtrip_with_set_xindex(self):
        """After ``set_xindex``, both the index labels and the data are columns.

        Only the Dataset -> DataFrame direction is checked here; the frame is
        not converted back into a Dataset.
        """
        labels = ["a", "b", "c"]
        values = [100, 200, 300]
        dataset = xr.Dataset(
            data_vars={"val": (["dim"], values)},
            coords={"coord_idx": ("dim", labels)},
        )
        indexed = dataset.set_xindex("coord_idx")

        frame = indexed.to_dataframe()

        for column in ("coord_idx", "val"):
            assert column in frame.columns
        assert frame["coord_idx"].tolist() == labels
        assert frame["val"].tolist() == values

0 commit comments

Comments
 (0)