mirror of
https://github.com/rio-labs/rio.git
synced 2026-05-02 08:59:27 -05:00
table data ingestion unit tests & fixes
This commit is contained in:
@@ -31,6 +31,9 @@ class Plot(FundamentalComponent):
|
||||
a plot using the library of your choice and pass it to the `Plot` component
|
||||
to display it in your app.
|
||||
|
||||
Plots created with `plotly` will be interactive when displayed in Rio. We
|
||||
recommend using it over the other options.
|
||||
|
||||
## Attributes
|
||||
|
||||
`figure`: The plot figure to display.
|
||||
@@ -43,7 +46,8 @@ class Plot(FundamentalComponent):
|
||||
|
||||
## Examples
|
||||
|
||||
Here's a minimal example using a `plotly` plot:
|
||||
Here's a minimal example using a `plotly` plot. Using `plotly` is
|
||||
recommended, because the resulting plots are interactive.
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
@@ -74,9 +78,10 @@ class Plot(FundamentalComponent):
|
||||
)
|
||||
```
|
||||
|
||||
Here's an example using a `matplotlib` plot:
|
||||
Matplotlib plots are also supported:
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
@@ -103,9 +108,10 @@ class Plot(FundamentalComponent):
|
||||
)
|
||||
```
|
||||
|
||||
Here's an example using a `seaborn` plot:
|
||||
As well as `seaborn` plots:
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
|
||||
|
||||
|
||||
+16
-7
@@ -217,9 +217,9 @@ def _indices_to_rectangle(
|
||||
def _data_to_columnar(
|
||||
data: pandas.DataFrame
|
||||
| polars.DataFrame
|
||||
| numpy.ndarray
|
||||
| t.Mapping[str, t.Iterable[TableValue]]
|
||||
| t.Iterable[t.Iterable[TableValue]]
|
||||
| numpy.ndarray,
|
||||
| t.Iterable[t.Iterable[TableValue]],
|
||||
) -> tuple[
|
||||
list[str] | None,
|
||||
list[list[TableValue]],
|
||||
@@ -237,15 +237,24 @@ def _data_to_columnar(
|
||||
# DataFrame
|
||||
#
|
||||
# Use narwhals to abstract away the dataframe provider
|
||||
if isinstance(data, maybes.PANDAS_DATAFRAME_TYPES) or isinstance(
|
||||
data, maybes.POLARS_DATAFRAME_TYPES
|
||||
):
|
||||
if isinstance(data, maybes.DATAFRAME_TYPES):
|
||||
nw_data = nw.from_native(data)
|
||||
headers = nw_data.columns
|
||||
|
||||
for column_name in headers:
|
||||
columns.append(nw_data[column_name].to_list())
|
||||
|
||||
# NumPy array
|
||||
#
|
||||
# These are neatly organized, just need to get the contents as columns
|
||||
elif isinstance(data, maybes.NUMPY_ARRAY_TYPES):
|
||||
print("HERE")
|
||||
if data.ndim != 2:
|
||||
raise ValueError("Table data must be two-dimensional")
|
||||
|
||||
for ii in range(data.shape[1]):
|
||||
columns.append(data[:, ii].tolist())
|
||||
|
||||
# Mapping
|
||||
#
|
||||
# The headers are trivially available. The columns can also be used as-is,
|
||||
@@ -280,7 +289,7 @@ def _data_to_columnar(
|
||||
raise ValueError("All table rows must have the same length")
|
||||
|
||||
# Black magic to transpose the data
|
||||
data = list(map(list, zip(*data)))
|
||||
columns = list(map(list, zip(*columns)))
|
||||
|
||||
# Done
|
||||
return headers, columns
|
||||
@@ -360,7 +369,7 @@ class Table(FundamentalComponent): #
|
||||
show_row_numbers: bool = True
|
||||
|
||||
# All headers, if present
|
||||
_headers: list[str] | None = None
|
||||
_headers: list[str] | None = field(default=None, init=False)
|
||||
|
||||
# The data, as a list of columns ("column major"). This is set in
|
||||
# `__post_init__`.
|
||||
|
||||
+7
-7
@@ -13,14 +13,13 @@ import sys
|
||||
import typing as t
|
||||
|
||||
import introspection
|
||||
import narwhals.typing as nwt
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import matplotlib.axes # type: ignore
|
||||
import matplotlib.figure # type: ignore
|
||||
import numpy # type: ignore
|
||||
import pandas # type: ignore
|
||||
import plotly.graph_objects # type: ignore
|
||||
import polars # type: ignore
|
||||
|
||||
_IS_INITIALIZED = False
|
||||
|
||||
@@ -34,8 +33,7 @@ STR_TYPES = ()
|
||||
|
||||
NUMPY_ARRAY_TYPES: tuple[type[numpy.ndarray], ...] = ()
|
||||
|
||||
PANDAS_DATAFRAME_TYPES: tuple[type[pandas.DataFrame], ...] = ()
|
||||
POLARS_DATAFRAME_TYPES: tuple[type[polars.DataFrame], ...] = ()
|
||||
DATAFRAME_TYPES: tuple[type[nwt.IntoDataFrame], ...] = ()
|
||||
|
||||
PLOTLY_GRAPH_TYPES: tuple[type[plotly.graph_objects.Figure], ...] = ()
|
||||
MATPLOTLIB_GRAPH_TYPES: tuple[type, ...] = ()
|
||||
@@ -59,7 +57,7 @@ def initialize(force: bool = False) -> None:
|
||||
global _IS_INITIALIZED
|
||||
global FLOAT_TYPES, INT_TYPES, BOOL_TYPES, STR_TYPES
|
||||
global NUMPY_ARRAY_TYPES
|
||||
global PANDAS_DATAFRAME_TYPES, POLARS_DATAFRAME_TYPES
|
||||
global DATAFRAME_TYPES
|
||||
global PLOTLY_GRAPH_TYPES, MATPLOTLIB_GRAPH_TYPES, MATPLOTLIB_AXES_TYPES
|
||||
|
||||
# Already initialized?
|
||||
@@ -73,6 +71,8 @@ def initialize(force: bool = False) -> None:
|
||||
BOOL_TYPES = (bool,)
|
||||
STR_TYPES = (str,)
|
||||
|
||||
DATAFRAME_TYPES = ()
|
||||
|
||||
# Is numpy available and loaded?
|
||||
if "numpy" in sys.modules:
|
||||
import numpy # type: ignore
|
||||
@@ -92,12 +92,12 @@ def initialize(force: bool = False) -> None:
|
||||
if "pandas" in sys.modules:
|
||||
import pandas # type: ignore
|
||||
|
||||
PANDAS_DATAFRAME_TYPES = (pandas.DataFrame,)
|
||||
DATAFRAME_TYPES += (pandas.DataFrame,)
|
||||
|
||||
if "polars" in sys.modules:
|
||||
import polars # type: ignore
|
||||
|
||||
POLARS_DATAFRAME_TYPES = (polars.DataFrame,)
|
||||
DATAFRAME_TYPES += (polars.DataFrame,)
|
||||
|
||||
if "plotly" in sys.modules:
|
||||
import plotly.graph_objects # type: ignore
|
||||
|
||||
@@ -0,0 +1,222 @@
|
||||
"""
|
||||
Tests that different types of data formats accepted by tables are correctly
|
||||
turned into the internal column format.
|
||||
"""
|
||||
|
||||
import typing as t
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import polars as pl
|
||||
import pytest
|
||||
|
||||
import rio
|
||||
import rio.maybes
|
||||
|
||||
# Tables work with a lot of optionally supported modules. Make sure Rio is aware
# of which ones are available.
#
# `rio.maybes.initialize` only registers libraries that are already present in
# `sys.modules`, so this call has to come after the numpy / pandas / polars
# imports at the top of this file.
rio.maybes.initialize()
|
||||
|
||||
|
||||
def gen_valid_data() -> dict[str, t.Iterable[t.Any]]:
    """
    Builds a fresh mapping of column names to column values that is valid input
    for a `rio.Table`.

    The columns deliberately use different container types. Note that the
    generator column can only be iterated once, which is why callers request a
    new mapping every time they need the data.
    """
    valid_columns: dict[str, t.Iterable[t.Any]] = {}

    valid_columns["Text"] = ["A", "B", "C", "D", "E"]
    valid_columns["Number List"] = [1, 2, 3, 4, 5]
    valid_columns["Number Tuple"] = (1, 2, 3, 4, 5)

    # Single-use iterable: exhausted after the first pass
    valid_columns["Number Generator"] = (number for number in range(1, 6))

    return valid_columns
|
||||
|
||||
|
||||
def gen_too_short_column_data() -> dict[str, t.Iterable[t.Any]]:
    """
    Returns the data of `gen_valid_data`, plus one additional column that
    intentionally holds fewer values than all the others.
    """
    return {
        **gen_valid_data(),
        "Short Column": [1, 2, 3, 4],
    }
|
||||
|
||||
|
||||
def gen_too_long_column_data() -> dict[str, t.Iterable[t.Any]]:
    """
    Returns the data of `gen_valid_data`, plus one additional column that
    intentionally holds more values than all the others.
    """
    return {
        **gen_valid_data(),
        "Long Column": [1, 2, 3, 4, 5, 6],
    }
|
||||
|
||||
|
||||
def into_all_formats(
    datagen: t.Callable[
        [],
        dict[str, t.Iterable[t.Any]],
    ],
) -> t.Iterable[tuple[t.Any, bool]]:
    """
    Given a data generator, yields the same data in the input formats accepted
    by `rio.Table`, in this order:

    - pandas DataFrame
    - polars DataFrame
    - mapping of iterables
    - iterable of iterables

    Each yielded item is a tuple of the data and a boolean indicating whether
    the resulting table should have headers.

    `datagen` is called anew for every format, so single-use iterables in the
    generated data are never shared between two results.
    """
    # Pandas DataFrame
    yield pd.DataFrame(datagen()), True

    # Polars DataFrame
    yield pl.DataFrame(datagen()), True

    # NumPy arrays are intentionally not part of this list. NumPy cannot store
    # different types of data in the same array, so stacking the columns would
    # implicitly convert everything to strings, which makes the later
    # correctness checks fail.

    # Mapping of iterables
    yield datagen(), True

    # Iterable of iterables. Go through a polars DataFrame to get the data in
    # row-major order, then turn each row tuple into a list.
    row_major: list[list[t.Any]] = [
        list(row) for row in pl.DataFrame(datagen()).rows()
    ]

    yield row_major, False
|
||||
|
||||
|
||||
def assert_columns_match_data(
|
||||
datagen: t.Callable[
|
||||
[],
|
||||
dict[str, t.Iterable[t.Any]],
|
||||
],
|
||||
should_have_headers: bool,
|
||||
headers_are: list[str] | None,
|
||||
columns_are: list[list[t.Any]],
|
||||
) -> None:
|
||||
"""
|
||||
Asserts that columns in the standardized format used by `rio.Table` match
|
||||
what would be expected from the data generator.
|
||||
"""
|
||||
data = datagen()
|
||||
|
||||
# Do the headers match?
|
||||
if should_have_headers:
|
||||
assert headers_are is not None
|
||||
assert headers_are == list(data.keys())
|
||||
else:
|
||||
assert headers_are is None
|
||||
|
||||
# Correct number of columns?
|
||||
assert len(columns_are) == len(data)
|
||||
|
||||
# Column values
|
||||
for ii, (column_name, column_should) in enumerate(data.items()):
|
||||
column_should = list(column_should)
|
||||
column_is = columns_are[ii]
|
||||
|
||||
assert column_is == column_should
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, should_have_headers",
    into_all_formats(gen_valid_data),
)
def test_valid_data(data: t.Any, should_have_headers: bool) -> None:
    """
    Tests that valid data is correctly columnized, once for every input format
    produced by `into_all_formats`.
    """
    columnar = rio.components.table._data_to_columnar(data)
    headers, columns = columnar

    # Compare against a freshly generated copy of the same data
    assert_columns_match_data(
        datagen=gen_valid_data,
        should_have_headers=should_have_headers,
        headers_are=headers,
        columns_are=columns,
    )
|
||||
|
||||
|
||||
def test_short_column() -> None:
    """
    Tests that data containing a column which is shorter than the others is
    rejected with a `ValueError`.
    """
    # A mapping is the only format that can even express a too short column.
    # All other formats would immediately raise an error on construction,
    # before the data could be passed to a table.
    #
    # -> No need for fancy formats, just pass in the dict directly.
    ragged_data = gen_too_short_column_data()

    with pytest.raises(ValueError):
        rio.components.table._data_to_columnar(ragged_data)
|
||||
|
||||
|
||||
def test_long_column() -> None:
    """
    Tests that data containing a column which is longer than the others is
    rejected with a `ValueError`.
    """
    # A mapping is the only format that can even express a too long column.
    # All other formats would immediately raise an error on construction,
    # before the data could be passed to a table.
    #
    # -> No need for fancy formats, just pass in the dict directly.
    ragged_data = gen_too_long_column_data()

    with pytest.raises(ValueError):
        rio.components.table._data_to_columnar(ragged_data)
|
||||
|
||||
|
||||
def test_1d_array() -> None:
    """
    Tables are inherently two-dimensional, so passing a 1D array as table data
    must fail with a `ValueError`.
    """
    flat_array = np.array([1, 2, 3])

    with pytest.raises(ValueError):
        rio.components.table._data_to_columnar(flat_array)
|
||||
|
||||
|
||||
def test_2d_array() -> None:
    """
    Creating a table from a 2D array should work. The array's columns are
    expected to come back as the table's columns, without any headers.
    """
    grid = np.array([[1, 2, 3], [4, 5, 6]])

    def expected_columns() -> dict[str, t.Iterable[t.Any]]:
        # The keys are mere placeholders: since no headers are expected, only
        # the values are compared against the result.
        return {
            "0": [1, 4],
            "1": [2, 5],
            "2": [3, 6],
        }

    headers, columns = rio.components.table._data_to_columnar(grid)

    assert_columns_match_data(
        datagen=expected_columns,
        should_have_headers=False,
        headers_are=headers,
        columns_are=columns,
    )
|
||||
|
||||
|
||||
def test_3d_array() -> None:
    """
    Tables are inherently two-dimensional, so passing a 3D array as table data
    must fail with a `ValueError`.
    """
    cube = np.array([[[1, 2], [3, 4]]])

    with pytest.raises(ValueError):
        rio.components.table._data_to_columnar(cube)
|
||||
Reference in New Issue
Block a user