2021-01-09 15:20:56 +01:00
|
|
|
""" test feather-format compat """
|
|
|
|
from distutils.version import LooseVersion
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
import pandas.util._test_decorators as td
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
import pandas._testing as tm
|
|
|
|
|
2021-01-30 22:29:33 +01:00
|
|
|
from pandas.io.feather_format import read_feather, to_feather # noqa: E402 isort:skip
|
2021-01-09 15:20:56 +01:00
|
|
|
|
|
|
|
# Skip this whole module when pyarrow cannot be imported.
pyarrow = pytest.importorskip("pyarrow")

# Installed pyarrow version, parsed once so tests can gate on it.
pyarrow_version = LooseVersion(pyarrow.__version__)

# Mark that ignores warnings whose message starts with "The Sparse".
filter_sparse = pytest.mark.filterwarnings("ignore:The Sparse")
|
|
|
|
|
|
|
|
|
|
|
|
@filter_sparse
|
|
|
|
@pytest.mark.single
|
|
|
|
class TestFeather:
    """Round-trip and error-path tests for feather reading and writing."""
|
|
|
|
def check_error_on_write(self, df, exc):
    """Assert that writing ``df`` with ``to_feather`` raises ``exc``.

    The write targets a temporary path supplied by ``tm.ensure_clean``.
    """
    with pytest.raises(exc), tm.ensure_clean() as target:
        to_feather(df, target)
|
|
|
|
|
|
|
|
def check_round_trip(self, df, expected=None, write_kwargs=None, **read_kwargs):
    """Write ``df`` to feather, read it back and compare with ``expected``.

    Parameters
    ----------
    df : DataFrame
        Frame passed to ``to_feather``.
    expected : DataFrame, optional
        Frame the result must equal; defaults to ``df`` itself.
    write_kwargs : dict, optional
        Keyword arguments forwarded to ``to_feather``. ``None`` (the
        default) means no extra arguments.
    **read_kwargs
        Keyword arguments forwarded to ``read_feather``.
    """
    if expected is None:
        expected = df
    # Build a fresh dict per call: a ``{}`` default would be one object
    # shared by every invocation of this helper.
    if write_kwargs is None:
        write_kwargs = {}

    with tm.ensure_clean() as path:
        to_feather(df, path, **write_kwargs)

        # Read while still inside the context so the temporary file exists.
        result = read_feather(path, **read_kwargs)
        tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
|
|
def test_error(self):
    """Each non-DataFrame object below must raise ``ValueError`` on write."""
    non_frames = (
        pd.Series([1, 2, 3]),
        1,
        "foo",
        pd.Timestamp("20130101"),
        np.array([1, 2, 3]),
    )
    for candidate in non_frames:
        self.check_error_on_write(candidate, ValueError)
|
|
|
|
|
|
|
|
def test_basic(self):
    """Round-trip a frame holding one column per commonly used dtype."""

    def freqless_index(**range_kwargs):
        # Rebuild from a plain list of stamps and pass freq=None explicitly.
        stamps = list(pd.date_range("20130101", periods=3, **range_kwargs))
        return pd.DatetimeIndex(stamps, freq=None)

    columns = {
        "string": list("abc"),
        "int": list(range(1, 4)),
        "uint": np.arange(3, 6).astype("u1"),
        "float": np.arange(4.0, 7.0, dtype="float64"),
        "float_with_null": [1.0, np.nan, 3],
        "bool": [True, False, True],
        "bool_with_null": [True, np.nan, False],
        "cat": pd.Categorical(list("abc")),
        "dt": freqless_index(),
        "dttz": freqless_index(tz="US/Eastern"),
        "dt_with_null": [
            pd.Timestamp("20130101"),
            pd.NaT,
            pd.Timestamp("20130103"),
        ],
        "dtns": freqless_index(freq="ns"),
    }
    frame = pd.DataFrame(columns)

    # Period and timedelta columns are only added for new enough pyarrow.
    has_extension_support = pyarrow_version >= LooseVersion("0.16.1.dev")
    if has_extension_support:
        frame["periods"] = pd.period_range("2013", freq="M", periods=3)
        frame["timedeltas"] = pd.timedelta_range("1 day", periods=3)
        # TODO temporary disable due to regression in pyarrow 0.17.1
        # https://github.com/pandas-dev/pandas/issues/34255
        # df["intervals"] = pd.interval_range(0, 3, 3)

    # Sanity-check the timezone on the input before writing it out.
    assert frame["dttz"].dtype.tz.zone == "US/Eastern"
    self.check_round_trip(frame)
|
|
|
|
|
|
|
|
def test_duplicate_columns(self):
    """A frame whose column labels repeat must raise ``ValueError`` on write."""
    # https://github.com/wesm/feather/issues/53
    # not currently able to handle duplicate columns
    values = np.arange(12).reshape(4, 3)
    frame = pd.DataFrame(values, columns=list("aaa")).copy()
    self.check_error_on_write(frame, ValueError)
|
|
|
|
|
|
|
|
def test_stringify_columns(self):
    """A frame built without explicit column labels must fail to write."""
    grid = np.arange(12).reshape(4, 3)
    frame = pd.DataFrame(grid).copy()
    self.check_error_on_write(frame, ValueError)
|
|
|
|
|
|
|
|
def test_read_columns(self):
    """Reading with ``columns=`` must yield just the requested subset."""
    # GH 24025
    data = {
        "col1": list("abc"),
        "col2": list(range(1, 4)),
        "col3": list("xyz"),
        "col4": list(range(4, 7)),
    }
    frame = pd.DataFrame(data)
    wanted = ["col1", "col3"]
    self.check_round_trip(frame, expected=frame[wanted], columns=wanted)
|
|
|
|
|
|
|
|
@td.skip_if_no("pyarrow", min_version="0.17.1")
def test_read_columns_different_order(self):
    """Requesting columns in a non-file order returns them in that order.

    The method name carries the ``test_`` prefix so that pytest collects
    it; under the old name ``read_columns_different_order`` it never ran.
    """
    # GH 33878
    df = pd.DataFrame({"A": [1, 2], "B": ["x", "y"], "C": [True, False]})
    requested = ["B", "A"]
    # Compare against the reordered subset. Leaving ``expected`` unset would
    # compare the two-column result with the full A, B, C frame.
    expected = df[requested]
    self.check_round_trip(df, expected, columns=requested)
|
|
|
|
|
|
|
|
def test_unsupported_other(self):
    """A column mixing str, int and float objects must fail to write."""
    # mixed python objects
    mixed = pd.DataFrame({"a": ["a", 1, 2.0]})
    # Some versions raise ValueError, others raise ArrowInvalid.
    self.check_error_on_write(mixed, Exception)
|
|
|
|
|
|
|
|
def test_rw_use_threads(self):
    """Round trip must succeed with ``use_threads`` both on and off."""
    frame = pd.DataFrame({"A": np.arange(100000)})
    for threaded in (True, False):
        self.check_round_trip(frame, use_threads=threaded)
|
|
|
|
|
|
|
|
def test_write_with_index(self):
    """Default index round-trips; the other index/column layouts raise."""
    frame = pd.DataFrame({"A": [1, 2, 3]})
    self.check_round_trip(frame)

    # non-default index
    rejected_indexes = (
        [2, 3, 4],
        pd.date_range("20130101", periods=3),
        list("abc"),
        [1, 3, 4],
        pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]),
    )
    for labels in rejected_indexes:
        frame.index = labels
        self.check_error_on_write(frame, ValueError)

    # index with meta-data
    frame.index = [0, 1, 2]
    frame.index.name = "foo"
    self.check_error_on_write(frame, ValueError)

    # column multi-index
    # Assigning the index again replaces the one named "foo" above.
    frame.index = [0, 1, 2]
    frame.columns = pd.MultiIndex.from_tuples([("a", 1)])
    self.check_error_on_write(frame, ValueError)
|
|
|
|
|
|
|
|
def test_path_pathlib(self):
    """Frame survives ``tm.round_trip_pathlib`` with the feather writer/reader."""
    original = tm.makeDataFrame().reset_index()
    roundtripped = tm.round_trip_pathlib(original.to_feather, pd.read_feather)
    tm.assert_frame_equal(original, roundtripped)
|
|
|
|
|
|
|
|
def test_path_localpath(self):
    """Frame survives ``tm.round_trip_localpath`` with the feather writer/reader."""
    original = tm.makeDataFrame().reset_index()
    roundtripped = tm.round_trip_localpath(original.to_feather, pd.read_feather)
    tm.assert_frame_equal(original, roundtripped)
|
|
|
|
|
|
|
|
@td.skip_if_no("pyarrow", min_version="0.16.1.dev")
def test_passthrough_keywords(self):
    """Round trip with an extra writer keyword (``version=1``)."""
    frame = tm.makeDataFrame().reset_index()
    writer_options = {"version": 1}
    self.check_round_trip(frame, write_kwargs=writer_options)
|
2021-01-09 15:20:56 +01:00
|
|
|
|
|
|
|
@td.skip_if_no("pyarrow")
@tm.network
def test_http_path(self, feather_file):
    """Reading the sample file by URL must match reading ``feather_file``."""
    # GH 29055
    repo_root = "https://raw.githubusercontent.com/pandas-dev/pandas/master/"
    relative_path = "pandas/tests/io/data/feather/feather-0_3_1.feather"
    url = repo_root + relative_path

    from_fixture = pd.read_feather(feather_file)
    from_url = pd.read_feather(url)
    tm.assert_frame_equal(from_fixture, from_url)
|