Merge branch 'main' into ref-plotting-logargs
jbrockmendel committed Nov 12, 2023
2 parents ddd69df + b2d9ec1 commit c61e86b
Showing 28 changed files with 267 additions and 103 deletions.
8 changes: 4 additions & 4 deletions asv_bench/benchmarks/algorithms.py
@@ -50,9 +50,9 @@ def setup(self, unique, sort, dtype):
"float": pd.Index(np.random.randn(N), dtype="float64"),
"object_str": string_index,
"object": pd.Index(np.arange(N), dtype="object"),
"datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
"datetime64[ns]": pd.date_range("2011-01-01", freq="h", periods=N),
"datetime64[ns, tz]": pd.date_range(
"2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
"2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
),
"Int64": pd.array(np.arange(N), dtype="Int64"),
"boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
@@ -93,9 +93,9 @@ def setup(self, unique, keep, dtype):
"uint": pd.Index(np.arange(N), dtype="uint64"),
"float": pd.Index(np.random.randn(N), dtype="float64"),
"string": tm.makeStringIndex(N),
"datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
"datetime64[ns]": pd.date_range("2011-01-01", freq="h", periods=N),
"datetime64[ns, tz]": pd.date_range(
"2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
"2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
),
"timestamp[ms][pyarrow]": pd.Index(
np.arange(N), dtype=pd.ArrowDtype(pa.timestamp("ms"))
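Every benchmark change in this commit follows the same pattern: the uppercase frequency aliases "H" and "S" are replaced with lowercase "h" and "s", tracking the deprecation of the uppercase hour and second aliases in pandas 2.2. A minimal before/after sketch of the two spellings (illustrative only, not taken from the benchmark code):

import pandas as pd

# Deprecated spelling: uppercase aliases emit a FutureWarning on pandas 2.2+
hourly_old = pd.date_range("2011-01-01", freq="H", periods=5)

# Preferred spelling, as used throughout this commit
hourly = pd.date_range("2011-01-01", freq="h", periods=5)
per_second = pd.date_range("2011-01-01", freq="s", periods=5)
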
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/arithmetic.py
@@ -491,7 +491,7 @@ class BinaryOpsMultiIndex:
param_names = ["func"]

def setup(self, func):
array = date_range("20200101 00:00", "20200102 0:00", freq="S")
array = date_range("20200101 00:00", "20200102 0:00", freq="s")
level_0_names = [str(i) for i in range(30)]

index = pd.MultiIndex.from_product([level_0_names, array])
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/frame_methods.py
@@ -439,9 +439,9 @@ def setup(self, inplace, dtype):
N, M = 10000, 100
if dtype in ("datetime64[ns]", "datetime64[ns, tz]", "timedelta64[ns]"):
data = {
"datetime64[ns]": date_range("2011-01-01", freq="H", periods=N),
"datetime64[ns]": date_range("2011-01-01", freq="h", periods=N),
"datetime64[ns, tz]": date_range(
"2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
"2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
),
"timedelta64[ns]": timedelta_range(start="1 day", periods=N, freq="1D"),
}
@@ -649,7 +649,7 @@ def time_series_nunique_nan(self):
class Duplicated:
def setup(self):
n = 1 << 20
t = date_range("2015-01-01", freq="S", periods=(n // 64))
t = date_range("2015-01-01", freq="s", periods=(n // 64))
xs = np.random.randn(n // 64).round(2)
self.df = DataFrame(
{
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/gil.py
@@ -212,7 +212,7 @@ def run(dti):
def time_datetime_to_period(self):
@test_parallel(num_threads=2)
def run(dti):
dti.to_period("S")
dti.to_period("s")

run(self.dti)

2 changes: 1 addition & 1 deletion asv_bench/benchmarks/groupby.py
@@ -238,7 +238,7 @@ def time_series_nth(self, dtype):

class DateAttributes:
def setup(self):
rng = date_range("1/1/2000", "12/31/2005", freq="H")
rng = date_range("1/1/2000", "12/31/2005", freq="h")
self.year, self.month, self.day = rng.year, rng.month, rng.day
self.ts = Series(np.random.randn(len(rng)), index=rng)

4 changes: 2 additions & 2 deletions asv_bench/benchmarks/indexing.py
@@ -232,7 +232,7 @@ def setup(self, index):
N = 100000
indexes = {
"int": Index(np.arange(N), dtype=np.int64),
"datetime": date_range("2011-01-01", freq="S", periods=N),
"datetime": date_range("2011-01-01", freq="s", periods=N),
}
index = indexes[index]
self.s = Series(np.random.rand(N), index=index)
@@ -465,7 +465,7 @@ def time_loc_row(self, unique_cols):
class AssignTimeseriesIndex:
def setup(self):
N = 100000
idx = date_range("1/1/2000", periods=N, freq="H")
idx = date_range("1/1/2000", periods=N, freq="h")
self.df = DataFrame(np.random.randn(N, 1), columns=["A"], index=idx)

def time_frame_assign_timeseries_index(self):
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/inference.py
@@ -164,7 +164,7 @@ def time_unique_date_strings(self, cache, count):

class ToDatetimeISO8601:
def setup(self):
rng = date_range(start="1/1/2000", periods=20000, freq="H")
rng = date_range(start="1/1/2000", periods=20000, freq="h")
self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
self.strings_nosep = rng.strftime("%Y%m%d %H:%M:%S").tolist()
self.strings_tz_space = [
@@ -276,7 +276,7 @@ def time_dup_string_tzoffset_dates(self, cache):
# GH 43901
class ToDatetimeInferDatetimeFormat:
def setup(self):
rng = date_range(start="1/1/2000", periods=100000, freq="H")
rng = date_range(start="1/1/2000", periods=100000, freq="h")
self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()

def time_infer_datetime_format(self):
12 changes: 6 additions & 6 deletions asv_bench/benchmarks/io/csv.py
@@ -89,7 +89,7 @@ class ToCSVDatetimeIndex(BaseIO):
fname = "__test__.csv"

def setup(self):
rng = date_range("2000", periods=100_000, freq="S")
rng = date_range("2000", periods=100_000, freq="s")
self.data = DataFrame({"a": 1}, index=rng)

def time_frame_date_formatting_index(self):
@@ -102,15 +102,15 @@ def time_frame_date_no_format_index(self):
class ToCSVPeriod(BaseIO):
fname = "__test__.csv"

params = ([1000, 10000], ["D", "H"])
params = ([1000, 10000], ["D", "h"])
param_names = ["nobs", "freq"]

def setup(self, nobs, freq):
rng = period_range(start="2000-01-01", periods=nobs, freq=freq)
self.data = DataFrame(rng)
if freq == "D":
self.default_fmt = "%Y-%m-%d"
elif freq == "H":
elif freq == "h":
self.default_fmt = "%Y-%m-%d %H:00"

def time_frame_period_formatting_default(self, nobs, freq):
@@ -130,15 +130,15 @@ def time_frame_period_formatting(self, nobs, freq):
class ToCSVPeriodIndex(BaseIO):
fname = "__test__.csv"

params = ([1000, 10000], ["D", "H"])
params = ([1000, 10000], ["D", "h"])
param_names = ["nobs", "freq"]

def setup(self, nobs, freq):
rng = period_range(start="2000-01-01", periods=nobs, freq=freq)
self.data = DataFrame({"a": 1}, index=rng)
if freq == "D":
self.default_fmt = "%Y-%m-%d"
elif freq == "H":
elif freq == "h":
self.default_fmt = "%Y-%m-%d %H:00"

def time_frame_period_formatting_index(self, nobs, freq):
@@ -253,7 +253,7 @@ class ReadCSVConcatDatetime(StringIORewind):
iso8601 = "%Y-%m-%d %H:%M:%S"

def setup(self):
rng = date_range("1/1/2000", periods=50000, freq="S")
rng = date_range("1/1/2000", periods=50000, freq="s")
self.StringIO_input = StringIO("\n".join(rng.strftime(self.iso8601).tolist()))

def time_read_csv(self):
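The ToCSVPeriod and ToCSVPeriodIndex benchmarks above pair each frequency with the format string that matches the default Period rendering, and the lowercase "h" alias keeps that pairing intact. A quick check of those defaults (a standalone sketch, not part of the benchmark suite):

import pandas as pd

# Default string forms of daily and hourly periods, matching the
# default_fmt values in the benchmarks above.
daily = pd.Period("2000-01-02", freq="D")
hourly = pd.Period("2000-01-02 05:00", freq="h")
print(daily)   # 2000-01-02        -> "%Y-%m-%d"
print(hourly)  # 2000-01-02 05:00  -> "%Y-%m-%d %H:00"
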
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io/excel.py
@@ -25,7 +25,7 @@ def _generate_dataframe():
df = DataFrame(
np.random.randn(N, C),
columns=[f"float{i}" for i in range(C)],
index=date_range("20000101", periods=N, freq="H"),
index=date_range("20000101", periods=N, freq="h"),
)
df["object"] = tm.makeStringIndex(N)
return df
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io/hdf.py
@@ -122,7 +122,7 @@ def setup(self, format):
self.df = DataFrame(
np.random.randn(N, C),
columns=[f"float{i}" for i in range(C)],
index=date_range("20000101", periods=N, freq="H"),
index=date_range("20000101", periods=N, freq="h"),
)
self.df["object"] = tm.makeStringIndex(N)
self.df.to_hdf(self.fname, "df", format=format)
10 changes: 5 additions & 5 deletions asv_bench/benchmarks/io/json.py
@@ -26,7 +26,7 @@ def setup(self, orient, index):
N = 100000
indexes = {
"int": np.arange(N),
"datetime": date_range("20000101", periods=N, freq="H"),
"datetime": date_range("20000101", periods=N, freq="h"),
}
df = DataFrame(
np.random.randn(N, 5),
@@ -48,7 +48,7 @@ def setup(self, index):
N = 100000
indexes = {
"int": np.arange(N),
"datetime": date_range("20000101", periods=N, freq="H"),
"datetime": date_range("20000101", periods=N, freq="h"),
}
df = DataFrame(
np.random.randn(N, 5),
@@ -108,7 +108,7 @@ class ToJSON(BaseIO):
def setup(self, orient, frame):
N = 10**5
ncols = 5
index = date_range("20000101", periods=N, freq="H")
index = date_range("20000101", periods=N, freq="h")
timedeltas = timedelta_range(start=1, periods=N, freq="s")
datetimes = date_range(start=1, periods=N, freq="s")
ints = np.random.randint(100000000, size=N)
@@ -191,7 +191,7 @@ class ToJSONISO(BaseIO):

def setup(self, orient):
N = 10**5
index = date_range("20000101", periods=N, freq="H")
index = date_range("20000101", periods=N, freq="h")
timedeltas = timedelta_range(start=1, periods=N, freq="s")
datetimes = date_range(start=1, periods=N, freq="s")
self.df = DataFrame(
@@ -214,7 +214,7 @@ class ToJSONLines(BaseIO):
def setup(self):
N = 10**5
ncols = 5
index = date_range("20000101", periods=N, freq="H")
index = date_range("20000101", periods=N, freq="h")
timedeltas = timedelta_range(start=1, periods=N, freq="s")
datetimes = date_range(start=1, periods=N, freq="s")
ints = np.random.randint(100000000, size=N)
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io/pickle.py
@@ -20,7 +20,7 @@ def setup(self):
self.df = DataFrame(
np.random.randn(N, C),
columns=[f"float{i}" for i in range(C)],
index=date_range("20000101", periods=N, freq="H"),
index=date_range("20000101", periods=N, freq="h"),
)
self.df["object"] = tm.makeStringIndex(N)
self.df.to_pickle(self.fname)
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io/stata.py
@@ -23,7 +23,7 @@ def setup(self, convert_dates):
self.df = DataFrame(
np.random.randn(N, C),
columns=[f"float{i}" for i in range(C)],
index=date_range("20000101", periods=N, freq="H"),
index=date_range("20000101", periods=N, freq="h"),
)
self.df["object"] = tm.makeStringIndex(self.N)
self.df["int8_"] = np.random.randint(
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/join_merge.py
@@ -213,7 +213,7 @@ class JoinNonUnique:
# GH 6329
def setup(self):
date_index = date_range("01-Jan-2013", "23-Jan-2013", freq="min")
daily_dates = date_index.to_period("D").to_timestamp("S", "S")
daily_dates = date_index.to_period("D").to_timestamp("s", "s")
self.fracofday = date_index.values - daily_dates.values
self.fracofday = self.fracofday.astype("timedelta64[ns]")
self.fracofday = self.fracofday.astype(np.float64) / 86_400_000_000_000
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/period.py
@@ -45,7 +45,7 @@ def time_from_ints_daily(self, freq, is_offset):

class DataFramePeriodColumn:
def setup(self):
self.rng = period_range(start="1/1/1990", freq="S", periods=20000)
self.rng = period_range(start="1/1/1990", freq="s", periods=20000)
self.df = DataFrame(index=range(len(self.rng)))

def time_setitem_period_column(self):
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/series_methods.py
@@ -64,7 +64,7 @@ def setup(self, dtype):
N = 10**6
data = {
"int": np.random.randint(1, 10, N),
"datetime": date_range("2000-01-01", freq="S", periods=N),
"datetime": date_range("2000-01-01", freq="s", periods=N),
}
self.s = Series(data[dtype])
if dtype == "datetime":
Expand Down Expand Up @@ -92,7 +92,7 @@ class Fillna:
def setup(self, dtype):
N = 10**6
if dtype == "datetime64[ns]":
data = date_range("2000-01-01", freq="S", periods=N)
data = date_range("2000-01-01", freq="s", periods=N)
na_value = NaT
elif dtype in ("float64", "Float64"):
data = np.random.randn(N)
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/strftime.py
@@ -53,7 +53,7 @@ def time_frame_datetime_formatting_custom(self, nobs):

class PeriodStrftime:
timeout = 1500
params = ([1000, 10000], ["D", "H"])
params = ([1000, 10000], ["D", "h"])
param_names = ["nobs", "freq"]

def setup(self, nobs, freq):
@@ -67,7 +67,7 @@ def setup(self, nobs, freq):
self.data.set_index("i", inplace=True)
if freq == "D":
self.default_fmt = "%Y-%m-%d"
elif freq == "H":
elif freq == "h":
self.default_fmt = "%Y-%m-%d %H:00"

def time_frame_period_to_str(self, nobs, freq):
12 changes: 6 additions & 6 deletions asv_bench/benchmarks/timeseries.py
@@ -27,7 +27,7 @@ def setup(self, index_type):
N = 100000
dtidxes = {
"dst": date_range(
start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="S"
start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="s"
),
"repeated": date_range(start="2000", periods=N / 10, freq="s").repeat(10),
"tz_aware": date_range(start="2000", periods=N, freq="s", tz="US/Eastern"),
@@ -72,13 +72,13 @@ class TzLocalize:

def setup(self, tz):
dst_rng = date_range(
start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="S"
start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="s"
)
self.index = date_range(start="10/29/2000", end="10/29/2000 00:59:59", freq="S")
self.index = date_range(start="10/29/2000", end="10/29/2000 00:59:59", freq="s")
self.index = self.index.append(dst_rng)
self.index = self.index.append(dst_rng)
self.index = self.index.append(
date_range(start="10/29/2000 2:00:00", end="10/29/2000 3:00:00", freq="S")
date_range(start="10/29/2000 2:00:00", end="10/29/2000 3:00:00", freq="s")
)

def time_infer_dst(self, tz):
Expand All @@ -90,7 +90,7 @@ class ResetIndex:
param_names = "tz"

def setup(self, tz):
idx = date_range(start="1/1/2000", periods=1000, freq="H", tz=tz)
idx = date_range(start="1/1/2000", periods=1000, freq="h", tz=tz)
self.df = DataFrame(np.random.randn(1000, 2), index=idx)

def time_reset_datetimeindex(self, tz):
@@ -255,7 +255,7 @@ def time_get_slice(self, monotonic):
class Lookup:
def setup(self):
N = 1500000
rng = date_range(start="1/1/2000", periods=N, freq="S")
rng = date_range(start="1/1/2000", periods=N, freq="s")
self.ts = Series(1, index=rng)
self.lookup_val = rng[N // 2]

9 changes: 8 additions & 1 deletion pandas/io/parsers/arrow_parser_wrapper.py
@@ -13,6 +13,7 @@
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import pandas_dtype
from pandas.core.dtypes.inference import is_integer

import pandas as pd
@@ -203,7 +204,13 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
# Ignore non-existent columns from dtype mapping
# like other parsers do
if isinstance(self.dtype, dict):
self.dtype = {k: v for k, v in self.dtype.items() if k in frame.columns}
self.dtype = {
k: pandas_dtype(v)
for k, v in self.dtype.items()
if k in frame.columns
}
else:
self.dtype = pandas_dtype(self.dtype)
try:
frame = frame.astype(self.dtype)
except TypeError as e:
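The parser change above is the only non-benchmark edit in the files shown: the pyarrow CSV wrapper now passes user-supplied dtypes through pandas_dtype() before calling DataFrame.astype(), so string specifications and other dtype-like inputs become concrete dtype objects, while keys missing from the parsed frame are still dropped. A small standalone sketch of that normalization step (the column names are made up for illustration):

from pandas.core.dtypes.common import pandas_dtype

# Mirror of the added dict comprehension: normalize each requested dtype
# and ignore keys that are not present in the parsed frame.
requested = {"a": "Int64", "b": "float64", "not_in_frame": "int32"}
frame_columns = ["a", "b"]
normalized = {
    k: pandas_dtype(v) for k, v in requested.items() if k in frame_columns
}
# normalized == {"a": Int64Dtype(), "b": dtype('float64')}
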
[Diff truncated: the remaining changed files are not rendered here.]
