
python - How to use statsmodels.tsa.seasonal.seasonal_decompose with a Pandas dataframe


from statsmodels.tsa.seasonal import seasonal_decompose

def seasonal_decomp(df, model="additive"):
    seasonal_df = None
    seasonal_df = seasonal_decompose(df, model='additive')
    return seasonal_df

seasonal_decomp(df)
Error
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-93-00543113a58a> in <module>
----> 1 seasonal_decompose(df, model='additive')

e:\Anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
197 else:
198 kwargs[new_arg_name] = new_arg_value
--> 199 return func(*args, **kwargs)
200
201 return cast(F, wrapper)

e:\Anaconda3\lib\site-packages\statsmodels\tsa\seasonal.py in seasonal_decompose(x, model, filt, period, two_sided, extrapolate_trend)
185 for s, name in zip((seasonal, trend, resid, x),
186 ('seasonal', 'trend', 'resid', None)):
--> 187 results.append(pw.wrap(s.squeeze(), columns=name))
188 return DecomposeResult(seasonal=results[0], trend=results[1],
189 resid=results[2], observed=results[3])

e:\Anaconda3\lib\site-packages\statsmodels\tools\validation\validation.py in wrap(self, obj, columns, append, trim_start, trim_end)
216 new.append(append if c is None else str(c) + '_' + append)
217 columns = new
--> 218 return pd.DataFrame(obj, columns=columns, index=index)
219 else:
220 raise ValueError('Can only wrap 1 or 2-d array_like')

e:\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
495 mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
496 else:
--> 497 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
498
499 # For data is list-like, or Iterable (will consume into list)

e:\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in init_ndarray(values, index, columns, dtype, copy)
201
202 # _prep_ndarray ensures that values.ndim == 2 at this point
--> 203 index, columns = _get_axes(
204 values.shape[0], values.shape[1], index=index, columns=columns
205 )

e:\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in _get_axes(N, K, index, columns)
460 columns = ibase.default_index(K)
461 else:
--> 462 columns = ensure_index(columns)
463 return index, columns
464

e:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in ensure_index(index_like, copy)
5612 index_like = copy_func(index_like)
5613
-> 5614 return Index(index_like)
5615
5616

e:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in __new__(cls, data, dtype, copy, name, tupleize_cols, **kwargs)
409
410 elif data is None or is_scalar(data):
--> 411 raise cls._scalar_data_error(data)
412 elif hasattr(data, "__array__"):
413 return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)

TypeError: Index(...) must be called with a collection of some kind, 'seasonal' was passed
Test data
df = pd.DataFrame.from_dict(data, orient='index')

data = {pd.Timestamp('2020-01-23 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-01-24 00:00:00'): {'LA': 1.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-01-25 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-01-26 00:00:00'): {'LA': 3.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-01-27 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-01-28 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-01-29 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-01-30 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 1.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-01-31 00:00:00'): {'LA': 2.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 2.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-01 00:00:00'): {'LA': 1.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-02 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 1.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-03 00:00:00'): {'LA': 3.0,
'NY': 0.0,
'Miami': 1.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-04 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-05 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-06 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-07 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-08 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-09 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-10 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-11 00:00:00'): {'LA': 1.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-12 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-13 00:00:00'): {'LA': 1.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-14 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-15 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-16 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-17 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-18 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-19 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-20 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-21 00:00:00'): {'LA': 2.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-22 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-23 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-24 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-25 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-26 00:00:00'): {'LA': 0.0,
'NY': 1.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-27 00:00:00'): {'LA': 1.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-28 00:00:00'): {'LA': 0.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-02-29 00:00:00'): {'LA': 8.0,
'NY': 1.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-03-01 00:00:00'): {'LA': 6.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-03-02 00:00:00'): {'LA': 23.0,
'NY': 0.0,
'Miami': 2.0,
'Seattle': 1.0,
'San Diego': 0.0},
pd.Timestamp('2020-03-03 00:00:00'): {'LA': 20.0,
'NY': 0.0,
'Miami': 0.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-03-04 00:00:00'): {'LA': 31.0,
'NY': 2.0,
'Miami': 23.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-03-05 00:00:00'): {'LA': 70.0,
'NY': 0.0,
'Miami': 2.0,
'Seattle': 1.0,
'San Diego': 1.0},
pd.Timestamp('2020-03-06 00:00:00'): {'LA': 48.0,
'NY': 9.0,
'Miami': 1.0,
'Seattle': 9.0,
'San Diego': 0.0},
pd.Timestamp('2020-03-07 00:00:00'): {'LA': 115.0,
'NY': 0.0,
'Miami': 3.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-03-08 00:00:00'): {'LA': 114.0,
'NY': 7.0,
'Miami': 5.0,
'Seattle': 4.0,
'San Diego': 2.0},
pd.Timestamp('2020-03-09 00:00:00'): {'LA': 68.0,
'NY': 5.0,
'Miami': 4.0,
'Seattle': 0.0,
'San Diego': 0.0},
pd.Timestamp('2020-03-10 00:00:00'): {'LA': 192.0,
'NY': 6.0,
'Miami': 13.0,
'Seattle': 3.0,
'San Diego': 4.0},
pd.Timestamp('2020-03-11 00:00:00'): {'LA': 398.0,
'NY': 7.0,
'Miami': 6.0,
'Seattle': 0.0,
'San Diego': 6.0},
pd.Timestamp('2020-03-12 00:00:00'): {'LA': 452.0,
'NY': 14.0,
'Miami': 11.0,
'Seattle': 8.0,
'San Diego': 4.0},
pd.Timestamp('2020-03-13 00:00:00'): {'LA': 596.0,
'NY': 99.0,
'Miami': 9.0,
'Seattle': 17.0,
'San Diego': 7.0},
pd.Timestamp('2020-03-14 00:00:00'): {'LA': 713.0,
'NY': 0.0,
'Miami': 20.0,
'Seattle': 14.0,
'San Diego': 14.0},
pd.Timestamp('2020-03-15 00:00:00'): {'LA': 98.0,
'NY': 11.0,
'Miami': 11.0,
'Seattle': 4.0,
'San Diego': 13.0},
pd.Timestamp('2020-03-16 00:00:00'): {'LA': 1392.0,
'NY': 38.0,
'Miami': 6.0,
'Seattle': 27.0,
'San Diego': 11.0},
pd.Timestamp('2020-03-17 00:00:00'): {'LA': 1781.0,
'NY': 121.0,
'Miami': 23.0,
'Seattle': 24.0,
'San Diego': 0.0},
pd.Timestamp('2020-03-18 00:00:00'): {'LA': 2776.0,
'NY': 51.0,
'Miami': 14.0,
'Seattle': 33.0,
'San Diego': 54.0},
pd.Timestamp('2020-03-19 00:00:00'): {'LA': 5240.0,
'NY': 249.0,
'Miami': 38.0,
'Seattle': 52.0,
'San Diego': 34.0},
pd.Timestamp('2020-03-20 00:00:00'): {'LA': 5322.0,
'NY': 172.0,
'Miami': 50.0,
'Seattle': 54.0,
'San Diego': 52.0},
pd.Timestamp('2020-03-21 00:00:00'): {'LA': 6346.0,
'NY': 228.0,
'Miami': 86.0,
'Seattle': 53.0,
'San Diego': 38.0},
pd.Timestamp('2020-03-22 00:00:00'): {'LA': 7936.0,
'NY': 525.0,
'Miami': 66.0,
'Seattle': 61.0,
'San Diego': 34.0}}

Best answer

  • The problem is here: seasonal_decompose(df, model='additive'). The entire dataframe is passed to seasonal_decompose, but only a single column with a datetime index can be passed.
  • The function has been updated to compute the .trend of each column in a list comprehension and then combine the results into a single dataframe with pandas.concat, as shown below.

  • from statsmodels.tsa.seasonal import seasonal_decompose
    import pandas as pd

    # dataframe from sample; in this case the index is already a datetime
    df = pd.DataFrame.from_dict(data, orient='index')

    # if the index is not a datetime format
    df.index = pd.to_datetime(df.index)

    # perform seasonal decompose in a list comprehension on each column, return dataframe
    def season_decom(df, model='additive'):
        return pd.concat([pd.DataFrame({col: seasonal_decompose(df[col], model=model).trend}) for col in df.columns], axis=1)


    # call function
    df_seasonal = season_decom(df)

    # df_seasonal.head()
                      LA   NY     Miami   Seattle  San Diego
    2020-01-23       NaN  NaN       NaN       NaN        NaN
    2020-01-24       NaN  NaN       NaN       NaN        NaN
    2020-01-25       NaN  NaN       NaN       NaN        NaN
    2020-01-26  0.571429  0.0  0.000000  0.000000        0.0
    2020-01-27  0.571429  0.0  0.142857  0.000000        0.0
    2020-01-28  0.714286  0.0  0.142857  0.285714        0.0
    2020-01-29  0.857143  0.0  0.142857  0.285714        0.0
    2020-01-30  0.428571  0.0  0.285714  0.285714        0.0
    2020-01-31  0.857143  0.0  0.428571  0.285714        0.0
    2020-02-01  0.857143  0.0  0.428571  0.285714        0.0
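
  • The NaN rows at the start of .trend above come from the centered moving average that seasonal_decompose uses to estimate the trend. Below is a minimal single-column sketch; it assumes the same df as above and uses the extrapolate_trend='freq' option (visible in the seasonal_decompose signature in the traceback) to fill in those edge values.

    # decompose one column only; the daily DatetimeIndex lets the period be inferred
    result = seasonal_decompose(df['LA'], model='additive', extrapolate_trend='freq')

    # the DecomposeResult exposes the individual components as Series
    print(result.observed.head())
    print(result.trend.head())
    print(result.seasonal.head())
    print(result.resid.head())

    # result.plot() draws the observed, trend, seasonal and residual panels (requires matplotlib)
    fig = result.plot()
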
    Simplified version
  • Use seasonal_decompose
  • Apply it to each column with .apply

    df_seasonal = df.apply(lambda x: seasonal_decompose(x, model='additive').trend)
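
  • The same one-liner pattern works for the other attributes of DecomposeResult (seasonal, resid, observed, as seen in the traceback above). A minimal sketch, assuming the same df, that collects several components into a dict of dataframes:

    # build one dataframe per component, keyed by the component name
    components = {
        name: df.apply(lambda col, n=name: getattr(seasonal_decompose(col, model='additive'), n))
        for name in ('trend', 'seasonal', 'resid')
    }

    components['seasonal'].head()
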

    For python - How to use statsmodels.tsa.seasonal.seasonal_decompose with a Pandas dataframe, we found a similar question on Stack Overflow: https://stackoverflow.com/questions/64295560/
