gpt4 book ai didi

python - 如何在Python中获取列作为行组合?

转载 作者:太空宇宙 更新时间:2023-11-03 16:47:52 24 4
gpt4 key购买 nike

我有一个以下格式的数据框:

country region  max min
A R1 141 86
A R2 215 72
A R3 473 36
B R1 384 79
B R2 439 88
B R3 172 54

我正在尝试将上面的 df 转换为下面的输出,逻辑为

对于 A:如果第一个选项(Option1)是 max,则取 max 的测量值,其余情况一律取 min。对于 A 以外的国家:如果第二个选项(Option2)是 max,则取 max 的测量值,其余情况一律取 min。

注意:除 max 和 min 之外,还有一个额外的选项 default(其取值与 min 相同)。

country region  measure Option1 Option2
A R1 86 min min
A R2 72 min min
A R3 36 min min
A R1 86 min max
A R2 72 min max
A R3 36 min max
A R1 86 min default
A R2 72 min default
A R3 36 min default
A R1 86 default min
A R2 72 default min
A R3 36 default min
A R1 86 default default
A R2 72 default default
A R3 36 default default
A R1 86 default max
A R2 72 default max
A R3 36 default max
A R1 141 max min
A R2 215 max min
A R3 473 max min
A R1 141 max default
A R2 215 max default
A R3 473 max default
A R1 141 max max
A R2 215 max max
A R3 473 max max
B R1 79 min min
B R2 88 min min
B R3 54 min min
B R1 384 min max
B R2 439 min max
B R3 172 min max
B R1 79 min default
B R2 88 min default
B R3 54 min default
B R1 79 default min
B R2 88 default min
B R3 54 default min
B R1 79 default default
B R2 88 default default
B R3 54 default default
B R1 384 default max
B R2 439 default max
B R3 172 default max
B R1 79 max min
B R2 88 max min
B R3 54 max min
B R1 79 max default
B R2 88 max default
B R3 54 max default
B R1 384 max max
B R2 439 max max
B R3 172 max max

我知道使用 melt 再加上一些技巧可以解决这个问题,但作为新手,我无法正确地把它写出来。

请帮忙

编辑1:

使用以下代码实现:

有人可以帮助我微调它并提高性能吗?

# Reshape to long form: one row per (country, region, option), with the
# measure in the `value` column.  `DataFrame.sort` no longer exists in
# pandas; `sort_values` is its replacement.
d1 = (
    pd.melt(data, id_vars=['country', 'region'], value_vars=['max', 'min', 'default'])
    .sort_values(['country', 'region'])
    .reset_index(drop=True)
)

# Fill every missing value with the value of the row directly above it.
# Writing through d1.loc[row, col] (instead of d1[col].loc[row]) makes sure
# the assignment reaches d1 itself rather than a temporary object.
for ridx, i in enumerate(d1['value']):
    if pd.isnull(i) and ridx > 0:  # row 0 has no predecessor to copy from
        d1.loc[ridx, 'value'] = d1.loc[ridx - 1, 'value']

d2 = d1  # alias, not a copy: both names refer to the same frame

from pandas import DataFrame, merge
d1['key'] = 1
d2['key'] = 1
# Joining on a constant key pairs every row with every row (cross join).
d3 = merge(d1, d2, on='key')

d3 = d3.drop(['key'], axis=1)

# Keep only the pairs whose two halves describe the same country and region.
for index, row in d3.iterrows():
    if d3['region_x'].loc[index] == d3['region_y'].loc[index] and d3['country_x'].loc[index] == d3['country_y'].loc[index]:
        pass
    else:
        d3 = d3.drop([index])
d3 = d3.reset_index(drop=True)

d3['rate'] = ""

# Country 'A' takes the measure selected by Option1 (value_x); every other
# country takes the measure selected by Option2 (value_y).
for index, row in d3.iterrows():
    if d3['country_x'].loc[index] == 'A':
        d3.loc[index, 'rate'] = d3['value_x'].loc[index]
    else:
        d3.loc[index, 'rate'] = d3['value_y'].loc[index]

d3 = d3.drop(['value_x', 'country_y', 'region_y', 'value_y'], axis=1)

d3.columns = ['country', 'region', 'Option1', 'Option2', 'measure']
d3 = d3[['country', 'region', 'measure', 'Option1', 'Option2']]

问候,

最佳答案

您可以使用 fillna、numpy.where、numpy.in1d 和 astype:

# The chained calls are wrapped in parentheses so that they can span
# several lines; a bare line starting with `.sort_values` is a syntax error.
d1 = (
    pd.melt(data, id_vars=['country', 'region'], value_vars=['max', 'min', 'default'])
    .sort_values(['country', 'region'])
    .reset_index(drop=True)
)

# Fill NaN in column `value` with method ffill
# (propagate the last valid observation forward to the next valid one).
# NOTE: newer pandas releases deprecate the `method=` argument;
# d1['value'].ffill() is the equivalent spelling there.
d1['value'] = d1['value'].fillna(method='ffill')

d1['key'] = 1
# d1 can be merged with itself; no second frame is needed.
d3 = pd.merge(d1, d1, on='key')
d3 = d3.drop(['key'], axis=1)

# Filter rows by condition (boolean indexing): keep the pairs whose two
# halves describe the same country and region.
d3 = d3[
    (d3['region_x'] == d3['region_y']) & (d3['country_x'] == d3['country_y'])
].reset_index(drop=True)

# If the condition is true take value_x, otherwise value_y.
# If necessary, convert to int with astype(int).
# NOTE: NumPy 2.0 deprecates np.in1d in favour of np.isin.
d3['rate'] = np.where(np.in1d(d3['country_x'], 'A'), d3['value_x'], d3['value_y']).astype(int)

d3 = d3.drop(['value_x', 'country_y', 'region_y', 'value_y'], axis=1)

d3.columns = ['country', 'region', 'Option1', 'Option2', 'measure']
d3 = d3[['country', 'region', 'measure', 'Option1', 'Option2']]

时间:

新的解决方案速度快了 300 倍,主要是因为没有循环 iterrows()

In [67]: %timeit old(data)
1 loops, best of 3: 2.46 s per loop

In [68]: %timeit new(data1)
100 loops, best of 3: 8.33 ms per loop

代码:

# Separate copy of the input: new() is called with `data1`, old() with `data`.
data1 = data.copy()

def old(data):
    """Row-by-row baseline: expand each row into all Option1 x Option2 pairs.

    This is the loop-based implementation that ``new`` is timed against, so
    the per-row ``iterrows`` structure is kept on purpose.

    Args:
        data: DataFrame with columns ``country``, ``region``, ``max`` and
            ``min``.  A ``default`` column is optional; when it is absent it
            is treated as entirely missing, so it takes the ``min`` value
            during the forward fill below.

    Returns:
        DataFrame with columns ``country``, ``region``, ``measure``,
        ``Option1`` and ``Option2``: nine rows per input row.  For country
        ``'A'`` the measure follows ``Option1``; for every other country it
        follows ``Option2``.
    """
    # pd.melt needs every name in value_vars to exist as a column.
    if 'default' not in data.columns:
        data = data.assign(default=float('nan'))

    d1 = (
        pd.melt(data, id_vars=['country', 'region'], value_vars=['max', 'min', 'default'])
        .sort_values(['country', 'region'])
        .reset_index(drop=True)
    )

    # Forward fill by hand.  After the sort each (country, region) group is
    # ordered max, min, default, so a missing default copies the min value.
    # Writing through d1.at (not d1['value'].loc[...]) guarantees that the
    # assignment reaches d1 itself; row 0 has no predecessor and is skipped.
    for ridx, value in enumerate(d1['value'].tolist()):
        if ridx > 0 and pd.isnull(value):
            d1.at[ridx, 'value'] = d1.at[ridx - 1, 'value']

    # Joining on a constant key pairs every row with every row (cross join).
    d1['key'] = 1
    d3 = pd.merge(d1, d1, on='key')
    d3 = d3.drop(['key'], axis=1)

    # Drop the pairs whose halves belong to different countries or regions.
    # iterrows() keeps walking the frame it was started on, so rebinding d3
    # inside the loop is safe.
    for index, row in d3.iterrows():
        same_group = (row['region_x'] == row['region_y']
                      and row['country_x'] == row['country_y'])
        if not same_group:
            d3 = d3.drop([index])
    d3 = d3.reset_index(drop=True)

    # Country 'A' is driven by Option1 (value_x), all others by Option2
    # (value_y).  The values are collected in a list and assigned once, so no
    # placeholder column of a different dtype is needed.
    rates = []
    for _, row in d3.iterrows():
        if row['country_x'] == 'A':
            rates.append(row['value_x'])
        else:
            rates.append(row['value_y'])
    d3['rate'] = rates

    d3 = d3.drop(['value_x', 'country_y', 'region_y', 'value_y'], axis=1)

    d3.columns = ['country', 'region', 'Option1', 'Option2', 'measure']
    d3 = d3[['country', 'region', 'measure', 'Option1', 'Option2']]
    return d3
def new(data):
    """Vectorised expansion of each row into all Option1 x Option2 pairs.

    Args:
        data: DataFrame with columns ``country``, ``region``, ``max`` and
            ``min``.  A ``default`` column is optional; when it is absent it
            is treated as entirely missing, so it takes the ``min`` value
            during the forward fill below.  The input frame is not modified.

    Returns:
        DataFrame with columns ``country``, ``region``, ``measure``,
        ``Option1`` and ``Option2``: nine rows per input row.  For country
        ``'A'`` the (integer) measure follows ``Option1``; for every other
        country it follows ``Option2``.
    """
    # pd.melt needs every name in value_vars to exist as a column.
    if 'default' not in data.columns:
        data = data.assign(default=np.nan)

    d1 = (
        pd.melt(data, id_vars=['country', 'region'], value_vars=['max', 'min', 'default'])
        .sort_values(['country', 'region'])
        .reset_index(drop=True)
    )

    # Propagate the last valid observation forward.  After the sort each
    # (country, region) group is ordered max, min, default, so a missing
    # default copies the min value.  Series.ffill() replaces the deprecated
    # fillna(method='ffill').
    d1['value'] = d1['value'].ffill()

    # Pair every option with every option of the same country and region.
    # Merging on the two keys directly yields the same rows as a cross join
    # followed by a filter, without building the full cross product.
    d3 = pd.merge(d1, d1, on=['country', 'region'], suffixes=('_x', '_y'))

    # Country 'A' is driven by Option1 (value_x), all others by Option2
    # (value_y); convert to int as in the printed result.
    d3['measure'] = np.where(d3['country'] == 'A', d3['value_x'], d3['value_y']).astype(int)

    d3 = d3.rename(columns={'variable_x': 'Option1', 'variable_y': 'Option2'})
    d3 = d3[['country', 'region', 'measure', 'Option1', 'Option2']].reset_index(drop=True)
    return d3


# print() calls instead of Python 2 print statements: under Python 3 the
# last line would otherwise call .all() on the None returned by print().
print(old(data))
print(new(data1))
# Column-wise check that both implementations return the same values.
print((new(data1) == old(data)).all())
country True
region True
measure True
Option1 True
Option2 True
dtype: bool

关于python - 如何在Python中获取列作为行组合?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/36148501/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com