from __future__ import print_function

import numpy as np
import pandas as pd

# Fix the RNG seed so the random sample data below is identical on every run.
np.random.seed(1)

# Six consecutive daily timestamps starting at 2013-01-01; used as the row index.
dates = pd.date_range('20130101', periods=6)

# 6x4 frame of standard-normal samples, rows labelled by `dates`, columns A-D.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
赋值,新增列数据
df.iloc[2,2], df.loc['2013-01-03', 'D']
df.A[df.A>0], df['F']
# Assignment demos: every statement below modifies `df` in place.

# Set the cell at integer position (row 2, column 2) to 1111.
df.iloc[2, 2] = 1111

# Set the cell at row label '2013-01-03', column label 'D' to 2222.
df.loc['2013-01-03', 'D'] = 2222

# Keep the rows whose 'A' value is positive; every column of the rows where
# 'A' is negative is overwritten with 0 (a boolean row mask selects whole rows).
df[df.A < 0] = 0

# Same mask, but only column 'A' is overwritten. `.loc[mask, column]` is used
# instead of the chained form `df.A[mask] = 0`, which may write to a temporary
# copy and leave `df` unchanged.
df.loc[df.A < 0, 'A'] = 0

# Add a new column 'F' filled entirely with NaN.
df['F'] = np.nan

# Add a new column 'G' from a Series; values are aligned to `df` by index
# label, so the Series must carry the same date index as `df`.
df['G'] = pd.Series(
    [1, 2, 3, 4, 5, 6],
    index=pd.date_range('20130101', periods=6),
)
以下是所有的运行结果:
print(df)
>                    A         B         C         D
> 2013-01-01  1.624345 -0.611756 -0.528172 -1.072969
> 2013-01-02  0.865408 -2.301539  1.744812 -0.761207
> 2013-01-03  0.319039 -0.249370  1.462108 -2.060141
> 2013-01-04 -0.322417 -0.384054  1.133769 -1.099891
> 2013-01-05 -0.172428 -0.877858  0.042214  0.582815
> 2013-01-06 -1.100619  1.144724  0.901591  0.502494
df.iloc[2,2] = 1111
print(df)
>                    A         B            C         D
> 2013-01-01  1.624345 -0.611756    -0.528172 -1.072969
> 2013-01-02  0.865408 -2.301539     1.744812 -0.761207
> 2013-01-03  0.319039 -0.249370  1111.000000 -2.060141
> 2013-01-04 -0.322417 -0.384054     1.133769 -1.099891
> 2013-01-05 -0.172428 -0.877858     0.042214  0.582815
> 2013-01-06 -1.100619  1.144724     0.901591  0.502494
df.loc['2013-01-03', 'D'] = 2222
print(df)
>                    A         B            C            D
> 2013-01-01  1.624345 -0.611756    -0.528172    -1.072969
> 2013-01-02  0.865408 -2.301539     1.744812    -0.761207
> 2013-01-03  0.319039 -0.249370  1111.000000  2222.000000
> 2013-01-04 -0.322417 -0.384054     1.133769    -1.099891
> 2013-01-05 -0.172428 -0.877858     0.042214     0.582815
> 2013-01-06 -1.100619  1.144724     0.901591     0.502494
df[df.A < 0] = 0
print(df)
>                    A         B         C         D
> 2013-01-01  1.624345 -0.611756 -0.528172 -1.072969
> 2013-01-02  0.865408 -2.301539  1.744812 -0.761207
> 2013-01-03  0.319039 -0.249370  1.462108 -2.060141
> 2013-01-04  0.000000  0.000000  0.000000  0.000000
> 2013-01-05  0.000000  0.000000  0.000000  0.000000
> 2013-01-06  0.000000  0.000000  0.000000  0.000000
df.A[df.A < 0] = 0
print(df)
>                    A         B         C         D
> 2013-01-01  1.624345 -0.611756 -0.528172 -1.072969
> 2013-01-02  0.865408 -2.301539  1.744812 -0.761207
> 2013-01-03  0.319039 -0.249370  1.462108 -2.060141
> 2013-01-04  0.000000 -0.384054  1.133769 -1.099891
> 2013-01-05  0.000000 -0.877858  0.042214  0.582815
> 2013-01-06  0.000000  1.144724  0.901591  0.502494
df['E'] = np.nan
print(df)
>                    A         B         C         D   E
> 2013-01-01  1.624345 -0.611756 -0.528172 -1.072969 NaN
> 2013-01-02  0.865408 -2.301539  1.744812 -0.761207 NaN
> 2013-01-03  0.319039 -0.249370  1.462108 -2.060141 NaN
> 2013-01-04  0.000000 -0.384054  1.133769 -1.099891 NaN
> 2013-01-05  0.000000 -0.877858  0.042214  0.582815 NaN
> 2013-01-06  0.000000  1.144724  0.901591  0.502494 NaN
df['G'] = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130101', periods=6))
print(df)
>                    A         B         C         D   E  G
> 2013-01-01  1.624345 -0.611756 -0.528172 -1.072969 NaN  1
> 2013-01-02  0.865408 -2.301539  1.744812 -0.761207 NaN  2
> 2013-01-03  0.319039 -0.249370  1.462108 -2.060141 NaN  3
> 2013-01-04  0.000000 -0.384054  1.133769 -1.099891 NaN  4
> 2013-01-05  0.000000 -0.877858  0.042214  0.582815 NaN  5
> 2013-01-06  0.000000  1.144724  0.901591  0.502494 NaN  6
END