# date_range: 生成时间序列(日期)
#   periods: 个数; freq: 单位(频率)
# 文件操作: xlsx / csv
#   read_csv: 读取 csv 文件
#   read_excel(文件名, 表名): 读取 Excel 文件
#   to_csv: 保存为 csv 文件
#   to_excel: 保存为 Excel 文件
# Excel 表格读写操作
import numpy as np
import pandas as pd
# Series basics: the first ten even numbers (2, 4, ..., 20) on a default
# integer index.
even_numbers = list(range(2, 21, 2))
s = pd.Series(even_numbers)
print(type(s))
print(s)
# Selection demo: eight consecutive days starting 2017-03-01 index an 8x5
# frame of standard-normal samples in columns A..E.
dates = pd.date_range("20170301", periods=8)
df = pd.DataFrame(np.random.randn(8, 5), index=dates, columns=list("ABCDE"))
print(df["A"])  # a single column, returned as a Series
print(df[:3])  # the first three rows
print(df["20170301":"20170302"])  # rows whose index label lies in the date range
print(df.loc[dates[0]])  # one row, selected by its index label
print(df.loc["20170301":"20170303", ["B", "D"]])  # label-range rows, columns B and D only
print(df.at[dates[0], "C"])  # the scalar at (row label, column label)
print(df.iloc[1:3])  # rows at positions 1 and 2
print(df.iloc[1:3, 2:4])  # rows at positions 1-2, columns at positions 2-3
print(df.iat[1, 3])  # the scalar at (row position 1, column position 3)
# Both conditions go into ONE mask. Chaining them as df[df.B > 0][df.A < 0]
# applies a mask indexed by the full frame to an already-filtered frame, so
# pandas has to realign it and emits a "Boolean Series key will be reindexed"
# UserWarning.
print(df[(df.B > 0) & (df.A < 0)])
print(df[df < 0.0])  # keep negative cells; every other cell becomes NaN
print(df[df["E"].isin([1, 2])])  # rows whose E value equals 1 or 2
# Assignment demo.
# New column F: the integers 10..17 on the same eight daily timestamps.
f_index = pd.date_range("20170301", periods=8)
sl = pd.Series(range(10, 18), index=f_index)
df["F"] = sl
print(df)

# Overwrite one cell by label.
df.at[dates[0], "A"] = 0
print(df)

# Overwrite one cell by position, then fill the whole D column with 4.
df.iat[1, 1] = 1
df.loc[:, "D"] = np.full(len(df), 4)
print(df)

# On a copy, flip the sign of every strictly positive cell.
df2 = df.copy()
positive_cells = df2 > 0
df2[positive_cells] = -df2
print(df2)
# Missing values: take the first four rows, keep columns A-D and add a new
# column G that does not exist in df (so it starts out all-NaN).
wanted_columns = [*"ABCD", "G"]
df1 = df.reindex(index=dates[:4], columns=wanted_columns)
# Give G a value on the first two dates only; the rest stays NaN.
df1.loc[dates[0]:dates[1], "G"] = 1
print(df1)
without_gaps = df1.dropna()  # drop every row that contains a NaN
print(without_gaps)
gaps_filled = df1.fillna(2)  # replace every NaN with 2
print(gaps_filled)
# Statistics demo.
def _spread(column):
    """Return the difference between a column's largest and smallest value."""
    return column.max() - column.min()


print(df.mean())  # per-column mean
print(df.var())  # per-column variance

# A Series with one missing value, indexed by the same eight dates.
ss = pd.Series([1, 2, 4, np.nan, 5, 7, 9, 10], index=dates)
print(ss)
print(ss.shift(2))  # values moved two positions later
print(s.diff())  # difference to the previous element (first order by default)
print(s.value_counts())  # number of occurrences of each value
print(df)
print(df.apply(np.cumsum))  # running total of every column
print(df.apply(_spread))  # max - min of every column
# Concat: stack the first two and the last two rows of df.
head_rows = df[:2]
tail_rows = df[-2:]
pieces = [head_rows, tail_rows]
print(pd.concat(pieces))
# Merge demo: two small frames that share the key "x" only.
left = pd.DataFrame({"key": list("xy"), "value": [1, 2]})
right = pd.DataFrame({"key": list("xz"), "value": [3, 4]})
print("left", left)
print("right", right)
# `how` selects the join type; "right" keeps every key of `right` ("x", "z").
merged = left.merge(right, on="key", how="right")
print(merged)

# Group-by demo: "b" appears twice in A, so its two B values are summed.
df3 = pd.DataFrame({"A": list("abcb"), "B": list(range(4))})
print(df3)
totals_by_a = df3.groupby("A").sum()
print(totals_by_a)
# Reshape
import datetime

# Dates for column F: the 1st of each month of 2017, then the 15th of each.
month_starts = [datetime.datetime(2017, month, 1) for month in range(1, 13)]
month_middles = [datetime.datetime(2017, month, 15) for month in range(1, 13)]

# A 24-row frame: A, B and C repeat short label patterns, D and E hold
# standard-normal samples, F holds the 24 dates built above.
df4 = pd.DataFrame(
    {
        "A": ["one", "one", "two", "three"] * 6,
        "B": ["a", "b", "c"] * 8,
        "C": (["foo"] * 3 + ["bar"] * 3) * 4,
        "D": np.random.randn(24),
        "E": np.random.randn(24),
        "F": month_starts + month_middles,
    }
)
print(df4)
# Pivot table: values of D, rows keyed by (A, B), one column per C label.
pivoted = df4.pivot_table(values="D", index=["A", "B"], columns=["C"])
print(pivoted)
# Time series: ten timestamps one second apart, starting 2017-03-01 00:00:00.
# The lowercase "s" alias is used because the uppercase "S" alias for seconds
# is deprecated (FutureWarning since pandas 2.2).
t_exam = pd.date_range("20170301", periods=10, freq="s")
print(t_exam)
# Plotting: 1000 daily standard-normal steps starting 2017-03-01, accumulated
# with cumsum() and drawn as a line chart.
ts = pd.Series(np.random.randn(1000), index=pd.date_range("20170301", periods=1000))
ts = ts.cumsum()
# pyplot is imported explicitly instead of `from pylab import *`: the star
# import drops numpy and pyplot names into this module's namespace, where
# they shadow builtins such as sum/max/min.
import matplotlib.pyplot as plt

ts.plot()
plt.show()  # opens the figure window (blocks with interactive backends)
# File I/O: load sheet "Sheet1" of the workbook, show it, then save it as CSV.
excel_path = "./panda.xlsx"
csv_path = "./panda.csv"
df6 = pd.read_excel(excel_path, sheet_name="Sheet1")
print(df6)
df6.to_csv(csv_path)
## time series 和 绘图
# read_excel() 读取 Excel 文件;保存用 to_csv() 或 to_excel()
# pandas 读取 csv 文件用 read_csv()