date_range:生成日期时间序列(DatetimeIndex)
periods:生成的时间点个数;freq:相邻时间点的间隔单位(例如 "D" 表示天,"s" 表示秒)
文件操作 xlsx csv
read_csv
read_excel(文件名, sheet_name=表名)
to_csv:保存为 csv 文件
to_excel:保存为 Excel 文件
Excel表格读写操作
# Pandas walkthrough: Series/DataFrame creation, selection, assignment,
# missing values, statistics, concat/merge/groupby, pivot tables, time
# series, plotting, and Excel/CSV file I/O.
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# --- Series and DataFrame creation ---------------------------------------
s = pd.Series([i * 2 for i in range(1, 11)])
print(type(s))
print(s)

dates = pd.date_range("20170301", periods=8)
df = pd.DataFrame(np.random.randn(8, 5), index=dates, columns=list("ABCDE"))

# --- Selection -------------------------------------------------------------
print(df["A"])
print(df[:3])  # first 3 rows
print(df["20170301":"20170302"])  # rows whose index falls in the date range
print(df.loc[dates[0]])  # a single row, selected by label
print(df.loc["20170301":"20170303", ["B", "D"]])  # row range, chosen columns
print(df.at[dates[0], "C"])  # a single value, selected by labels
print(df.iloc[1:3])  # rows at positions 1 and 2
print(df.iloc[1:3, 2:4])  # rows at positions 1-2, columns at positions 2-3
print(df.iat[1, 3])  # a single value, selected by integer position
# Combine both conditions into one mask; chaining df[mask1][mask2] makes
# pandas reindex the second mask against the filtered frame and warn.
print(df[(df.B > 0) & (df.A < 0)])
print(df[df < 0.0])  # non-negative entries are shown as NaN
print(df[df["E"].isin([1, 2])])  # rows whose E is 1 or 2 (random floats: usually none)

# --- Setting values --------------------------------------------------------
sl = pd.Series(list(range(10, 18)), index=pd.date_range("20170301", periods=8))
df["F"] = sl  # add a new column, aligned on the index
print(df)
df.at[dates[0], "A"] = 0
print(df)
df.iat[1, 1] = 1
df.loc[:, "D"] = np.array([4] * len(df))
print(df)
df2 = df.copy()
df2[df2 > 0] = -df2  # turn every positive value into its negative
print(df2)

# --- Missing values --------------------------------------------------------
# Reindexing onto a column that does not exist yet ("G") fills it with NaN.
df1 = df.reindex(index=dates[:4], columns=list("ABCD") + ["G"])
df1.loc[dates[0]:dates[1], "G"] = 1
print(df1)
print(df1.dropna())  # drop rows that contain missing values
print(df1.fillna(value=2))  # replace missing values with 2

# --- Statistics ------------------------------------------------------------
print(df.mean())  # column means
print(df.var())  # column variances
ss = pd.Series([1, 2, 4, np.nan, 5, 7, 9, 10], index=dates)
print(ss)
print(ss.shift(2))  # shift all values down by 2 positions
print(s.diff())  # difference between consecutive values (first order by default)
print(s.value_counts())  # number of occurrences of each value
print(df)
print(df.apply(np.cumsum))  # cumulative sum down each column
print(df.apply(lambda x: x.max() - x.min()))  # range (max - min) of each column

# --- Concat / merge / groupby ---------------------------------------------
pieces = [df[:2], df[-2:]]
print(pd.concat(pieces))  # stack the first 2 and the last 2 rows
left = pd.DataFrame({"key": ["x", "y"], "value": [1, 2]})
right = pd.DataFrame({"key": ["x", "z"], "value": [3, 4]})
print("left", left)
print("right", right)
# `how` selects the join type: "inner" keeps only keys present on both
# sides, "outer" keeps every key (filling gaps with NaN), and "right"
# keeps every key of the right-hand frame.
print(pd.merge(left, right, on="key", how="right"))
df3 = pd.DataFrame({"A": ["a", "b", "c", "b"], "B": list(range(4))})
print(df3)
print(df3.groupby("A").sum())  # group by A; the two "b" rows have their B summed

# --- Reshape: pivot table --------------------------------------------------
df4 = pd.DataFrame({
    "A": ["one", "one", "two", "three"] * 6,  # 24 rows in total
    "B": ["a", "b", "c"] * 8,
    "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
    "D": np.random.randn(24),  # random numbers
    "E": np.random.randn(24),
    "F": [datetime.datetime(2017, i, 1) for i in range(1, 13)]
    + [datetime.datetime(2017, i, 15) for i in range(1, 13)],
})
print(df4)
# values: column to aggregate; index: row keys; columns: column keys.
print(pd.pivot_table(df4, values="D", index=["A", "B"], columns=["C"]))

# --- Time series -----------------------------------------------------------
# Lowercase "s" is the seconds alias; uppercase "S" is deprecated in recent
# pandas releases.
t_exam = pd.date_range("20170301", periods=10, freq="s")
print(t_exam)

# --- Plotting --------------------------------------------------------------
ts = pd.Series(np.random.randn(1000), index=pd.date_range("20170301", periods=1000))
ts = ts.cumsum()
ts.plot()
plt.show()

# --- File I/O --------------------------------------------------------------
df6 = pd.read_excel("./panda.xlsx", sheet_name="Sheet1")
print(df6)
df6.to_csv("./panda.csv")
## time series 和 绘图
read_excel()读取Excel文件,保存用to_csv()或者to_excel()
pandas读取csv文件,用read_csv()语句