Python数据分析-基础技术篇_技术笔记

libigfat 2020-06-27

date_range：生成时间序列（日期索引）
参数：periods 为生成的日期个数，freq 为时间间隔单位
文件操作 xlsx csv
read_csv
read_excel(文件名,表名)
to_csv保存
to_excel

0赞 · 0采集
Shepherd9527 2020-04-04

Excel表格读写操作

截图
0赞 · 0采集

青椒肉丝OVO 2019-11-22

import numpy as np
import pandas as pd


# Series holding the first ten even numbers (2, 4, ..., 20).
s = pd.Series([i * 2 for i in range(1, 11)])
print(type(s))
print(s)

# 8x5 frame of standard-normal samples, indexed by eight consecutive days
# starting 2017-03-01, with columns A..E.
dates = pd.date_range("20170301", periods=8)
df = pd.DataFrame(np.random.randn(8, 5), index=dates, columns=list("ABCDE"))

# --- Selection ---
print(df["A"])  # a single column
print(df[:3])  # the first 3 rows
print(df.loc["20170301":"20170302"])  # rows whose labels fall in the date range
print(df.loc[dates[0]])  # one row, selected by label
print(df.loc["20170301":"20170303", ["B", "D"]])  # label range of rows, columns B and D
print(df.at[dates[0], "C"])  # one scalar, by row label and column label
print(df.iloc[1:3])  # rows at positions 1 and 2
print(df.iloc[1:3, 2:4])  # rows at positions 1-2, columns at positions 2-3
print(df.iat[1, 3])  # one scalar, by row position and column position
# Combine both conditions into one mask. Chaining df[mask1][mask2] applies a
# full-length mask to an already filtered frame, which makes pandas warn and
# reindex the mask.
print(df[(df.B > 0) & (df.A < 0)])
print(df[df < 0.0])  # keep negative entries, everything else becomes NaN
# E holds random floats, so matching exactly 1 or 2 normally selects no rows.
print(df[df["E"].isin([1, 2])])

# --- Setting values ---
# New column F: the integers 10..17 on the same eight daily dates.
f_index = pd.date_range("20170301", periods=8)
sl = pd.Series(list(range(10, 18)), index=f_index)
df["F"] = sl
print(df)

# Overwrite a single cell addressed by labels.
df.at[dates[0], "A"] = 0
print(df)

# Overwrite a single cell addressed by position, then fill column D with 4s.
df.iat[1, 1] = 1
fours = np.array([4] * len(df))
df.loc[:, "D"] = fours
print(df)

# Work on a copy so df itself is untouched: negate every positive entry.
df2 = df.copy()
positive = df2 > 0
df2[positive] = -df2
print(df2)

# --- Missing values ---
# Take the first four dates and columns A-D plus G; G is not a column of the
# source frame, so reindex fills it with NaN.
first_four = dates[:4]
wanted_columns = ["A", "B", "C", "D", "G"]
df1 = df.reindex(index=first_four, columns=wanted_columns)

# Give G a value on the first two dates only; the remaining rows stay NaN.
df1.loc[dates[0]:dates[1], "G"] = 1
print(df1)

print(df1.dropna())  # drop rows that contain any missing value
print(df1.fillna(2))  # replace missing values with 2

# --- Statistics ---
def _spread(column):
    """Return the range (max minus min) of one column."""
    return column.max() - column.min()


print(df.mean())  # per-column mean
print(df.var())  # per-column variance

# Series with one gap (NaN), on the same eight dates as the frame.
ss = pd.Series([1, 2, 4, np.nan, 5, 7, 9, 10], index=dates)
print(ss)
print(ss.shift(periods=2))  # move every value two positions later

# These two use `s` (the even-number series), not `ss`.
print(s.diff(periods=1))  # first-order difference
print(s.value_counts())  # how often each value occurs

print(df)
print(df.apply(np.cumsum))  # running total down each column
print(df.apply(_spread))  # range of each column

# --- Concatenation ---
# Stack the first two rows and the last two rows of df into one frame.
head_rows = df.iloc[:2]
tail_rows = df.iloc[-2:]
pieces = [head_rows, tail_rows]
print(pd.concat(pieces))
# Two small frames that share the "key" column; only key "x" is in both.
left = pd.DataFrame({"key": ["x", "y"], "value": [1, 2]})
right = pd.DataFrame({"key": ["x", "z"], "value": [3, 4]})
print("left", left)
print("right", right)

# how="right" keeps every key of `right`; keys with no match in `left` get NaN.
# Other join modes include "inner" (matching keys only) and "outer" (all keys).
merged = left.merge(right, on="key", how="right")
print(merged)

# Group by column A and sum B within each group (the two "b" rows are added).
df3 = pd.DataFrame({"A": list("abcb"), "B": list(range(4))})
print(df3)
grouped = df3.groupby("A")
print(grouped.sum())

# --- Reshaping ---
import datetime

# Date column: the 1st of each month of 2017 followed by the 15th of each month.
month_numbers = range(1, 13)
first_of_month = [datetime.datetime(2017, m, 1) for m in month_numbers]
mid_month = [datetime.datetime(2017, m, 15) for m in month_numbers]

# 24-row frame: three repeating categorical columns, two random columns, one date column.
df4 = pd.DataFrame(
    {
        "A": ["one", "one", "two", "three"] * 6,
        "B": ["a", "b", "c"] * 8,
        "C": (["foo"] * 3 + ["bar"] * 3) * 4,
        "D": np.random.randn(24),
        "E": np.random.randn(24),
        "F": first_of_month + mid_month,
    }
)

# Pivot table: rows keyed by (A, B), one column per value of C, cells built from D.
print(df4)
pivoted = pd.pivot_table(df4, values="D", index=["A", "B"], columns=["C"])
print(pivoted)

# --- Time series ---
# Ten timestamps one second apart, starting at 2017-03-01 00:00:00.
# The lowercase "s" alias is used because the uppercase "S" spelling is
# deprecated in recent pandas releases.
t_exam = pd.date_range("20170301", periods=10, freq="s")
print(t_exam)

# --- Plotting ---
# Import pyplot explicitly. A star import from pylab floods the namespace with
# NumPy/matplotlib names, some of which shadow builtins such as `sum`.
import matplotlib.pyplot as plt

# Random walk: cumulative sum of 1000 standard-normal steps, one per day.
ts = pd.Series(np.random.randn(1000), index=pd.date_range("20170301", periods=1000))
ts = ts.cumsum()
ts.plot()
plt.show()

# --- File I/O ---
# Read sheet "Sheet1" of the workbook, show it, then save it as CSV.
df6 = pd.read_excel("./panda.xlsx", sheet_name="Sheet1")
print(df6)
df6.to_csv("./panda.csv")

0赞 · 0采集

慕后端9323003 2019-10-06

## time series 和绘图

截图
0赞 · 0采集
慕雪743630 2018-12-21

老师，处理大数据的时候很慢：我想把三个表格合并起来，但是处理得很慢。

0赞 · 0采集
慕村4567611 2018-04-24

用 read_excel() 读取 Excel 文件，保存时用 to_csv() 或 to_excel()。

0赞 · 0采集
慕村4567611 2018-04-24

pandas 读取 csv 文件时，使用 read_csv() 函数。

0赞 · 0采集

数据加载中...