Python数据分析-基础技术篇_技术笔记

慕粉1653322654 2020-07-07

#encoding=utf-8
import numpy as np
import pandas as pd



def main():
    """Demonstrate basic pandas inspection and selection on a small frame.

    Builds a Series of even numbers and an 8x5 DataFrame of random normal
    values indexed by eight consecutive dates starting 2017-03-01, then
    prints the result of each inspection / selection call.  Nothing is
    returned; all output goes to stdout.
    """
    # Data structure
    s = pd.Series([i * 2 for i in range(1, 11)])
    print(type(s))
    dates = pd.date_range('20170301', periods=8)
    df = pd.DataFrame(np.random.randn(8, 5), index=dates, columns=list('ABCDE'))
    print(df)

    # Basic inspection
    print(df.head(3))  # first 3 rows
    print(df.tail(3))  # last 3 rows
    print(df.index)  # row labels (the DatetimeIndex)
    print(df.values)  # underlying values as a NumPy array
    print(df.T)  # transpose
    print(df.sort_values("C"))  # rows ordered by column C, ascending
    print(df.sort_index(axis=1, ascending=False))  # columns ordered by label, descending
    print(df.describe())  # per-column count, mean, std, min, quartiles, max

    # Selection
    print(type(df["A"]))  # a single column is a Series
    print(df[:3])  # positional row slice
    print(df.loc[dates[0]])  # one row selected by label
    print(df.iloc[1:3])  # rows at positions 1 and 2
    # Combine both conditions into one mask: chaining df[mask1][mask2] makes
    # pandas reindex the second mask against the already-filtered frame and
    # emit a UserWarning, while selecting the same rows.
    print(df[(df.B > 0) & (df.A < 0)])


if __name__ == '__main__':
    main()

0赞 · 0采集

libigfat 2020-06-27

head打印前几行
tail打印后几行
index打印主键
values打印数据
sort排序，直接指定按哪个标签排序
describe可以大致分析数据的一些特征，平均值，标准差等
可以通过[]直接下标索引切片
多个series组成dataframe

0赞 · 0采集
慕后端9323003 2019-10-05

print(df.head(5)) # first 5 rows

0赞 · 0采集
weixin_慕斯卡7541814 2019-03-03

#Basic print(df.head(3)) #head()打印出前几行 print(df.tail(3)) #tail()打印出后几行 print(df.index) #index打印出索引 print(df.values) #values打印出values print(df.T) #T 转置 print(df.sort_values(by="C")) #第C列的values按从小到大的顺序排序（旧版写法df.sort(columns="C")已被移除） print(df.sort_index(axis=1,ascending=False)) #axis=1：将列标签进行降序排序 print(df.describe()) #打印出所有属性值的数量平均值标准差最小值最大值等

2赞 · 0采集

慕粉0948086830 2018-12-04

# NOTE: this snippet relies on `df` (a DataFrame with columns including
# A, B, C, D) and `dates` (its date index) being defined by earlier code
# that is not part of this snippet.

# Basic inspection
print(df.head(3))  # first 3 rows
print(df.tail(3))  # last 3 rows
print(df.index)
print(df.values)
print(df.T)  # transpose
# DataFrame.sort(columns=...) no longer exists; sort_values(by=...) replaces it.
print(df.sort_values(by="C"))
print(df.sort_index(axis=1, ascending=False))  # columns ordered by label, descending
print(df.describe())

# Selection
print(df["A"])
print(df[:3])
print(df["20170301":"20170304"])  # label slice on the date index
print(df.loc[dates[0]])
print(df.loc["20170301":"20170304", ["B", "D"]])
# One combined mask instead of chained df[mask1][mask2], which makes pandas
# reindex the second mask and warn.
print(df[(df.B > 0) & (df.A < 0)])

0赞 · 1采集

MOOC扛把子 2018-11-18

插眼, pandas 真不错

0赞 · 0采集
慕斯0241343 2018-06-11

快乐

0赞 · 0采集
人在梦游中 2018-05-21
```
# Print the frame with its rows sorted by the values in column "C".
print(df.sort_values(by="C"))
```
2赞 · 1采集
Xixibb 2017-12-01

1234

截图
0赞 · 1采集
Xixibb 2017-12-01

1234

截图
0赞 · 0采集
Xixibb 2017-12-01

1234

截图
0赞 · 0采集
Xixibb 2017-12-01

1234

截图
0赞 · 0采集
Xixibb 2017-12-01

1234

截图
0赞 · 0采集
劝君惜取少年时 2017-10-31

#Set 对dataframe的属性进行设置 s1=pd.Series(list(range(10,18)),index=pd.date_range("20170301",periods=8)) df["F"]=s1 print (df) df.at[dates[0],"A"]=0 #将第0行A列的数值改为0 print (df) df.iat[1,1]=1 df.loc[:,"D"]=np.array([4]*len(df)) #将D列全部改为4 print(df) df2=df.copy() #拷贝dataframe df2[df2>0]=-df2 #将所有大于0的数改为负数 print(df2)

截图
0赞 · 0采集
劝君惜取少年时 2017-10-31

#select 选择数据切片 print(type(df["A"])) #dataframe 其实是由一个个series组成的 print(df[:3]) #前三行 print(df["20170301":"20170304"]) #利用index进行切片 1号到4号 print(df.loc[dates[0]]) print(df.loc["20170301":"20170304",["B","D"]]) #1号到4号B列和D列的数据 print(df.at[dates[0],"C"]) #利用at表示特定值 dates[0]:1号 "C":C列 print(df.iloc[1:3,2:4]) #行1到3不包括3,列2到4不包括4 print(df.iloc[1,4]) #1行4列 print(df.iat[1,4]) print(df[(df.B>0)&(df.A<0)])

截图
0赞 · 0采集
劝君惜取少年时 2017-10-31

#Basic print(df.head(3)) #head()打印出前几行 print(df.tail(3)) #tail()打印出后几行 print(df.index) #index打印出索引 print(df.values) #values打印出values print(df.T) #T 转置 print(df.sort_values(by="C")) #第C列的values按从小到大的顺序排序（旧版写法df.sort(columns="C")已被移除） print(df.sort_index(axis=1,ascending=False)) #axis=1：将列标签进行降序排序 print(df.describe()) #打印出所有属性值的数量平均值标准差最小值最大值等

截图
2赞 · 0采集
慕雪6366197 2017-09-21

pandas基本操作 head(3)打印出前三行 tail(3)打印出后几行 index values 数组 T sort(columns="C") 对某一列排序 sort_index( ) 索引排序 describe() 统计数据 Select 选择切片 df["A"] A的属性列 df[:3] 第几行

截图
0赞 · 0采集
likeke123 2017-08-28

def main():
    """Demonstrate pandas inspection, selection and in-place assignment.

    Builds an 8x5 DataFrame of random normal values indexed by eight
    consecutive dates starting 2017-03-01, prints the result of each
    inspection / selection call, then adds a column and overwrites some
    cells, printing the frame after each change.  Returns nothing.
    """
    import numpy as np
    import pandas as pd

    # The frame and its date index must exist before any of the calls below;
    # without them the first print raises NameError.
    dates = pd.date_range("20170301", periods=8)
    df = pd.DataFrame(np.random.randn(8, 5), index=dates, columns=list("ABCDE"))

    print(df.head(3))
    print(df.tail(3))
    print(df.index)
    print(df.values)
    # T is the transpose
    print(df.T)
    # sort_index(axis=1, ascending=False) orders the columns by label, descending
    print(df.sort_index(axis=1, ascending=False))
    # describe() shows per-column max, min, mean and quartiles
    print(df.describe())

    # Select: a DataFrame is made up of Series, and supports slicing
    print(type(df["A"]))
    print(df[:3])
    print(df["20170301":"20170304"])
    print(df.loc[dates[0]])
    print(df.loc["20170301":"20170304", ["B", "D"]])
    print(df.at[dates[0], "C"])
    # iloc / iat index and slice by integer position
    print(df.iloc[1:3, 2:4])
    print(df.iloc[1, 4])
    print(df.iat[1, 4])

    # Set
    s1 = pd.Series(list(range(10, 18)), index=pd.date_range("20170301", periods=8))
    df["F"] = s1
    print(df)
    df.at[dates[0], "A"] = 0
    print(df)
    df.iat[1, 1] = 1
    df.loc[:, "D"] = np.array([4] * len(df))
    print(df)


if __name__ == "__main__":
    main()

0赞 · 0采集
JonyJ 2017-08-03

切片操作

截图
0赞 · 0采集
Dazzz 2017-07-16

copy()

截图
0赞 · 0采集
Dazzz 2017-07-16

important iloc

截图
0赞 · 0采集
Dazzz 2017-07-16

keke

截图
0赞 · 0采集

数据加载中...