Python中pandas库的常用操作实例
作者:菜菜01
这篇文章主要介绍了Python中pandas库的常用操作实例。Pandas 库是一个免费、开源的第三方 Python 库,是 Python 数据分析必不可少的工具之一,它为 Python 数据分析提供了高性能且易于使用的数据结构,即 Series 和 DataFrame。需要的朋友可以参考下
pandas库常用操作
series
使用列表创建
import pandas as pd

# Build a Series from a list of values, attaching explicit string labels
# as the index (one label per value).
data = pd.Series(list(range(4)), index=['a', 'b', 'c', 'd'])
data
使用字典创建
import pandas as pd

# Build a Series from a dict: the keys become the index labels and the
# values become the data.
dict1 = {"a": 0, "b": 1, "c": 2, "d": 3}
data = pd.Series(dict1)
data
DataFrame
使用数组创建
import numpy as np
import pandas as pd

# Build a 3x4 DataFrame from a NumPy array holding 0..11. No labels are
# given, so rows and columns both get the default integer labels.
arr = np.arange(12).reshape(3, 4)
data = pd.DataFrame(arr)
data
指定列索引
import numpy as np
import pandas as pd

# Build the same 3x4 DataFrame, this time naming the columns explicitly
# via the `columns` argument.
arr = np.arange(12).reshape(3, 4)
data = pd.DataFrame(arr, columns=['a', 'b', 'c', 'd'])
data
index
查询索引
import numpy as np
import pandas as pd

# Series: `.index` holds the labels attached to the values.
data = pd.Series(list(range(3)), index=['a', 'b', 'c'])
data.index

# DataFrame: `.columns` holds the column labels.
arr = np.arange(9).reshape(3, 3)
data = pd.DataFrame(arr, columns=['a', 'b', 'c'])
data.columns
按索引查询数据
import numpy as np
import pandas as pd

# --- Series ---
data = pd.Series(list(range(3)), index=['a', 'b', 'c'])

# Positions go through .iloc and labels through [] / .loc. Passing a bare
# integer to [] on a string-labelled Series relies on a positional fallback
# that pandas has deprecated, so the positional lookups are explicit here.
data.iloc[2]        # 2
data['c']           # 2
data.iloc[0:2]      # 0, 1      (positional slice: end position excluded)
data.loc['a':'c']   # 0, 1, 2   (label slice: end label INCLUDED)
data.iloc[[0, 2]]   # 0, 2
data[['a', 'c']]    # 0, 2

# --- DataFrame ---
arr = np.arange(9).reshape(3, 3)
data = pd.DataFrame(arr, columns=['a', 'b', 'c'])

# A single column label returns that column as a Series.
data['a']
# 0    0
# 1    3
# 2    6

# A list of column labels returns a DataFrame with just those columns.
data[['a', 'c']]
#    a  c
# 0  0  2
# 1  3  5
# 2  6  8

# A plain slice selects rows (here the first two).
data[:2]
#    a  b  c
# 0  0  1  2
# 1  3  4  5

# loc selects by index label; iloc selects by integer position.
data.loc[:, ["a", "c"]]
data.iloc[:, [0, 2]]
sort
sort by index
import numpy as np
import pandas as pd

# Series: sort by index label. Ascending is the default; pass
# ascending=False for descending order. Each call returns a new object.
data = pd.Series([1, 3, 0], index=['a', 'b', 'c'])
data.sort_index(axis=0)
data.sort_index(axis=0, ascending=False)

# DataFrame: sort the rows by their row index.
data = pd.DataFrame(np.arange(9).reshape(3, 3), columns=['a', 'b', 'c'])
data.sort_index()
sort by values
import numpy as np
import pandas as pd

# Series: sort by value, ascending (the default) and then descending.
data = pd.Series([1, 3, 0], index=['a', 'b', 'c'])
data.sort_values(axis=0)
data.sort_values(axis=0, ascending=False)

# DataFrame: sort_values needs to know which column(s) to sort on, so `by`
# is required -- calling it with no arguments raises a TypeError.
data = pd.DataFrame(np.arange(9).reshape(3, 3), columns=['a', 'b', 'c'])
data.sort_values(by='a')
data.sort_values(by='a', ascending=False)
MultiIndex
read and write files
import numpy as np  # np.arange is used to build the sample frames below
import pandas as pd

# CSV round trip: write the frame without its row index, then read it back.
data = pd.DataFrame(np.arange(9).reshape(3, 3), columns=['a', 'b', 'c'])
data.to_csv(r'./data.csv', index=False)
data = pd.read_csv(r'./data.csv')
data

# Excel round trip, same pattern.
# NOTE: .xlsx support depends on an optional Excel engine (e.g. openpyxl)
# being installed alongside pandas.
data = pd.DataFrame(np.arange(9).reshape(3, 3), columns=['a', 'b', 'c'])
data.to_excel(r'./data.xlsx', index=False)
data = pd.read_excel(r'./data.xlsx')
data
空值和缺失值处理
import numpy as np
import pandas as pd

# np.nan is the canonical missing-value marker; the `NaN` alias was removed
# in NumPy 2.0, so `from numpy import NaN` no longer works there.
data = pd.Series([1, np.nan, 3])

# Boolean mask: True where the value is missing.
data.isnull()

# Delete: returns a new Series without the missing entries.
data.dropna()

# Fill: returns a new Series with missing entries replaced by 6.
data.fillna(6)
# Duplicate-value handling
import pandas as pd

data = pd.DataFrame({
    'id': [1, 2, 3, 4, 4, 5],
    'name': ['小铭', '小月月', '彭岩', '刘华', '刘华', '周华'],
    'age': [18, 18, 29, 58, 58, 36],
    'height': [180, 180, 185, 175, 175, 178],
    'gender': ['女', '女', '男', '男', '男', '男'],
})

# Boolean mask: True for every row that repeats an earlier row in full.
data.duplicated()

# Returns a new DataFrame with the repeated rows removed (first one kept).
data.drop_duplicates()
更改数据类型
import pandas as pd

dict1 = {'A': ['5', '6', '7'], 'B': ['3', '2', '1']}

# Built from strings with no dtype given, the columns hold Python strings.
data = pd.DataFrame(dict1)
data.dtypes

# Request a dtype at construction time: the numeric strings are converted
# to integers.
data = pd.DataFrame(dict1, dtype='int')
data.dtypes

# Convert an existing frame; astype returns a new DataFrame.
data.astype(dtype='float')
数据合并
import pandas as pd

data1 = pd.DataFrame({'A': ['A0', 'A0', 'A1'],
                      'B': ['B0', 'B0', 'B1']})
data2 = pd.DataFrame({'C': ['C0', 'C0', 'C1', 'C3'],
                      'D': ['D0', 'D2', 'D2', 'D3']})

# Stack data1 and data2 side by side (axis=1) using an outer join on the
# row index: data1 has no row 3, so its columns are NaN in that row.
pd.concat([data1, data2], join='outer', axis=1)
数据聚合和组内运算
import pandas as pd
import numpy as np

dict1 = {"Key": ['C', 'B', 'C', 'A', 'B', 'B', 'A', 'C', 'A'],
         "Data": [2, 4, 6, 8, 10, 1, 14, 16, 18]}
data = pd.DataFrame(dict1)
data

# groupby is a method of the DataFrame (there is no top-level pd.groupby).
# Iterating the grouped object yields (group key, sub-DataFrame) pairs.
for key, group in data.groupby(by='Key'):
    print(key)
    print(group)
到此这篇关于Python中pandas库的常用操作实例的文章就介绍到这了,更多相关pandas库的常用操作内容请搜索脚本之家以前的文章或继续浏览下面的相关文章希望大家以后多多支持脚本之家!