代码如下:
import matplotlib.pyplot as plt
import pandas as pd
# Load the tips dataset; the path is relative to the current working directory.
df = pd.read_csv('../datasets/tips.csv')

# Draw a single box plot of the total_bill column.
plt.boxplot(
    df['total_bill'],  # data to plot; pass a list [x1, x2, x3] for several groups
    whis=1.5,  # whiskers reach at most 1.5 * IQR beyond the quartiles
    widths=0.3,  # box width
    patch_artist=True,  # draw the box as a patch so it can be filled
    showmeans=True,  # also mark the mean
    # Fill colour of the box.
    boxprops=dict(facecolor='RoyalBlue'),
    # Outlier (flier) markers: fill colour, edge colour and size.
    flierprops=dict(markerfacecolor='red', markeredgecolor='red', markersize=3),
    # Mean marker: hexagon symbol, fill colour and size.
    meanprops=dict(marker='h', markerfacecolor='black', markersize=8),
    # Median line: line style and colour.
    medianprops=dict(linestyle='--', color='orange'),
)
plt.xticks([])  # hide the x-axis ticks and tick labels
plt.show()
# Flag outliers in total_bill with the 1.5 * IQR rule: any value above
# Q3 + 1.5 * IQR or below Q1 - 1.5 * IQR is reported.
Q1 = df['total_bill'].quantile(q=0.25)
Q3 = df['total_bill'].quantile(q=0.75)
iqr = Q3 - Q1  # interquartile range, computed once for both fences
low_limit = Q1 - 1.5 * iqr
up_limit = Q3 + 1.5 * iqr
# Keep rows outside either fence; the lower-fence comparison was cut off in
# the original line and is restored here.
val = df['total_bill'][(df['total_bill'] > up_limit) | (df['total_bill'] < low_limit)]
print('异常值如下:')
print(val)
异常值如下:
59 48.27
102 44.30
142 41.19
156 48.17
170 50.81
182 45.35
184 40.55
197 43.11
212 48.33
Name: total_bill, dtype: float64