본문 바로가기
분석/파이썬 Python

Python : Seaborn Visualization

by 여우요원 2019. 11. 25.
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Datasets: load four seaborn example datasets by name, in this order.
iris, titanic, tips, flights = (
    sns.load_dataset(name)
    for name in ('iris', 'titanic', 'tips', 'flights')
)
# Petal lengths as a NumPy array; `x` is reused by the following plot cells.
x = iris['petal_length'].to_numpy()
# Draw a rug plot of the petal lengths.
sns.rugplot(x)
<matplotlib.axes._subplots.AxesSubplot at 0x1a175ec950>

# Kernel density estimate plot of `x`.
sns.kdeplot(x)
<matplotlib.axes._subplots.AxesSubplot at 0x1a18ce6890>

# Histogram of `x` (50 bins) with the KDE curve and rug enabled in one call.
# NOTE(review): distplot is deprecated in seaborn >= 0.11; when upgrading,
# histplot(..., kde=True) plus rugplot() is the suggested replacement —
# confirm against the installed seaborn version.
sns.distplot(x, rug=True, kde=True, bins=50)
<matplotlib.axes._subplots.AxesSubplot at 0x1a18d9d1d0>

# Plain matplotlib histogram of `x` with 50 bins; the cell output below shows
# the returned (counts, bin edges, patches) tuple.
plt.hist(x, bins=50)
(array([ 2.,  2.,  7., 13., 13., 11.,  0.,  2.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  1.,  0.,  0.,  2.,  0.,  2.,  2.,  1.,  3.,  5.,
         3.,  6.,  4.,  8.,  3.,  5.,  4.,  9.,  8.,  2.,  2.,  2.,  9.,
         3.,  3.,  2.,  2.,  3.,  1.,  1.,  0.,  1.,  2.,  1.]),
 array([1.   , 1.118, 1.236, 1.354, 1.472, 1.59 , 1.708, 1.826, 1.944,
        2.062, 2.18 , 2.298, 2.416, 2.534, 2.652, 2.77 , 2.888, 3.006,
        3.124, 3.242, 3.36 , 3.478, 3.596, 3.714, 3.832, 3.95 , 4.068,
        4.186, 4.304, 4.422, 4.54 , 4.658, 4.776, 4.894, 5.012, 5.13 ,
        5.248, 5.366, 5.484, 5.602, 5.72 , 5.838, 5.956, 6.074, 6.192,
        6.31 , 6.428, 6.546, 6.664, 6.782, 6.9  ]),
 <a list of 50 Patch objects>)

# Display the titanic DataFrame (891 rows x 15 columns in the output below).
titanic
  survived pclass sex age sibsp parch fare embarked class who adult_male deck embark_town alive alone
0 0 3 male 22.0 1 0 7.2500 S Third man True NaN Southampton no False
1 1 1 female 38.0 1 0 71.2833 C First woman False C Cherbourg yes False
2 1 3 female 26.0 0 0 7.9250 S Third woman False NaN Southampton yes True
3 1 1 female 35.0 1 0 53.1000 S First woman False C Southampton yes False
4 0 3 male 35.0 0 0 8.0500 S Third man True NaN Southampton no True
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
886 0 2 male 27.0 0 0 13.0000 S Second man True NaN Southampton no True
887 1 1 female 19.0 0 0 30.0000 S First woman False B Southampton yes True
888 0 3 female NaN 1 2 23.4500 S Third woman False NaN Southampton no False
889 1 1 male 26.0 0 0 30.0000 C First man True C Cherbourg yes True
890 0 3 male 32.0 0 0 7.7500 Q Third man True NaN Queenstown no True

891 rows × 15 columns

# Count plot: number of titanic rows for each value of the `class` column.
sns.countplot(x='class', data=titanic)
<matplotlib.axes._subplots.AxesSubplot at 0x1a22bd7810>

# Display the tips DataFrame (244 rows x 7 columns in the output below).
tips
  total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
... ... ... ... ... ... ... ...
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2

244 rows × 7 columns

# Count plot: number of tips rows for each value of the `day` column.
sns.countplot(x='day', data=tips)
<matplotlib.axes._subplots.AxesSubplot at 0x1a22cae310>

# Joint plot of sepal_length (x) against sepal_width (y), drawn with kind='kde'.
sns.jointplot(x='sepal_length', y='sepal_width', data=iris, kind='kde')
<seaborn.axisgrid.JointGrid at 0x1a2309c450>

# Pair plot of the iris DataFrame (returns a PairGrid, as shown below).
sns.pairplot(iris)
<seaborn.axisgrid.PairGrid at 0x1a23373590>

# Pair plot of the iris DataFrame with the `species` column passed as hue.
sns.pairplot(iris, hue='species')
<seaborn.axisgrid.PairGrid at 0x1a23cd8590>

# Show the first 10 rows of the iris DataFrame.
iris.head(10)
  sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
5 5.4 3.9 1.7 0.4 setosa
6 4.6 3.4 1.4 0.3 setosa
7 5.0 3.4 1.5 0.2 setosa
8 4.4 2.9 1.4 0.2 setosa
9 4.9 3.1 1.5 0.1 setosa
# Two-dimensional categorical data: count titanic rows for every
# (class, sex) combination, with `class` as rows and `sex` as columns.
titanic_size = titanic.pivot_table(index='class', columns='sex', aggfunc='size')
# Display the resulting count table.
titanic_size
sex female male
class    
First 94 122
Second 76 108
Third 144 347
# Heatmap of the class-by-sex count table using the 'summer' colormap;
# annot=True writes each value into its cell and fmt='d' formats it as an integer.
sns.heatmap(titanic_size, cmap='summer', annot=True, fmt='d')
<matplotlib.axes._subplots.AxesSubplot at 0x1a245e0250>

# Horizontal bar plot: total_bill on the x axis, day on the y axis, with
# separate bars per `sex` via hue.
sns.barplot(y='day', x='total_bill', data=tips, hue='sex')
<matplotlib.axes._subplots.AxesSubplot at 0x1a25408550>

# Box plot of total_bill for each day, with separate boxes per `sex` via hue.
sns.boxplot(x='day', y='total_bill', data=tips, hue='sex')
<matplotlib.axes._subplots.AxesSubplot at 0x1a2562b910>