Bar plotting or hist plotting

In [74]: import numpy as np

             import matplotlib.pyplot as plt

In [75]: data = [5., 25., 50, 20]

In [79]: plt.bar( range(len(data)), data )

             plt.show()

Bar plotting or hist plotting

In [80]: range(len(data))

Out[80]: range(0, 4)

In [81]: plt.barh( range(len(data)), data)

             plt.show()

Bar plotting or hist plotting

In [ ]:

In [82]: import matplotlib.pyplot as plt

In [83]: data = [5,25,50,20]

In [84]: plt.bar( range(len(data)), data, width=1)

             plt.show()

Bar plotting or hist plotting

In [87]: plt.barh( range(len(data)), data, height=1)

             plt.show()

Bar plotting or hist plotting

In [89]:

In [ ]:

17

In [90]: import numpy as np

import matplotlib.pyplot as plt

In [96]: data = [ [5,25,50,20], [4,23,51,17], [6,22,52,19] ]

             XList = np.arange(4)

In [97]: w=0.25

             plt.bar(XList+w*0, data[0], color='b', width=w)  #w*0, w*1 or w*2 is the horizontal offset applied to each series

             # the first blue bar is centered at x=0.0 (offset w*0)

             plt.bar(XList+w*1, data[1], color='g', width=w)

             #width=w matches the offset step, so neighbouring bars touch without overlapping

             plt.bar(XList+w*2, data[2], color='r', width=w)  

             # the first red bar is centered at x=0.5 (offset w*2 = 0.5)

             plt.show()

Bar plotting or hist plotting

In [ ]:

In [99]: import numpy as np

             import matplotlib.pyplot as plt

In [100]: data = [[5,25,50,20], [4,23,51,17], [6,22,52,19]]

              color_list=['b','g','r']

              gap = .8/len(data)   

In [101]: for i, row in enumerate(data): #enumerate yields both the row index i and the current row

                     X = np.arange(len(row))

                     plt.bar(X+i*gap, row, width=gap, color = color_list[ i%len(color_list) ])

                                                                               #i%len(color_list) cycles through the colors when data has more rows than color_list has entries

               plt.show()

Bar plotting or hist plotting

In [ ]:

In [102]: import matplotlib.pyplot as plt

In [106]: A = [5,30,45,22] B = [5,25,50,20]

              XList= range(len(A))

In [107]: plt.bar(XList, A, color='b')

        plt.bar(XList, B, color='r', bottom=A) #default width: 0.8

         plt.show()

Bar plotting or hist plotting

In [ ]:

In [108]: import numpy as np

               import matplotlib.pyplot as plt

In [109]: A = np.array([5,30,45,22])

               B = np.array([5,20,50,20])

               C = np.array([1,2,1,1])

               xList=np.arange(4)

In [110]: plt.bar(xList, A, color='b')

               plt.bar(xList, B, color='y', bottom=A)

               plt.bar(xList, C, color='r', bottom=A+B)

               plt.show()

20Bar plotting or hist plotting

In [ ]:

In [111]: import numpy as np

              import matplotlib.pyplot as plt

In [112]: data=np.array([ [5,30,45,22], [5,20,50,20], [1, 2, 1, 1] ])

              colorList = ['b', 'y','r']

              xList = np.arange(data.shape[1]) #data.shape[1] return the number of columns

              for i in range(data.shape[0]): #axis=0 to remove row: sum by column

                     plt.bar(xList, data[i], bottom=np.sum(data[:i], axis=0), color=color_list[ i%len(color_list) ])

              plt.show()

21Bar plotting or hist plotting

In [ ]:

In [113]: import numpy as np

              import matplotlib.pyplot as plt

In [115]: women_pop = np.array([5,30,45,22])

               men_pop = np.array([5,25,50,20])

               xList= np.arange(4)

In [116]: plt.barh(xList, women_pop, color='r')

              plt.barh(xList, -men_pop, color='b')

              plt.show()

Bar plotting or hist plotting

In [ ]:

In [119]: import numpy as np

               import matplotlib.pyplot as plt

In [120]: xList = np.random.randn(1000)

In [121]: plt.hist(xList, bins=20)

               plt.show()

Bar plotting or hist plotting

In [122]: plt.hist(xList, bins=50)

               plt.show()

Bar plotting or hist plotting

In [ ]:

3 Using custom colors for bar charts

In [13]: import numpy as np

             import matplotlib.pyplot as plt

In [14]: women_pop = np.array([5.0, 30.,45., 22.])

             men_pop = np.array([5.0, 25., 50., 20.])

In [15]: X=np.arange(4) #0~3

In [18]: plt.barh(X, women_pop, color='0.25')

             plt.barh(X, -men_pop, color='0.75')# The parameter edgecolor is alsoavailable

             plt.show().

Out[18]:

Bar plotting or hist plotting

In [19]: import numpy as np

             import matplotlib.pyplot as plt

In [21]: values = np.random.randint(99, size=50) #generating 50 integers with 0<=values<=98 (the upper bound 99 is exclusive)

In [22]: values

Out[22]: array([ 8, 21, 30, 9, 74, 0, 91, 97, 81, 80, 21, 47, 18, 3, 81, 53, 22, 84, 50, 2, 33, 82, 93, 89, 51, 71, 87, 48, 0, 57, 15, 38, 66, 48, 75, 98, 46, 35, 33, 20, 28, 30, 20, 80, 83, 68, 29, 13, 38, 61])

 

In [25]: color_set = ('.00','.25','.50','.75')

##In Python, "/" is floating-point (true) division, while "//" is integer (floor) division

             #(len(color_set) * val) // 100 turns each value into an index into color_set
             color_list=[ color_set[ (len(color_set) * val) //100 ] for val in values ]

             plt.bar(np.arange(len(values)), values, color=color_list)

             plt.show()

Bar plotting or hist plotting

 

In [26]: (len(color_set) * 8) //100

Out[26]: 0

In [28]: color_set = ('.00','.25','.50','.75')

             ##Python中的 // 与 / 的区别, " / " 表示浮点数除法, "//"表示整数除法

             color_list=[ color_set[ (len(color_set) * val) //100 ] for val in sorted(values) ] #sort the v

             plt.bar(np.arange(len(values)), values, color=color_list)

             plt.show()

Bar plotting or hist plotting

7 Using colormaps for bar charts

In [11]: import numpy as np

             import matplotlib.cm as cm

             import matplotlib.colors as col

             import matplotlib.pyplot as plt

In [13]: values = np.random.randint(99, size=50) # 50 integers in 0..98 (the upper bound 99 is exclusive)

In [14]:                               #normalize data into the [0.0, 1.0] interval

             cmap = cm.ScalarMappable(col.Normalize(0,99), cm.binary)

In [16]:                               #converts the list of values to a list of color

            plt.bar(np.arange(len(values)), values, color = cmap.to_rgba(values))

            plt.show()

Bar plotting or hist plotting

Bar plotting or hist plotting

# # we use the linestyle parameter of pyplot.plot() to control the line
# pattern of three different curves. The following line styles are available:
#  ### Solid
#  ### Dashed
#  ### Dotted
#  ### Dashdot

The line style with other plot types

# In[24]:
import numpy as np
import matplotlib.pyplot as plt


# In[25]:
N=8
A=np.random.random(N)
B=np.random.random(N)
X=np.arange(N)


# In[60]:


# first series: light-gray filled bars
plt.bar(X, A, color='0.75')
# second series starts at the top of A: white fill with a dashed yellow outline
# (linestyle, linewidth and edgecolor style the bar edges)
# NOTE(review): the height is A+B, so these bars reach 2*A+B; pass B for a true stack - confirm intent
b=plt.bar(X, A+B, bottom=A, color='w', linestyle='dashed', linewidth=1, edgecolor='y')
plt.show()

Bar plotting or hist plotting

Bar plotting or hist plotting

Controlling a fill pattern
 hatch pattern
#  /
#  \
#  |
#  -
#  +
#  x
#  o
#  O
#  .
#  *

edgecolor parameter will control the color of the hatching.

# In[70]:
import numpy as np
import matplotlib.pyplot as plt


# In[72]:
N = 8
A = np.random.random(N)
B = np.random.random(N)
X = np.arange(N)


# In[77]:
# hatch selects the fill pattern; the edgecolor parameter controls the color of the hatching.
plt.bar(X, A, color='w', hatch='x', edgecolor='k')
# single-letter color codes must be lowercase ('w'); uppercase 'W' is rejected by current matplotlib
plt.bar(X, A+B, bottom=A, color='w', hatch='/', edgecolor='k')

plt.show()

Bar plotting or hist plotting

 

Bar Plots

The plot.bar() and plot.barh() methods of a pandas Series or DataFrame make vertical and horizontal bar plots, respectively.

 

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

 

# Two panels stacked vertically: 2 rows, 1 column.
fig, axes = plt.subplots(nrows=2, ncols=1)

# 16 uniform samples from [0, 1), labelled 'a' through 'p'.
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))

# Vertical bars in the top panel, horizontal bars in the bottom panel.
data.plot(kind='bar', ax=axes[0], color='k', alpha=0.7, rot=0)
data.plot(kind='barh', ax=axes[1], color='b', alpha=0.7)

plt.show()

Bar plotting or hist plotting

 

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

 

df = pd.DataFrame(np.random.rand(6,4),

                  index=['one', 'two', 'three', 'four', 'five', 'six'],

                  columns=pd.Index(['A','B','C','D'], name='Genus'))

df

Bar plotting or hist plotting

df.plot.barh(stacked=True,alpha=0.5,rot=0)

plt.legend(loc='upper right',title='Genus')

plt.show()

Bar plotting or hist plotting

A useful recipe for bar plots is to visualize a Series’s value frequency using value_counts: s.value_counts().plot.bar().

#######################################################

tips.csv

Bar plotting or hist plotting

#######################################################

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

 

tips = pd.read_csv('../examples/tips.csv')

tips.head()

Bar plotting or hist plotting

                                     #axis[0]   #axis[1]

party_counts = pd.crosstab(tips['day'], tips['size'])

party_counts

Bar plotting or hist plotting

party_counts = party_counts.loc[:,2:5]  #label[2,3,4,5]

party_counts

Bar plotting or hist plotting

# Normalize every row (one row per day) so that its values sum to 1:
# each count is divided by its row total, e.g. 16/(16+1+1+0) = 0.888889
party_pcts = party_counts.div(party_counts.sum(axis='columns'), axis='index')

party_pcts

Bar plotting or hist plotting

party_pcts.plot.bar(rot=90)

plt.show()

Bar plotting or hist plotting

Conclusion:

So you can see that party sizes appear to increase on the weekend in this dataset.

 

seaborn

import matplotlib.pyplot as plt
import pandas as pd  # pd.read_csv is used below, so this snippet needs its own pandas import
import seaborn as sns

# Load the tipping data set and preview the first rows.
tips = pd.read_csv('../examples/tips.csv')

tips.head()

Bar plotting or hist plotting

 

#0.063204       = 1.01        / (16.99              - 1.01       )

tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])

tips.head()

Bar plotting or hist plotting

# horizontal bar plot of tip_pct grouped by day
sns.barplot(data=tips, x='tip_pct', y='day', orient='h')

# NOTE(review): this runs after the plot has been drawn, so it presumably only affects later figures - confirm intent
sns.set(style=None)

plt.show() #The black lines drawn on the bars represent the 95% confidence interval

Bar plotting or hist plotting

# Choose the style first: axes styling is fixed when the axes are created,
# so setting it after sns.barplot would not change this figure.
sns.set(style='whitegrid')

# horizontal bars of tip_pct per day, split by the 'time' column via hue
sns.barplot(data=tips, x='tip_pct', y='day', orient='h', hue='time')

plt.legend(loc='center right', title='time')

plt.show()

Bar plotting or hist plotting

help(sns.set)

help(sns.axes_style)

 

Histograms and Density Plots

A histogram is a kind of bar plot that gives a discretized display of value frequency. The data points are split into discrete, evenly spaced bins, and the number of data points in each bin is plotted.

import matplotlib.pyplot as plt
import pandas as pd  # pd.read_csv is used below, so this snippet needs its own pandas import
import seaborn as sns

# Load the tipping data set and preview the first rows.
tips = pd.read_csv('../examples/tips.csv')

tips.head()

Bar plotting or hist plotting

tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])   #sorted then split the data points (depend on their values)

tips.head()

Bar plotting or hist plotting

tips['tip_pct'].plot.hist(bins=50) #sorted tips['tip_pct'] then split the data points (depend on their values) to 50 bins

plt.title('Histogram of tip percentages')

plt.show()

Bar plotting or hist plotting

A related plot type is a density plot, which is formed by computing an estimate of a

continuous probability distribution that might have generated the observed data.

 

density plots are also known as kernel density estimate (KDE) plots.

Using plot.kde makes a density plot using the conventional mixture-of-normals estimate

 

tips['tip_pct'].plot.density()

plt.title('Density plot of tip percentages')

plt.show()

Bar plotting or hist plotting

高斯分布(Gaussian Distribution)的概率密度函数(probability density function): $f(x) = \dfrac{1}{\sigma\sqrt{2\pi}}\,\exp\!\left(-\dfrac{(x-\mu)^2}{2\sigma^2}\right)$

Bar plotting or hist plotting

np.random.randn(size)所谓标准正态分布(μ=0,σ=1),对应于np.random.normal(loc=0, scale=1, size)

                                            #normal distribution mu=0, sigma=1=std.dev

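Seaborn makes histograms and density plots even easier through its distplot

function, which can plot both a histogram and a continuous density estimate simultaneously. (Note: distplot is deprecated since seaborn 0.11; histplot with kde=True, or displot, is its replacement.)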

 

import seaborn as sns

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

 

                      #mu=0  sigma=1=std.dev  sampling size=200

# first component: mu=0, sigma=1 (std. dev.), 200 samples
comp1 = np.random.normal(0,1,size=200)

# second component: mu=10, sigma=2 (std. dev.), 200 samples
comp2 = np.random.normal(10,2, size=200)

# bimodal sample: both components concatenated into one Series
values= pd.Series(np.concatenate([comp1, comp2]))

# sns.distplot is deprecated (seaborn 0.11) and removed in later releases.
# histplot with kde=True overlays the density estimate, and stat='density'
# keeps the histogram normalized so both share the same y scale.
sns.histplot(values, bins=100, color='k', kde=True, stat='density')

plt.title('Normalized histogram of normal mixture with density estimate')

plt.show()

Bar plotting or hist plotting

Figures and Subplots

Plots in matplotlib reside within a Figure object.

import matplotlib.pyplot as plt
import numpy as np  # np.random.randn is used below; a bare randn was never imported before this point

fig = plt.figure()
ax = fig.add_subplot(1,1,1)

# Three random walks; each label becomes a legend entry.
# To keep a line out of the legend, pass no label or label='_nolegend_'.
ax.plot(np.random.randn(1000).cumsum(), color='k', label='one')
ax.plot(np.random.randn(1000).cumsum(), color='k', linestyle='--', label='two')
ax.plot(np.random.randn(1000).cumsum(), color='k', linestyle='dotted',label='three')

# Put ticks at fixed x positions, then replace the numeric tick text with names.
ticks = ax.set_xticks([0,250,500,750,1000])
labels = ax.set_xticklabels(['one','two', 'three', 'four', 'five'], rotation=30, fontsize='small')

ax.set_title('My first matplotlib plot')
ax.set_xlabel('Stages')

# ---- equivalent alternative: set several axes properties in one call ----
props = {
        'title': 'My first matplotlib plot',
        'xlabel': 'Stages'
}
ax.set(**props)
# --------------------------------------------------------------------------

ax.legend(loc='best')

plt.show()

Bar plotting or hist plotting

matplotlib draws on the last figure and subplot used (creating one if necessary), thus hiding the figure and subplot creation.

plt.plot(np.random.randn(50).cumsum(), color='black', ls='--')

 

matplotlib includes a convenience method, plt.subplots, that creates a new figure and returns a NumPy array containing the created subplot objects. The axes array can be indexed like a two-dimensional array; for example, axes[0, 1].

Bar plotting or hist plotting

Adjusting the spacing around subplots

plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

 

# 1

# 2x2 grid of panels that share both the x and the y axis.
fig, axes = plt.subplots(nrows=2, ncols=2, sharex=True, sharey=True)

# Fill the panels row by row with a 5-bin histogram of 500 normal samples each.
for i in (0, 1):
    for j in (0, 1):
        axes[i, j].hist(np.random.randn(500), bins=5, color='k', alpha=0.5)

# Shrink the horizontal and vertical gaps between the panels.
plt.subplots_adjust(wspace=0.05, hspace=0.05)

Bar plotting or hist plotting

# 2

from numpy.random import randn

arr=randn(30)

arrCumSum=arr.cumsum()

plt.plot(arrCumSum, color='k', linestyle='dashed', drawstyle='steps-post', label='steps-post', marker='o')

plt.legend(loc='best')     #label='steps-post'

plt.show()

Bar plotting or hist plotting

Annotations and Drawing on a Subplot

Bar plotting or hist plotting

 

from datetime import datetime

import matplotlib.pyplot as plt  # plt.rc, plt.figure and plt.show are used further down in this snippet
import numpy as np
import pandas as pd

# index_col=0 uses the first CSV column as the index; parse_dates=True asks pandas to parse that index as dates
data = pd.read_csv('../examples/spx.csv',parse_dates=True, index_col=0)

spx = data['SPX']  #'SPX' column

 

crisis_data=[

    (datetime(2007, 10, 11), 'Peak of bull market'),  #tuple

    (datetime(2008,  3, 12), 'Bear Stearns Fails'),

    (datetime(2008,  9, 15), 'Lehman Bankruptcy')

]

# // matplotlib Configuration

plt.rc('figure', figsize=(10,10))

font_options={

    'family': 'monospace',

    'weight': 'bold',

    'size': 16

}

plt.rc('font', **font_options)

 

fig = plt.figure()

ax = fig.add_subplot(1,1,1)

 

spx.plot(ax=ax, color='green', linestyle='-')

 

for date, label in crisis_data:
    # Series value looked up for this date; arrow tip and text are both placed above it.
    level = spx.asof(date)
    ax.annotate(
        label,
        xy=(date, level + 75),       # xy: the arrow's destination
        xytext=(date, level + 225),  # xytext: the text position
        arrowprops={'facecolor': 'blue', 'headwidth': 10, 'headlength': 4, 'width': 2},
        ha='left',
        va='top',
    )

#Zoom in on 2007-2010

ax.set_xlim(['1/1/2007', '1/1/2011'])

ax.set_ylim([600,1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')

 

plt.show()

Bar plotting or hist plotting

Bar plotting or hist plotting

Adding arrows

The aspect of the arrow is controlled by a dictionary passed to the arrowprops parameter. 'arrowstyle': the values '<-', '->', '-', 'wedge', 'simple', and 'fancy' control the style of the arrow. 'facecolor': the color used for the arrow; it sets both the background and the edge color. 'edgecolor': the color used for the edges of the arrow's shape. 'alpha': the transparency level, so that the arrow blends with the background.

The shrink parameter controls the gap between the arrow's endpoints and the arrow itself.

Bar plotting or hist plotting

Facet Grids分面网格 and Categorical Data类型数据

import seaborn as sns

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

tips = pd.read_csv('../examples/tips.csv')

tips.head()

Bar plotting or hist plotting

tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])

tips.head()

Bar plotting or hist plotting

                                         #categorical data                        

# sns.factorplot was renamed to sns.catplot (seaborn 0.9) and has since been removed.
# Only rows with tip_pct < 1 are plotted; one column of panels per 'smoker' value, bars colored by 'time'.
sns.catplot(x='day', y='tip_pct', hue='time', col='smoker', kind='bar', data=tips[tips.tip_pct <1])

plt.show()

Bar plotting or hist plotting

                                         #categorical data                        

# sns.factorplot was renamed to sns.catplot (seaborn 0.9) and has since been removed.
# Only rows with tip_pct < 1 are plotted; panels form a grid with one row per 'time' and one column per 'smoker' value.
sns.catplot(x='day', y='tip_pct', row='time', col='smoker', kind='bar', data=tips[tips.tip_pct <1])

plt.show()

Bar plotting or hist plotting

 

# sns.factorplot was renamed to sns.catplot (seaborn 0.9) and has since been removed.
# Box plot of tip_pct per day, restricted to rows with tip_pct < 0.5.
sns.catplot(x='tip_pct', y='day', kind='box', data=tips[tips.tip_pct<0.5])

plt.show()

Bar plotting or hist plotting