Visualizing Data via R (box-plot, histogram, scatter)
Visualizing Data via R (box-plot, histogram, violin, scatter)
Load data
library("AzureML")
## Open the workspace and download the raw automobile price data set.
ws <- workspace()
auto.price <- download.datasets(ws, "Automobile price data (Raw)")

## Columns that should be numeric but use "?" as the unknown-value marker.
cols <- c("price", "bore", "stroke", "horsepower", "peak.rpm")

## Replace the unknown marker with NA and convert to numeric in one pass.
## Factors are converted to character first: calling as.numeric() on a
## factor yields its internal integer codes, not the values it displays.
auto.price[cols] <- lapply(auto.price[cols], function(x) {
  if (is.factor(x)) {
    x <- as.character(x)
  }
  x[x %in% "?"] <- NA
  as.numeric(x)
})

## Remove rows with NAs
auto.price <- auto.price[complete.cases(auto.price), ]

## Add a log transformed column for price
auto.price$lnprice <- log(auto.price$price)

## Consolidate the number of cylinders into three groups
auto.price$num.cylinders <- ifelse(
  auto.price$num.of.cylinders %in% c("four", "three"),
  "three-four",
  ifelse(
    auto.price$num.of.cylinders %in% c("five", "six"),
    "five-six",
    "eight-twelve"
  )
)

head(auto.price)
数据处理结果
Create a Pair-Wise Scatter Plot
library(ggplot2)
library(repr)
## Set the repr plot size options to 8 x 8 for the scatter plot matrix.
options(repr.plot.width = 8, repr.plot.height = 8)

## Numeric columns to compare pair-wise.
num.cols <- c(
  "wheel.base", "width", "height", "curb.weight", "engine.size",
  "bore", "compression.ratio", "city.mpg", "price", "lnprice"
)

## Draw every selected column against every other one.
pairs(~ ., data = auto.price[, num.cols])
十个属性成对散点图
Create Histograms
## Function to plot conditioned histograms
## Install gridExtra from CRAN only when it is not already available, so
## re-running the script does not download and reinstall it every time.
## The mirror is addressed over HTTPS rather than plain HTTP.
options(repos = c(CRAN = "https://cran.rstudio.com"))
if (!requireNamespace("gridExtra", quietly = TRUE)) {
  install.packages("gridExtra")
}

## Set the repr plot size options to 6 x 3 for the faceted histograms.
options(repr.plot.width = 6, repr.plot.height = 3)
auto.hist <- function(x) {
  ## Draw a histogram of column `x` of the global `auto.price` data frame,
  ## with one facet per value of `drive.wheels`. `x` is a column name given
  ## as a string; the ggplot object is returned.
  library(ggplot2)
  library(gridExtra)

  ## Split the observed range of the column into 30 equal-width bins.
  value.range <- range(auto.price[, x])
  bin.width <- diff(value.range) / 30

  plot.title <- paste("Histogram of", x, "conditioned on type of drive wheels")

  histogram <- ggplot(auto.price, aes_string(x)) +
    geom_histogram(aes(y = ..count..), binwidth = bin.width)
  histogram + facet_grid(. ~ drive.wheels) + ggtitle(plot.title)
}
## Features to plot: auto.hist is applied to each column name in turn.
plot.cols2 <- c("length", "curb.weight", "engine.size", "city.mpg", "price")
lapply(plot.cols2, auto.hist)
按照drive.wheels不同取值对每一个属性画直方图
Create Box Plots
## Box plot of one column of the global `auto.price` data frame, grouped by
## type of drive wheels. `x` is the column name as a string; the ggplot
## object is returned.
auto.box <- function(x) {
  plot.title <- paste("Box plot of", x, "by type of drive wheels")
  boxes <- ggplot(auto.price, aes_string("drive.wheels", x)) + geom_boxplot()
  boxes + ggtitle(plot.title)
}

## One box plot per feature listed in plot.cols2.
lapply(plot.cols2, auto.box)
结果如下 geom_boxplot()
Create Scatter Plots
## Scatter plot of price against column `x` of the global `auto.price` data
## frame, using color to differentiate points by `num.cylinders`.
## `x` is a column name given as a string; the ggplot object is returned.
scatter.auto <- function(x) {
  ## library() stops with an error when ggplot2 is missing, whereas
  ## require() would only return FALSE and let the function fail later
  ## with a less helpful "could not find function" error.
  library(ggplot2)
  title <- paste("price vs.", x, "with color by num.cylinders")
  ggplot(auto.price, aes_string(x, "price")) +
    geom_point(aes(color = factor(num.cylinders))) +
    ggtitle(title)
}
## Features to plot against price; scatter.auto is applied to each in turn.
plot.cols3 <- c("length", "curb.weight", "engine.size", "city.mpg")
lapply(plot.cols3, scatter.auto)
结果如下:geom_point()。aes(): Aesthetic mappings describe how variables in the data are mapped to visual properties (aesthetics) of geoms. factor()将类别属性作为颜色影响因子。