书籍:《R语言与数据挖掘》
作者:张良均
出版社:机械工业出版社
ISBN:9787111540526
本书由北京华章图文信息有限公司授权杭州云悦读网络有限公司电子版制作与发行
版权所有·侵权必究
lattice包
lattice包的图形参数可通过trellis.par.get()函数来获取,并用trellis.par.set()函数来修改。show.settings()函数可展示当前的图形参数设置情况。
lattice包可以通过添加条件变量,创建出各个水平下的面板。一般情况下,条件变量是因子型变量,若条件变量为连续性,则需要先将连续型变量转换为离散变量,再将其设置为条件变量。
一般参数:
通过添加条件变量,可以创建出各个水平下的面板。若想要把不同水平的图形结果叠加到一起,则可以将变量设定为分组变量。
分组变量v的设定格式为:graph_function(formula, data = , qroup = v)
lattice包不识别par()设置,需要新的方法完成页面摆放。最简单的方法便是先将lattice图形存储到对象中,然后利用plot函数中的split = 和positinotallow= 选项来进行控制。
split的四个选项将页面分割为一个指定行数和列数的矩阵,然后将图形放置到该矩阵中。这四个选项分别为:图形所处的列,图形所处的行,列的总数,行的总数。
常见参数说明
xyplot()函数-散点图
library(lattice)
xyplot(mpg ~ wt, data = mtcars, xlab = "Weight", ylab = "Miles per Gallon")
displacement <- equal.count(mtcars$disp, number = 3, overlap = 0)
xyplot(mpg ~ wt | displacement, data = mtcars,
main = "Miles per Gallon vs. Weight by Engine
Displacement", xlab = "Weight", ylab = "Miles per Gallon", layout = c(3, 1), )
- xyplot的各个参数意义,比如layout、main、xlim、col、pch等请参照:【传送门】
表达式形式通常为:
y~x|A*B
在竖线左边的变量称为主要( primary)变量,右边的变量称为条件( conditioning)变量。
# 绘制添加回归线、光滑曲线、轴须和网格线的散点图
panel <- function(x, y) {
panel.lmline(x, y, col = "red", lwd = 1, lty = 2)
panel.loess(x, y)
panel.grid(h = -1, v = -1)
panel.rug(x, y)
panel.xyplot(x, y)
}
xyplot(mpg ~ wt, data = mtcars, xlab = "Weight", ylab = "Miles per Gallon",
main = " Miles per Gallon on Weight", panel = panel)
# 查看所有设置的列表
names(trellis.par.get())
show.settings()
# 以发动机气缸数量为分组变量的散点图
xyplot(mpg ~ wt, data = mtcars, groups = factor(cyl), pch = 1:3, col = 1:3,
main = "Miles per Gallon vs Weight by Cylinder",
xlab = "Weight", ylab = "Miles per Gallon",
key = list(space = "right", title = "Cylinder", cex.title = 1, cex = 1,
text = list(levels(factor(mtcars$cyl))),
points = list(pch = 1:3, col = 1:3)))
# 同一页面的散点图和添加条件变量的散点图
graph1 <- xyplot(mpg ~ wt, data = mtcars, xlab = "Weight", ylab = "Miles per Gallon")
graph2 <- xyplot(mpg ~ wt | displacement, data = mtcars, xlab = "Weight",
ylab = "Miles per Gallon", layout = c(3, 1))
plot(graph1, split = c(1, 1, 2, 1))
plot(graph2, split = c(2, 1, 2, 1), newpage = FALSE)
或者
plot(graph1, position = c(0, 0, 0.5, 1))
plot(graph2, position = c(0.5, 0, 1, 1), newpage = FALSE)
一般查看数据框的内部结构
barchart()条形图
# 利用str函数查看数据结构
str(Titanic)
str(Titanic)
'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
- attr(*, "dimnames")=List of 4
..$ Class : chr [1:4] "1st" "2nd" "3rd" "Crew"
..$ Sex : chr [1:2] "Male" "Female"
..$ Age : chr [1:2] "Child" "Adult"
..$ Survived: chr [1:2] "No" "Yes"
# 绘图对象为table数据时的条形图
barchart(Titanic, auto.key = TRUE)
# 修改图例,x轴组距自由
barchart(Titanic, layout = c(4, 1),
auto.key = list(title = "Survived", columns = 2),
scales = list(x = "free")) # 将x轴坐标设置为free
# 绘图对象为表达式,数据结构为数据框时的条形图
barchart(Class ~ Freq | Sex + Age, data = as.data.frame(Titanic),
groups = Survived, stack = TRUE,
auto.key = list(title = "Survived", columns = 2, cex = 0.6))
# x轴组距自由
barchart(Class ~ Freq | Sex + Age, data = as.data.frame(Titanic),
groups = Survived, stack = TRUE,
auto.key = list(title = "Survived", columns = 2, cex = 0.6),
scales = list(x = "free"))
# 显示定制面板函数
# 将lattice的高级绘图函数创建的栅栏图存在mygraph对象中
mygraph <- barchart(Class ~ Freq | Sex + Age, data = as.data.frame(Titanic),
groups = Survived, stack = TRUE,
auto.key = list(title = "Survived", columns = 2, cex = 0.6),
scales = list(x = "free"))
# 通过update函数给mygraph图形增加垂直网格线, 并将条形边框设置为透明色
update(mygraph, panel = function(...) {
panel.grid(h = 0, v = -1)
panel.barchart(..., border = "transparent")
})
dotplot()点图
dotplot(VADeaths, pch = 1:4, col = 1:4, xlab = "Rate (per 1000)",
main = list("Death Rates in Virginia - 1940", cex = 0.8),
key = list(column = 4, text = list(colnames(VADeaths)),
points = list(pch = 1:4, col = 1:4)))
dotplot(VADeaths, groups = FALSE,
main = list("Death Rates in Virginia - 1940", cex = 0.8),
xlab = "Rate (per 1000)")
dotplot(VADeaths, groups = FALSE, layout = c(1, 4), origin = 0, type = c("p", "h"),
main = list("Death Rates in Virginia - 1940", cex = 0.8), xlab = "Rate (per 1000)")
histogram()直方图
library(lattice)
library(nutshell)
data(births2006.smpl)
histogram( ~ DBWT|DPLURAL, data = births2006.smpl, main = "Births in the United States, 2006",
layout = c(1, 5), xlab = "Birth weight, in grams")
densityplot()核密度图
densityplot( ~ DBWT | DPLURAL, data = births2006.smpl, layout = c(1, 5), plot.points = FALSE,
main = "Births in the United States, 2006", xlab = "Birth weight, in grams")
densityplot( ~ DBWT, groups = DPLURAL, data = births2006.smpl, plot.points = FALSE,
main = "Births in the United States, 2006", xlab = "Birth weight, in grams",
lty = 1:5, col = 1:5, lwd = 1.5,
key = list(text = list(levels(births2006.smpl$DPLURAL)),
column = 3, lines = list(lty = 1:5, col = 1:5)))
stripplot()带状图
stripplot( ~ DBWT, data = births2006.smpl, main = "Births in the United States, 2006",
subset = (DPLURAL == "5 Quintuplet or highter" | DPLURAL == "4 Quadruplet"),
jitter.data = TRUE, xlab = "Birth weight, in grams")
qq图
library(lattice)
qqmath( ~ height | voice.part, data = singer, prepanel = prepanel.qqmathline,
panel = function(x, ...) {
panel.qqmathline(x, ...)
panel.qqmath(x, ...)
})
qq(voice.part ~ height, aspect = 1, data = singer,
subset = (voice.part == "Bass 2" | voice.part == "Tenor 1"))
bwplot()箱线图
bwplot( ~ height| voice.part, data = singer, xlab = "Height (inches)")
bwplot(voice.part ~ height, data = singer, xlab = "Height (inches)")
散点图矩阵
# 散点图
xyplot(Sepal.Length ~ Sepal.Width | Species, data = iris)
# 散点图矩阵
splom(mtcars[c(1, 3:7)], groups = mtcars$cyl, pscales = 0, pch = 1:3, col = 1:3,
varnames = c("Miles\nper\ngallon", "Displacement\n(cu. in.)", "Gross\nhorsepower",
"Rear\naxle\nratio", "Weight", "1 / 4 mile\ntime"),
key = list(columns = 3, title = "Number of Cylinders",
text = list(levels(factor(mtcars$cyl))),
points = list(pch = 1:3, col = 1:3)))
热力图
# 5.1.2 # 三维水平图
library(lattice)
data(Cars93, package = "MASS")
cor.Cars93 <-cor(Cars93[, !sapply(Cars93, is.factor)], use = "pair")
levelplot(cor.Cars93, scales = list(x = list(rot = 90)))
等高线
contourplot(volcano, cuts = 20, label = FALSE)
三维散点图
par.set <-list(axis.line = list(col = "transparent"), clip = list(panel = "off"))
cloud(Sepal.Length ~ Petal.Length * Petal.Width, data = iris, groups = Species,
cex = .8, pch = 1:3, col = c("blue", "red", "green"),
screen = list(z = 20, x = -70, y = 0), par.settings = par.set,
scales = list(col = "black"),
key = list(title = "Species", column = 3, text = list(levels(iris$Species)),
points = list(pch = 1:3, col = c("blue", "red", "green"))))
三维曲面图
# 5.1.2 # 对volcano绘制三维曲面图
wireframe(volcano, shade = TRUE, aspect = c(61 / 87, 0.4), light.source = c(10, 0, 10))
ggplot2
不得不提到qplot。
功能:快速作图(quick plot)。
使用格式:
qplot(x, y = NULL, …, data,facets = NULL, margins = FALSE, geom = “auto”, stat = list(NULL), position = list(NULL), xlim = c(NA, NA), ylim = c(NA, NA), log = “”, main = NULL, xlab = deparse(substitute(x)), ylab = deparse(substitute(y)), asp = NA)
其中,facets是图形/数据的分面,geom指图形的几何类型,stat指图形的统计类型,position可对图形或者数据的位置调整,其他参数与plot函数类似。
箱线图
qplot版本:
library(ggplot2)
qplot(Species, Sepal.Length, data = iris, geom = "boxplot", fill = Species,
main = "依据种类分组的花萼长度箱线图")
ggplot2版本:
ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
geom_violin() + geom_jitter() + labs(title = “依据种类分组的花萼长度小提琴图”)
小提琴图
qplot版本
qplot(Species, Sepal.Length, data = iris, geom = c("violin", "jitter"),
fill = Species, main ="\n依据种类分组的花萼长度小提琴图\n")
ggplot版本
library(ggplot2)
ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
geom_boxplot() + labs(title = "依据种类分组的花萼长度箱线图")
散点图
qplot版本
qplot(Sepal.Length, Sepal.Width, data = iris, colour = Species, shape = Species,
main = "绘制花萼长度和花萼宽度的散点图")
qplot(Wind,Temp,data=airquality,colour=Month)
分面板散点图
qplot(Sepal.Length, Sepal.Width, data = iris, geom = c("point", "smooth"),
facets = ~ Species, colour = Species, main = "绘制分面板的散点图")
对图形进行分面
data(singer, package = "lattice")
ggplot(data = singer, aes(x = height, fill = voice.part)) +
geom_density() +
facet_grid(voice.part ~ .)
分面板的密度图
ggplot(data = singer, aes(x = height, fill = voice.part)) +
geom_density() +
facet_wrap( ~ voice.part, ncol = 4) +
theme(legend.position = "none")
利用ggplot函数改变图形颜色
# 方式一:使用scale_color_manual函数
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
scale_color_manual(values = c("orange", "olivedrab", "navy")) +
geom_point(size = 2)
# 方式二:使用scale_color_brewer函数
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
scale_color_brewer(palette = "Set1") +
geom_point(size = 2)
图片保存
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
geom_point(size = 2)
ggsave(file = "mygraph.pdf", width = 5, height = 4)
交互式可视化
这部分暂时还没有用到
直接贴代源代码
# 把“数据及程序”文件夹拷贝到F盘下,再用setwd设置工作空间
setwd("F:/数据及程序/第5章/示例程序")
# 5.3.1 # rCharts包的安装代码
require(devtools)
library(curl)
install_github('ramnathv/rCharts')
# 5.3.1 # 散点图
library(rCharts)
names(iris) <- gsub("\\.", "", names(iris))
rPlot(SepalLength ~ SepalWidth | Species, data = iris, color = 'Species',
type = 'point')
# 5.3.1 # 交互分组柱状图
library(rCharts)
hair_eye_male <- subset(as.data.frame(HairEyeColor), Sex == "Male")
hair_eye_male[, 1] <- paste0("Hair", hair_eye_male[, 1])
hair_eye_male[, 2] <- paste0("Eye", hair_eye_male[, 2])
nPlot(Freq ~ Hair, group = "Eye", data = hair_eye_male, type = "multiBarChart")
# 5.3.1 # 交互气泡图
a <- hPlot(Pulse ~ Height, data = MASS::survey, type = "bubble", title = "Zoomdemo",
subtitle = "bubblechart", size = "Age", group = "Exer")
a$colors('rgba(223,83,83,.5)', 'rgba(119,152, 91,.5)', 'rgba(60,179,113,.5)')
a$chart(zoomType = "xy")
a$exporting(enabled = T)
a
# 5.3.1 # 时间序列图。
data(economics, package = 'ggplot2')
dat <- transform(economics, date = as.character(date))
p1 <- mPlot(x = "date", y = list("psavert", "uempmed"), data = dat,
type = 'Line', pointSize = 0, lineWidth = 1)
p1
# 5.3.1 # 将时间序列图变成面积图
p1$set(type = "Area")
p1
# 5.3.2 # 安装代码
library(devtools)
install_github("yihui/recharts")
# 5.3.2 # 利用recharts包绘制散点图
source("./code/echartR.R")
library(recharts)
echartR(data = iris, x = ~ Sepal.Length, y = ~ Petal.Length, series = ~ Species,
type = 'scatter', palette = "Set1",
markLine = rbind(c(1, 'LinearRegCoef', 'lm', T), c(2, 'LinearRegCoef', 'lm', T),
c(3, 'LinearRegCoef', 'lm', T)))
# 5.3.3 # 利用gvisMotionChart函数绘制功能强大的交互图
library(googleVis)
M1 <- gvisMotionChart(Fruits, idvar = "Fruit", timevar = "Year")
plot(M1)
# 5.3.4 # 利用leaflet函数绘制的交互地图
library(leaflet)
leaflet() %>%
addTiles() %>%
addMarkers(lng = 174.768, lat = -36.852, popup = "ThebirthplaceofR")
# 5.3.4 # 图 5 39利用dygraphs函数绘制的交互时序图
library(dygraphs)
LTV <- read.csv("./data/LTV.csv")
LTV.ts <- ts(LTV)
dygraph(LTV.ts, main = "LTVforecast") %>%
dySeries("V1", label = "LTV", strokeWidth = 3) %>%
dyOptions(colors = "red", fillGraph = TRUE, fillAlpha = 0.4) %>%
dyHighlight(highlightCircleSize = 5,
highlightSeriesBackgroundAlpha = 0.2,
hideOnMouseOut = FALSE) %>%
dyAxis("x", drawGrid = FALSE) %>%
dyAxis("y", label = "LTV(LifeTimeValue)") %>%
dyRangeSelector()
# 5.3.4 # 利用函数plot_ly绘制的交互散点图
library(plotly)
pal <- RColorBrewer::brewer.pal(nlevels(iris$Species), "Set1")
plot_ly(data = iris, x = ~ Sepal.Length, y = ~ Petal.Length, color = ~ Species,
colors = pal, mode = "markers")
# 5.3.4 # 由ggplot2转化的交互散点图
p <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, colour = Species)) +
scale_color_brewer(palette = "Set1") +
geom_point()
ggplotly(p)
# 5.3.4 # 交互数据表格
library(DT)
datatable(iris)
# 5.3.4 # 利用simpleNetwork绘制简单网络图
library(networkD3)
src <- c("A", "A", "A", "A", "B", "B", "C", "C", "D")
target <- c("B", "C", "D", "J", "E", "F", "G", "H", "I")
networkData <- data.frame(src, target)
simpleNetwork(networkData, zoom = T)
# 5.3.4 # 利用forceNetwork绘制力导向图
data(MisLinks)
data(MisNodes)
forceNetwork(Links = MisLinks, Nodes = MisNodes, Source = "source", Target = "target",
Value = "value", NodeID = "name", Group = "group", opacity = 0.8)
# 5.3.5 # 基本的网页界面布局UI代码
library(shiny)
shinyServer(function(input, output) {
output$distPlot <- renderPlot({
x <- faithful[, 2]
bins <- seq(min(x), max(x), length.out = input$bins + 1)
hist(x, breaks = bins, col = 'darkgray', border = 'white')
})
})
# 相应的ui.R如下:
library(shiny)
shinyUI(fluidPage(
titlePanel("Old Faithful Geyser Data"),
sidebarLayout(
sidebarPanel(
sliderInput("bins", "Number of bins:", min = 1, max = 50, value = 30)),
mainPanel(
plotOutput("distPlot")))))
# 5.3.5 # 自动生成一个网页展示结果
library(shiny)
runApp("./code/myapp")
# 5.3.5 # shinyApp执行app
library(shiny)
ui <- fluidPage(
numericInput(inputId = "n",
"Samplesize", value = 25),
plotOutput(outputId = "hist")
)
server <- function(input, output) {
output$hist <- renderPlot({
hist(rnorm(input$n))
})
}
shinyApp(ui = ui, server = server)
# 5.3.5 # 得到shinydashboard的基本框架
library(shiny)
library(shinydashboard)
ui <- dashboardPage(
dashboardHeader(),
dashboardSidebar(),
dashboardBody()
)
server <- function(input, output) {}
shinyApp(ui, server)
# 5.3.5 # 用renderPlot()函数将图形赋予输出对象mygraph形式
# server.R #
output$mygraph <- renderPlot({
graph_function(formula, data = ,…)
})
# ui.R #
plotOutput(“mygraph”)
# 5.3.5 # 评价线性模型拟合情况可视化
# server.R #
output$lm.fit <- renderPlot({
fit <- lm(Sepal.Length ~ Sepal.Width, data = iris[, 1:4])
par(mfrow = c(2, 2), pch = "*", bg = "aliceblue")
plot(fit)
})
# ui.R #
plotOutput("lm.fit")
# 5.3.5 # 用renderChart()函数将图形赋予输出对象将图形输出到web中
# server.R #
output$mygraph <- renderChart({
p1 <- hPlot(formula, data, type, …)
p1$addParams(dom = ”mygraph”)
return(p1)
})
# ui.R #
showOutput(“mygraph”, ”highcharts”)
# 5.3.5 # nPlot函数绘制的交互柱状图web展示
# server.R #
output$mychart1 <- renderChart({
hair_eye_male <- subset(as.data.frame(HairEyeColor), Sex == "Male")
hair_eye_male[, 1] <- paste0("Hair", hair_eye_male[, 1])
hair_eye_male[, 2] <- paste0("Eye", hair_eye_male[, 2])
p1 <- nPlot(Freq ~ Hair, group = "Eye", data = hair_eye_male, type = "multiBarChart")
p1$chart(color = c('brown', 'blue', '#594c26', 'green'))
p1$addParams(dom = "mychart1")
return(p1)
})
# ui.R #
showOutput("mychart1", "nvd3")
# 5.3.5 # renderDataTable()函数
# server.R #
output$mytable <- renderDataTable({
datatable(data)
})
# ui.R #
dataTableOutput(“mytable”)
# 5.3.5 # renderForceNetwork()函数
# server.R #
output$mygraph <- renderForceNetwork({
forceNetwork(…)
})
# ui.R #
forceNetworkOutput(“mygraph”)