# Vector operations ----
x <- seq(1, 100, length.out = 10)
x
# %% is the remainder (modulo) operator; %/% is integer division.
# Arithmetic between vectors of unequal length recycles the shorter one, so
# the longer length should be a whole multiple of the shorter length.
# %in% tests, element by element, whether each left-hand value occurs in the
# right-hand vector.
c(1, 2, 3, 4) %in% c(1, 2, 3, 5, 6, 8, 7)
ceiling(c(-2.3, 3.14)) # smallest integer not less than x
floor(c(-2.3, 3.14)) # largest integer not greater than x
trunc(c(-2.3, 3.14)) # integer part (rounds toward zero); uses c(), not C()
round(c(-2.3, 3.14)) # round to the nearest integer
round(c(-3.114, 0.2626), digits = 2) # keep two decimal places
# `a` has to exist before it is used, so it is defined ahead of prod().
a <- 1:100
prod(a) # product of all elements of a
quantile(a, c(0.4, 0.6, 0.8)) # 40%, 60% and 80% quantiles
t <- c(1, 2, 3, 4, 8, 9)
which.max(t) # index (not the value) of the maximum
which.min(t) # index of the minimum
which(t == 8) # indices where the condition holds


## Matrices and arrays ----
# Fill a 4 x 5 matrix column by column with 1..20.
x <- 1:20
m <- matrix(x, nrow = 4, ncol = 5)
m
# Attach row and column labels.
rnames <- c("R1", "R2", "R3", "R4")
cnames <- c("C1", "C2", "C3", "C4", "C5")
dimnames(m) <- list(rnames, cnames)
m
# The same 20 values arranged as a 2 x 2 x 5 array.
x <- array(1:20, dim = c(2, 2, 5))
x

# A 2 x 3 x 4 array with names on every dimension.
m <- 1:24
dim1 <- c("A1", "B1")
dim2 <- c("A2", "B2", "c2")
dim3 <- c("A3", "B3", "C3", "D3")
x <- array(m, dim = c(2, 3, 4), dimnames = list(dim1, dim2, dim3))
x

# Matrix indexing ----
# byrow = TRUE fills the matrix row by row instead of column by column.
m <- matrix(1:20, nrow = 4, ncol = 5, byrow = TRUE)

m

dimnames(m) <- list(rnames, cnames)

m
m["R1", "C1"] # look up a single cell by row and column name

# Lists ----
# A list can hold objects of different types and shapes.
a <- 1:20
b <- matrix(1:20, nrow = 4)
c <- mtcars
d <- "this is a test list"
mlist <- list(a, b, c, d)
mlist
# The same list, now with named components.
names(mlist) <- c("first", "second", "third", "forth")
mlist
# List indexing: single brackets return a sub-list.
mlist[1]
mlist[c(1, 4)]

mlist[-3] # a negative index drops that component



# Data frames ----
# Scatter plot
women
plot(women$height, women$weight)
lm(height ~ weight, data = women) # linear model of height on weight
# attach (heading only; the calls below just list row and column names)
rownames(mtcars)
colnames(mtcars)
# Factors ----
mtcars$cyl
c <- factor(c("red", "green", "blue", "yellow"))
c
f <- factor(mtcars$cyl)
f
plot(mtcars$cyl) # plots the column as stored
plot(factor(mtcars$cyl)) # a factor is drawn as a bar chart of level counts


## Missing data ----
# NA marks a missing value.
a <- c(1:50, NA)
a
sum(a, na.rm = TRUE) # drop the NA before summing
is.na(a) # logical mask of the missing entries
# Missing-value exploration with the VIM package.
install.packages("VIM")
# library() stops with an error when a package is unavailable, whereas
# require() only returns FALSE and lets the script continue in a broken state.
library(grid)
library(VIM)
# NOTE(review): with VIM attached, `sleep` presumably resolves to VIM's data
# set that contains missing values — confirm.
sleep
is.na(sleep)
# na.omit() drops the missing values.
c <- c(NA, 1:100, NA)
d <- na.omit(c)
d
# NaN ("not a number") stands for a value that cannot exist.
# nchar() counts the characters of each element;
# length() counts the elements of a vector, e.g.:
l <- c(456, 41, 6, 777)
length(l)
nchar(l) # one character count per element
# paste() concatenates strings.
paste("wusuqi", "is", "nb")
paste(c("wusuqi", "is", "nb")) # a single vector argument stays element-wise
paste("wusqi", "is", "nb", sep = "==")
names <- c("wusuqi", "wangyuting")
paste(names, "is very nb") # vectorised over `names`
# substr() extracts a substring by start and stop position.
substr(month.name, start = 1, stop = 3)
tem <- substr(month.name, start = 1, stop = 3)
toupper(tem) # upper case
tolower(tem) # lower case
gsub("^(\\w)", "\\U\\1", tolower(tem), perl = TRUE) # capitalise the first letter
gsub("^(\\w)", "\\L\\1", toupper(tem), perl = TRUE) # lower-case the first letter


# Pattern matching ----
x <- c("A+", "AC", "b")
grep("A+", x) # regular expression: one or more "A"
grep("A+", x, fixed = TRUE) # fixed = TRUE matches the literal text "A+"
grep("A+", x, fixed = FALSE) # same as the default regular-expression search
match("AC", x) # position of the first exact match

# Splitting strings on a separator ----
path <- "User/wusuqi/nb/666"
strsplit(path, "/")
path <- "wusuqi,nb,666"
strsplit(path, ",")


# outer() applies paste() to every suit/face combination.
face <- 1:20
suit <- c("wusuqi", "wangyuting", "wsq", "wyt")
outer(suit, face, FUN = paste)

# Dates ----
Sys.Date()
a <- "2021-3-31"
as.Date(a, format = "%Y-%m-%d")
class(as.Date(a, format = "%Y-%m-%d"))
seq(as.Date("2021-3-31"), as.Date("2021-4-30"), by = 5) # every 5 days
# Time series ----
sales <- round(runif(48, min = 50, max = 100))
sales
# Only `start` and `frequency` are given, so each series holds exactly the 48
# simulated values. An `end` beyond the data makes ts() repeat the values to
# fill the span, and a period of 5 does not exist for quarterly or yearly data.
sales_monthly <- ts(sales, start = c(2010, 4), frequency = 12)
sales_monthly
sales_quarterly <- ts(sales, start = c(2020, 1), frequency = 4)
sales_quarterly
sales_yearly <- ts(sales, start = 2020, frequency = 1)
sales_yearly

# 20 values need a 4 x 5 matrix; asking for 4 x 4 would not fit all of them.
c <- matrix(1:20, nrow = 4, ncol = 5)
c
getwd()
# Build a data frame column by column.
PatientID <- c(1, 2, 3, 4)
# The first admission date was mistyped as "10/15/200-".
ADmDate <- c("10/15/2009", "11/01/2009", "10/21/2009", "10/28/2009")
Age <- c(25, 34, 28, 52)
Diabetes <- c("Type1", "Type2", "Type3", "Type4")

Status <- c("poor", "Improved", "Excellent", "poor")
data <- data.frame(PatientID, ADmDate, Age, Diabetes, Status)
data
?edit
# An empty data frame with typed columns, to be filled in through the editor.
data2 <- data.frame(
  PatientID = character(),
  ADmDate = character(),
  Age = numeric(),
  Diabetes = character(),
  Status = character()
)
data2 <- edit(data2) # edit() returns the modified copy
data2
fix(data2) # fix() edits the object in place
# Reading data from files ----
install.packages("RODBC")
# All relative paths below resolve against this directory. It is set once
# instead of being repeated before every group of reads.
setwd("C:/Users/wusuqi/Desktop/Rdata")
read.table("input.txt")
x <- read.table("input.txt")
x
head(x, n = 10)
x <- read.table("C:/Users/wusuqi/Desktop/Rdata/input.txt") # same file, absolute path
x
x <- read.table("input.csv", sep = ",")
x
# header = TRUE: the first row supplies the column names.
x <- read.table("input.csv", sep = ",", header = TRUE)
head(x)
# skip = 5: ignore the first five lines and start reading at line 6.
read.table("input 1.txt", header = TRUE, skip = 5)
# skip = 50, nrows = 200: ignore the first 50 lines, then read at most 200 rows.
read.table("input 1.txt", header = TRUE, skip = 50, nrows = 200)
install.packages("XML")
library(XML)
help(package = "foreign")
readClipboard() # read the clipboard contents (Windows only)
read.table("clipboard", header = TRUE, sep = "\t") # clipboard as a tab-separated table
read.table(gzfile("input.txt.gz")) # read from a gzip-compressed file
# The original call left `n =` empty, which does not limit the read to the
# five lines the note asked for; n = 5 does.
readLines("input.csv", n = 5)
# Writing data to files ----
x <- rivers
x
write(x, file = "x.txt")
getwd()
write.table(x, file = "C:/Users/wusuqi/Desktop/newfile.txt") # write to a file
# append = TRUE adds to the file; col.names = FALSE omits the header row.
write.table(
  iris,
  file = "C:/Users/wusuqi/Desktop/Rdata/newfile.txt",
  col.names = FALSE,
  append = TRUE
)
write.table(mtcars, gzfile("newfile.txt.gz")) # write to a gzip-compressed file
read.table("mtcars.csv", sep = ",", header = TRUE)
install.packages("XLConnect")
# Reading Excel data with readxl ----
library(readxl)
read_excel("file.xlsx", range = NULL, sheet = NULL, col_names = TRUE)
setwd("C:/Users/wusuqi/Desktop/Rdata")
read.csv("mtcars.csv", header = TRUE, sep = ",")
read.table("clipboard", header = TRUE, sep = "\t")
# The file name was mistyped as "mtcars.xlsxz"; it now matches the file that
# the next call reads.
read_excel("mtcars.xlsx", sheet = NULL, range = NULL, col_names = TRUE)
cars32 <- read_excel("mtcars.xlsx", sheet = NULL, range = NULL, col_names = TRUE)
# Reading Excel data with the xlsx package ----
install.packages("rJava")
install.packages("xlsx")
install.packages("xlsxjars")
library(xlsx)
cars32 <- read.xlsx("mtcars.xlsx", sheetIndex = 1, header = TRUE)
methods(is)
# Vector -> matrix: assigning dim() reshapes the vector.
x <- state.abb
x
dim(x) <- c(10, 5)
x
# Matrix -> data frame.
x <- state.x77
x
data.77 <- as.data.frame(x)
data.77
# Vector -> factor, and vector -> list.
x <- state.abb
x
as.factor(x)
as.list(x) # convert to a list
# Combine vectors and a matrix into one data frame.
state <- data.frame(x, state.region, state.x77)
state$Income
state["Nevada", ] # select a row by its row name


# Subsetting a data frame ----
who <- read.csv("WHO.csv", header = TRUE)
head(who)
who1 <- who[1:50, 1:10] # first 50 rows, first 10 columns


who2 <- who[c(1, 3, 5, 8), c(2, 4, 6, 8)] # selected rows and columns
who2
who$Continent
# Rows whose CountryID lies strictly between 50 and 100. The two conditions
# are joined with `&`; a second positional argument to which() is `arr.ind`,
# not another condition. The trailing comma selects rows rather than columns.
# NOTE(review): assumes the upper bound was meant for CountryID — confirm.
who4 <- who[which(who$CountryID > 50 & who$CountryID < 100), ]
who4


# Random sampling ----
x <- 1:100
sample(x, 30) # draw 30 values from x
sample(x, 100, replace = TRUE) # sampling with replacement
sample(x, 100, replace = FALSE) # sampling without replacement
sort(sample(x, 60, replace = FALSE))
who5 <- sample(who$CountryID, 30, replace = TRUE)
who

mtcars
# The comma matters: negative indices before it drop rows, after it columns.
# Without any comma the data frame is indexed by column.
mtcars[-(1:5), ] # drop rows 1-5
mtcars[, -(1:4)] # drop columns 1-4
mtcars$mpg <- NULL # assigning NULL removes the column from this copy of mtcars
mtcars


# Combining data sets ----
data.frame(USArrests, state.division)
cbind(USArrests, state.division) # bind columns
data1 <- head(USArrests, 20)
data2 <- tail(USArrests, 20)
data <- rbind(data1, data2) # bind rows; the column names must match
data
rownames(data)
length(rownames(data))
duplicated(data) # flags rows that repeat an earlier row
data[duplicated(data), ] # the repeated rows
data[!duplicated(data), ] # the rows with repeats removed
unique(data) # same result; the call was misspelled as unqiue()


sractm <- t(mtcars) # transpose: rows become columns
sractm


letters
rev(letters) # reverse the order of the elements

women
rev(rownames(women))
women[rev(rownames(women)), ] # the data frame with its rows in reverse order


# Modifying values in a data frame ----
women
women$height * 2.5
data.frame(women$height * 2.5, women$weight)
transform(women, height = height * 2.5) # overwrite an existing column
transform(women, cm = height * 2.5) # add a new column to women

# Sorting ----
sort(rivers)
rev(sort(rivers)) # reversed order; sort() cannot sort a data frame
mtcars[sort(rownames(mtcars)), ] # rows ordered by row name
# order() returns the positions of the values in sorted order.
mtcars[order(mtcars$cyl, mtcars$disp), ]


# Row and column summaries ----
WorldPhones
worldphones <- as.data.frame(WorldPhones)
cs <- rowSums(worldphones) # one total per row
cs
ca <- colMeans(worldphones) # one mean per column
ca
tatal <- cbind(worldphones, Total = cs) # column label was mistyped "Totcaal"
tatal
# Append a row of column means. The means are computed from the table that
# already contains the Total column, so the new row has exactly one value per
# column (`ca` has one value too few for this table).
rbind(tatal, Mean = colMeans(tatal))

# MARGIN = 1 applies FUN to each row, MARGIN = 2 to each column.
apply(WorldPhones, MARGIN = 1, FUN = sum)

# lapply() always returns a list; sapply() simplifies the result to a vector
# or matrix when it can.

lapply(state.center, FUN = length)
# tapply() applies a function within the groups of a factor.
heatmap(state.x77)
x <- c(1, 2, 3, 4, 6)


# scale() centres and standardises the columns.
# center = TRUE requests centring; scale = TRUE requests standardisation.
x <- scale(state.x77, center = TRUE, scale = TRUE)
heatmap(x)
# Reshaping with reshape2 ----
install.packages("reshape2")
library(reshape2)
airquality
head(airquality)
names(airquality) <- tolower(names(airquality)) # lower-case column names
head(airquality)
aq1 <- melt(airquality) # id variables left to melt()'s defaults
aq1
head(aq1)
# Melt to long form, keeping month and day as identifier columns.
aq1 <- melt(airquality, id.vars = c("month", "day"))
aq1
head(aq1, 50)
aq2 <- dcast(aq1, month + day ~ variable) # cast to one column per variable
aq2


# Reshaping with tidyr ----
install.packages(c("tidyr", "dplyr"))
library(tidyr)
# The three measurement columns are selected by name so that the example does
# not depend on the column order of the working copy of mtcars.
tdata <- mtcars[1:10, c("cyl", "disp", "hp")]
tdata <- data.frame(names = rownames(tdata), tdata)
tdata
gather(tdata, key = "Key", value = "Value", cyl, disp, hp) # gather() stacks columns into key/value pairs
gdata <- gather(tdata, key = "Key", value = "Value", cyl, disp, hp)
gdata
spread(gdata, key = "Key", value = "Value") # spread() splits them back into columns
df <- data.frame(x = c(NA, "a.b", "a.c", "a.d"))
df
separate(df, col = x, into = c("A", "B")) # split one column into two
df <- data.frame(x = c(NA, "a-b", "a-c", "a-d"))
# Keep the separated result: unite() needs a data frame that has columns A
# and B. The original call passed `x`, which is not that data frame.
sdata <- separate(df, col = x, into = c("A", "B"))
sdata
unite(sdata, col = "AB", A, B, sep = "-") # merge A and B into one column
# Single-table dplyr verbs ----
library(dplyr)
dplyr::filter(iris, Sepal.Length > 7) # pkg::fun calls filter() from dplyr explicitly
dplyr::distinct(rbind(iris[1:10, ], iris[1:15, ])) # distinct() removes duplicated rows
dplyr::slice(iris, 10:15) # pick rows by position
dplyr::sample_n(iris, 10) # 10 random rows
dplyr::sample_frac(iris, 0.1) # a random fraction of the rows
dplyr::arrange(iris, Sepal.Length) # sort by Sepal.Length
dplyr::arrange(iris, desc(Sepal.Length)) # sort in the opposite direction
summarise(iris, avg = mean(Sepal.Length))
# %>% passes the output of one function to the next as its input
# (Ctrl+Shift+M is the keyboard shortcut for it).
head(mtcars, 10)
head(mtcars, 10) %>% tail(5)
dplyr::group_by(iris, Species) # grouping
iris %>%
  group_by(Species) %>%
  summarise()
iris %>%
  group_by(Species) %>%
  summarise(avg = mean(Sepal.Width))
iris %>%
  group_by(Species) %>%
  summarise(avg = mean(Sepal.Width)) %>%
  arrange(avg)

dplyr::mutate(iris, new = Sepal.Length + Petal.Length) # add a derived column
# Two small tables sharing the key column x1.
a <- data.frame(x1 = c("A", "B", "C"), x2 = c(1, 2, 3))
b <- data.frame(x1 = c("A", "B", "D"), x3 = c(TRUE, TRUE, FALSE))
a
b
dplyr::left_join(a, b, by = "x1") # left join
dplyr::right_join(a, b, by = "x1") # right join
dplyr::full_join(a, b, by = "x1") # full join
dplyr::semi_join(a, b, by = "x1") # semi join
dplyr::anti_join(a, b, by = "x1") # anti join
library(dplyr)
# Store the row names in a regular column.
mtcars <- mutate(mtcars, Model = rownames(mtcars))
mtcars
first <- slice(mtcars, 1:20)
second <- slice(mtcars, 10:30)
intersect(first, second) # intersection
dplyr::union(first, second) # union
setdiff(first, second) # rows of first that are not in second
setdiff(second, first) # rows of second that are not in first
# Multiple linear regression on five state.x77 columns.
state <- as.data.frame(
  state.x77[, c("Murder", "Population", "Illiteracy", "Income", "Frost")]
)
fir <- lm(Murder ~ Population + Illiteracy + Income + Frost, data = state)
summary(fir)
ls() # objects in the workspace
a <- 1:100
plot(a)
ls("package:base") # objects exported by the base package
# Random numbers ----
rnorm(n = 100, mean = 465, sd = 668)
round(rnorm(n = 100, mean = 465, sd = 668))
x <- round(rnorm(n = 13, mean = 46, sd = 55))
qqnorm(x)
runif(1) # one random number between 0 and 1
runif(10) * 10 # ten random numbers between 0 and 10
runif(20, min = 45, max = 57878)
# Setting the same seed reproduces the same random numbers.
set.seed(666)
runif(51)
set.seed(666)
runif(51)
   
# Descriptive statistics for three mtcars columns.
myvars <- mtcars[c("hp", "wt", "am")]
summary(myvars)
fivenum(myvars$hp) # Tukey's five-number summary of hp

# Descriptive statistics from add-on packages ----
install.packages("Hmisc")
library(Hmisc)
describe(myvars)

install.packages("pastecs")
library(pastecs)
stat.desc(myvars)
stat.desc(myvars, basic = TRUE)
stat.desc(myvars, desc = TRUE)
stat.desc(myvars, norm = TRUE)

install.packages("psych")
library(psych)
# NOTE(review): Hmisc is still attached, so this presumably resolves to
# psych's describe() because psych was attached last — confirm.
describe(myvars)
describe(myvars, trim = 0.1)
describe(myvars,trim=0.1)
# Grouped summaries with aggregate() ----
library(MASS)
head(Cars93)
# Mean of the three columns per manufacturer.
mean_by_manufacturer <- aggregate(
  Cars93[c("Min.Price", "Max.Price", "MPG.city")],
  by = list(Manufacturer = Cars93$Manufacturer),
  FUN = mean
)
mean_by_manufacturer
# Mean per origin. The grouping column is labelled "Origin" to match the
# variable it is built from (it was labelled "Manufacturer").
mean_by_origin <- aggregate(
  Cars93[c("Min.Price", "Max.Price", "MPG.city")],
  by = list(Origin = Cars93$Origin),
  FUN = mean
)
mean_by_origin
# Standard deviation per manufacturer.
sd_by_manufacturer <- aggregate(
  Cars93[c("Min.Price", "Max.Price", "MPG.city")],
  by = list(Manufacturer = Cars93$Manufacturer),
  FUN = sd
)
sd_by_manufacturer

# Grouped descriptive statistics ----
install.packages("doBy")
library(doBy)
summaryBy(hp + wt ~ am, data = myvars, FUN = mean) # means of hp and wt per am group
# describe.by() is the deprecated name; psych's current function is
# describeBy(), which takes the grouping through `group`.
describeBy(myvars, group = list(am = mtcars$am))
# Frequency tables ----
mtcars$cyl <- as.factor(mtcars$cyl)
split(mtcars, mtcars$cyl) # one data frame per cylinder count
mtcars
# mpg is read from the untouched datasets::mtcars. Earlier in this script
# `mtcars$mpg <- NULL` removes that column from the working copy, and cut()
# fails on NULL.
mpg_groups <- cut(datasets::mtcars$mpg, breaks = seq(10, 50, 10))
mpg_groups
table(mtcars$cyl) # counts per level
table(mpg_groups) # counts per mpg interval
prop.table(table(mtcars$cyl)) # proportions
prop.table(table(mtcars$cyl)) * 100 # percentages
# Contingency tables and independence tests ----
library(vcd)
Arthritis
# Three ways to cross-tabulate Treatment by Improved.
table(Arthritis$Treatment, Arthritis$Improved)
with(Arthritis, table(Treatment, Improved))

xtabs(~ Treatment + Improved, data = Arthritis)

# Chi-squared and Fisher tests on Treatment by Improved.
mytable <- table(Arthritis$Treatment, Arthritis$Improved)
chisq.test(mytable)

fisher.test(mytable)
# Chi-squared test on Sex by Improved.
mytable <- table(Arthritis$Sex, Arthritis$Improved)
chisq.test(mytable)
cor(state.x77) # correlation matrix of all columns
install.packages("ggm")
library(ggm)
library(MASS)
UScrime
t.test(Prob ~ So, data = UScrime) # two-sample t test of Prob between the So groups
# Basic plotting ----
women
plot(women$height) # one vector
plot(women$height, women$weight) # two vectors: scatter plot
plot(as.factor(women$height)) # a factor
plot(mtcars$cyl)
plot(as.factor(mtcars$cyl))
plot(women$height ~ women$weight) # formula interface
plot(as.factor(mtcars$cyl), col = c("red", "blue", "green")) # coloured by position
cor # a function name without parentheses refers to the function object itself
# for loop: prints the message ten times.
for (i in seq_len(10)) {
  print("hello world")
}
# while loop: the counter has to be advanced inside the body, otherwise the
# condition never becomes FALSE and the loop runs forever.
i <- 1
while (i <= 10) {
  print("hello world")
  i <- i + 1
}
setwd("C:/Users/wusuqi/Desktop")
# save.image() writes the workspace in R's binary data format, so the file
# gets the conventional .RData extension rather than .R, which denotes a script.
save.image("bilibili.RData")
getwd()