R语言实战-读书笔记 (第4章 基本数据管理)

***********************************

与导图结合的脚本文件:
创建脚本:文件——新建脚本程序,将以下代码复制粘贴至脚本内,选中右键运行当前或所选代码。

# Chapter 4: Basic data management

# Build the example data set ----
# Every survey field is first created as a stand-alone vector; the vectors are
# then bound column-wise into the `leadership` data frame.
manager <- c(1, 2, 3, 4, 5)
date    <- c("10/24/08", "10/28/08", "10/1/08", "10/12/08", "5/1/09")
country <- rep(c("US", "UK"), times = c(2, 3))
gender  <- c("M", "F", "F", "M", "F")
age     <- c(32, 45, 25, 39, 99)

# Answers to the five questionnaire items; NA marks an unanswered item.
q1 <- c(5, 3, 3, 3, 2)
q2 <- c(4, 5, 5, 3, 2)
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 5, NA, 2)
q5 <- c(5, 5, 2, NA, 1)

# Character columns are kept as character (not converted to factors).
leadership <- data.frame(
  manager = manager,
  date = date,
  country = country,
  gender = gender,
  age = age,
  q1 = q1,
  q2 = q2,
  q3 = q3,
  q4 = q4,
  q5 = q5,
  stringsAsFactors = FALSE
)

# Create new variables ----
# Three equivalent ways of adding the derived columns `sumx` and `meanx`.
mydata <- data.frame(
  x1 = c(2, 2, 6, 4),
  x2 = c(3, 4, 2, 8)
)

# 1) Spell out the data frame on both sides of each assignment.
mydata$sumx <- mydata$x1 + mydata$x2
mydata$meanx <- (mydata$x1 + mydata$x2) / 2

# 2) Evaluate the right-hand side inside the data frame with with().
#    This allows the same bare column names as attach()/detach(), but does not
#    touch the search path, so the columns cannot be masked by global variables
#    and nothing stays attached if a statement fails.
mydata$sumx <- with(mydata, x1 + x2)
mydata$meanx <- with(mydata, (x1 + x2) / 2)

# 3) transform() creates (or overwrites) several columns in a single call.
mydata <- transform(
  mydata,
  sumx = x1 + x2,
  meanx = (x1 + x2) / 2
)
mydata

# Recode a variable ----
# Derive the categorical column `agecat` from the numeric column `age`.
leadership <- within(leadership, {
  # 99 is the code this data set uses for a missing age; turn it into NA first
  # so that it is not classified as "Elder". %in% never yields NA, so this is
  # safe even when `age` already contains missing values.
  age[age %in% 99] <- NA

  # Start from NA so that rows with a missing age stay uncategorised.
  agecat <- NA
  agecat[age > 75] <- "Elder"
  agecat[age >= 55 & age <= 75] <- "Middle Aged"
  agecat[age < 55] <- "Young"
})
leadership

# Rename variables ----
# Install plyr only when it is missing: an unconditional install.packages()
# re-downloads the package on every run and fails in a non-interactive session
# that has no CRAN mirror configured.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)

# plyr::rename() takes a named character vector of the form c(old = "new").
# The call is namespace-qualified so that it cannot be masked by a rename()
# from another attached package.
leadership <- plyr::rename(
  leadership,
  c(manager = "managerID", date = "testDate")
)
leadership

# Missing values ----
# Logical matrix that is TRUE wherever columns 6 to 10 (q1 to q5) hold NA.
is.na(leadership[, 6:10])

# Recode a specific value as missing ----
# Every age equal to 99 is replaced by NA.
leadership[["age"]][leadership[["age"]] == 99] <- NA
leadership[["age"]]
# Excluding missing values from a computation ----
x <- c(1, 2, NA, 3)

# Arithmetic on NA yields NA, so a single missing element spoils the total.
z1 <- x[1] + x[2] + x[3] + x[4]

# sum() behaves the same way by default: help(sum) shows na.rm = FALSE.
z2 <- sum(x)

# Option 1: ask sum() to skip the missing values.
z3 <- sum(x, na.rm = TRUE)

# Option 2: drop the missing values first, then sum what is left.
y <- na.omit(x)
z4 <- sum(y)

# z1 and z2 are NA; z3 and z4 are both 6.
z1
z2
z3
z4

# Date formats ----
leadership

# %m = month number, %d = day of month, %y = two-digit year.
myformat <- "%m/%d/%y"

# Convert the character dates to Date objects in place. Assigning the result to
# a different column name would append a second column and leave `testDate`
# itself stored as character.
leadership$testDate <- as.Date(leadership$testDate, format = myformat)
leadership

# Current date and time ----
Sys.Date()  # today's date, as a Date object
date()      # current date and time, as a character string

# Weekday of today: %A is the full name, %a the abbreviated name.
format(Sys.Date(), format = "%A")
format(Sys.Date(), format = "%a")

# Elapsed time between two dates ----
today <- Sys.Date()
dob <- as.Date("1956-10-12", format = "%Y-%m-%d")
difftime(time1 = today, time2 = dob, units = "weeks")

# Type conversion ----
# is.<type>() tests the type of an object, as.<type>() converts it.
a <- c(1, 2, 3)
is.numeric(a)  # TRUE: `a` is still a numeric vector here

# After the conversion `a` holds the strings "1", "2" and "3".
a <- as.character(a)
print(a)

# Sorting ----
# order() returns the row permutation that sorts by the given keys. The keys
# are evaluated with with() so that `gender` and `age` are the columns of
# `leadership`. With attach() they would be masked by the global vectors of the
# same name created when the data set was built, and the sort would silently
# use those stale values (e.g. an age of 99 instead of NA).
sex_order <- leadership[with(leadership, order(gender)), ]
sex_order

# Sort by gender, then by ascending age within gender; use -age for descending.
sex_age_order <- leadership[with(leadership, order(gender, age)), ]
sex_age_order

# Selecting and dropping by position ----
# Positive indices keep the listed rows/columns; negative indices drop them.
leadership

newdata1 <- leadership[2:3, ]       # keep rows 2 and 3
newdata1

newdata2 <- leadership[, 2:3]       # keep columns 2 and 3
newdata2

newdata3 <- leadership[-(2:3), ]    # drop rows 2 and 3
newdata3

newdata4 <- leadership[, -(2:3)]    # drop columns 2 and 3
newdata4
# Filtering rows ----
# Keep the male managers older than 30.
# - The condition is evaluated with with() so that `gender` and `age` are the
#   columns of `leadership`; with attach() they would be masked by the global
#   vectors of the same name created when the data set was built.
# - which() turns the logical vector into row numbers and drops NA, so a
#   missing age cannot produce an all-NA row in the result.
newdata <- leadership[with(leadership, which(gender == "M" & age > 30)), ]
newdata

发布了9 篇原创文章 · 获赞 19 · 访问量 6688

猜你喜欢

转载自blog.csdn.net/qq_36509256/article/details/105688601