R语言 多元线性回归 研究年龄、身高、体重的关系

  • 0-20岁数据分析
# Analysis of the 0-20 age group: relationship between age, height (cm) and
# weight (kg).
# `%>%`, `as_tibble()` and `ggplot()` all come from the tidyverse, so it must
# be loaded before the first pipe in this section.
library(tidyverse)

# Tab-separated text file with a header row. The code below uses its columns
# `age`, `cm` and `kg`.
data <- read.table("e://kg.txt",
                   header = TRUE,
                   sep = "\t") %>%
  as_tibble()

# Pairwise line plots of the three variables.
ggplot(data, aes(cm, kg)) + geom_line()
ggplot(data, aes(age, cm)) + geom_line()
ggplot(data, aes(age, kg)) + geom_line()

# Relationship between age, height and weight: correlation matrix of the
# first three columns (presumably age, cm, kg -- confirm against the file),
# drawn as a colour map with the coefficients printed in grey.
data[1:3] %>%
  cor() %>%
  corrplot::corrplot(method = "color", addCoef.col = "grey")

# Regress weight on the cube of height. Passing `data = data` replaces the
# earlier attach(), so no columns leak into the search path.
lm_data <- lm(kg ~ I(cm^3), data = data)
summary(lm_data)
lm_data

# Base-graphics scatter plot on the cm^3 scale. The default x axis is
# suppressed and redrawn so that the ticks sit at cm^3 but are labelled with
# the plain cm values. `with()` keeps the default axis titles ("cm^3", "kg").
with(data, {
  plot(cm^3, kg, xaxt = "n")
  axis(1, at = cm^3, labels = cm)
})
abline(lm_data)

# ggplot fit. geom_smooth() is called without a method, so it draws ggplot2's
# default smoother rather than the linear model fitted above.
ggplot(data, aes(cm^3, kg)) +
  geom_point() +
  geom_smooth()
  • 分性别数据分析
# https://zhuanlan.zhihu.com/p/94372177
# https://www.jianshu.com/p/a081a791ae03
# https://cloud.tencent.com/developer/article/1674211
# https://www3.nd.edu/~steve/computing_with_data/2_Motivation/motivate_ht_wt.html?spm=a2c4e.11153940.blogcont603256.20.333b1d6fYOsiOK
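# Load the data. The dataset can be downloaded here: https://github.com/johnmyleswhite/ML_for_Hackers/blob/master/02-Exploration/data/01_heights_weights_genders.csv (the download is a comma-separated .csv; the code below reads a copy saved as a tab-separated .txt file)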
library(tidyverse)
# Read the tab-separated heights/weights/genders table (first row is the
# header) and convert it to a tibble.
ht_weight_df <- as_tibble(
  read.table(
    file = "e://01_heights_weights_genders.txt",
    header = TRUE,
    sep = "\t"
  )
)
# Show the missing-data pattern of the table.
mice::md.pattern(ht_weight_df)

# Plot to inspect the correlation: drop the first column, compute the
# correlation matrix of the remaining columns and draw it as a colour map
# with the coefficients printed in grey.
corrplot::corrplot(
  cor(select(ht_weight_df, -1)),
  method = "color",
  addCoef.col = "grey"
)
# Base-graphics plot of a random 10% sample of the rows (first column
# dropped), drawn with small points.
plot(sample_frac(select(ht_weight_df, -1), 0.1), cex = 0.1)

# Fit a linear model to check the linear relationship between Weight and
# Height.
lm_ht_weight <- lm(formula = Weight ~ Height, data = ht_weight_df)
summary(lm_ht_weight)
# Add the fitted line to the base-graphics plot that is currently open.
abline(lm_ht_weight)

# Comparison by gender: rounded mean of Height * 2.54 for each Gender group.
# NOTE(review): the 2.54 factor suggests Height is in inches and the result
# is in centimetres -- confirm against the dataset.
# The summary column is named explicitly so it does not get the
# auto-generated name `round(mean(Height) * 2.54)`.
ht_weight_df %>%
  group_by(Gender) %>%
  dplyr::summarise(mean_height_cm = round(mean(Height) * 2.54))
  # Author's notes on alternatives (translated, not re-verified here):
  # subset(Gender == ...) can also be used to pick out one group
  # fivenum() does not work on a [2] or select(2) selection
  # sapply() does not work with $variable or select(2)
  # psych::describe() does not work on a [2] selection
  # pastecs::stat.desc(), Hmisc::describe() and summary() all work
  # plyr::ddply(.(Gender), function(df) summary(df$Height)) computes the
  # per-group values directly from the original data

# Inspect the distribution of Height for each gender with base graphics.
par(mfrow = c(1, 1))

# Heights of the rows whose Gender equals `gender`, as a plain numeric vector.
# which() drops rows where the comparison is NA, as subset() does.
heights_of <- function(gender) {
  as.numeric(ht_weight_df$Height[which(ht_weight_df$Gender == gender)])
}
male_height <- heights_of("Male")
female_height <- heights_of("Female")

# Male density drawn as vertical bars; ann = FALSE suppresses the default
# annotation (including main), so the title is added separately below.
plot(density(male_height), type = "h", col = 4, ann = FALSE)
# Female density overlaid as a line.
lines(density(female_height), col = 2)
title(main = "Height By Gender")
# Dotted vertical lines at the two group means (male first, then female).
abline(
  v = c(mean(male_height), mean(female_height)),
  col = c(1, 2),
  lty = 3
)
# Density of Height with one coloured curve per Gender.
ggplot(ht_weight_df, aes(x = Height, colour = Gender)) +
  geom_density()
# Q-Q plot of Height, one panel per Gender. The qq stat requires the
# `sample` aesthetic.
ggplot(ht_weight_df, aes(sample = Height)) +
  stat_qq() +
  facet_wrap(~Gender)

# Linear fit on the grouped data: semi-transparent scatter of Weight against
# Height coloured by Gender, with one straight-line fit per colour group.
ggplot(ht_weight_df, aes(x = Height, y = Weight, colour = Gender)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", formula = y ~ x)
# The same comparison as a single model: Height, Gender and their
# interaction, so each gender gets its own intercept and slope.
lm_ht_wt_by_gender <- lm(
  formula = Weight ~ Height * Gender,
  data = ht_weight_df
)
summary(lm_ht_wt_by_gender)
  • 如果观察人的一生,身高、体重的变化曲线,会是什么样的呢?

猜你喜欢

转载自blog.csdn.net/weixin_42683052/article/details/121225950