# 1. 练习：钻石

``````r
# Attach ggplot2, load its `diamonds` data set, and print a per-column summary.
library(ggplot2)
data(diamonds)
summary(diamonds)
``````

``````r
# Open the help page for the `diamonds` data set.
?diamonds
``````

# 2. 练习：价格直方图

``````r
# Histogram of diamond prices using bins that are 10 price units wide.
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 10)
``````

# 3. 练习：价格直方图小结

``````r
# Five-number summary plus mean of the price column.
summary(diamonds$price)
``````

``````
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    326     950    2401    3933    5324   18823
``````

# 4. 练习：钻石数量

``````r
# Tabulate how many diamonds do (TRUE) and do not (FALSE) cost at least 15000.
summary(diamonds$price >= 15000)
``````

# 5. 练习：廉价钻石

``````r
# Price histogram zoomed in on diamonds costing at most 1000.
# coord_cartesian() zooms without discarding rows, and a tick every 100
# replaces breaks = 1:1000, which requested a thousand overlapping labels.
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 10) +
  scale_x_continuous(breaks = seq(0, 1000, 100)) +
  coord_cartesian(xlim = c(0, 1000))
``````

# 6. 练习：切工-价格直方图

``````r
# Price histograms, one panel per cut, arranged in two columns.
# A tick every 2500 replaces breaks = 1:15000 (15,000 tick marks per panel).
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 10) +
  scale_x_continuous(breaks = seq(0, 15000, 2500)) +
  facet_wrap(~cut, ncol = 2)
``````

# 7. 练习：切工-价格

``````r
# Summary statistics of price, computed separately for each cut.
by(diamonds$price, diamonds$cut, summary)
``````

``````
diamonds$cut: Fair
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    337    2050    3282    4359    5206   18574
------------------------------------------------
diamonds$cut: Good
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    327    1145    3050    3929    5028   18788
------------------------------------------------
diamonds$cut: Very Good
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    336     912    2648    3982    5373   18818
------------------------------------------------
diamonds$cut: Premium
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    326    1046    3185    4584    6296   18823
------------------------------------------------
diamonds$cut: Ideal
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    326     878    1810    3458    4678   18806
``````

# 8. 练习：标尺和多直方图

``````r
# Price histogram per cut, written with ggplot() because qplot() is deprecated.
# scales = "free_y" gives each panel its own count axis, so panels with few
# diamonds are not flattened by the panel with the most.
ggplot(diamonds, aes(x = price)) +
  geom_histogram() +
  facet_wrap(~cut, scales = "free_y")
``````

# 9. 练习：由切工决定的每克拉价格

``````r
# Price per carat, one panel per cut, on a log10 x axis.
# Only one x scale is kept: a second scale added to the same plot replaces
# the first. binwidth is measured in log10 units on this axis, so 1 would
# mean one bin per factor of ten; 0.05 gives a usable resolution.
ggplot(diamonds, aes(x = price / carat)) +
  geom_histogram(binwidth = 0.05) +
  scale_x_log10() +
  facet_wrap(~cut, ncol = 2)
``````

# 10. 练习：价格箱线图

``````r
# Box plots of price for each color grade.
# Rows are filtered on `color`, the variable actually plotted on the x axis.
# coord_cartesian() only zooms the y axis; limits set on the scale would drop
# prices above 15000 before the box statistics are computed.
ggplot(subset(diamonds, !is.na(color)), aes(x = color, y = price)) +
  geom_boxplot() +
  scale_y_continuous(breaks = seq(0, 15000, 500)) +
  coord_cartesian(ylim = c(0, 15000))
``````

# 11. 练习：四分位数间距-IQR

``````r
# Summary statistics of price per color grade; the IQR of each group is
# its 3rd quartile minus its 1st quartile.
by(diamonds$price, diamonds$color, summary)
``````

``````
diamonds$color: D
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    357     911    1838    3170    4214   18693
------------------------------------------------
diamonds$color: E
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    326     882    1739    3077    4003   18731
------------------------------------------------
diamonds$color: F
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    342     982    2344    3725    4868   18791
------------------------------------------------
diamonds$color: G
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    354     931    2242    3999    6048   18818
------------------------------------------------
diamonds$color: H
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    337     984    3460    4487    5980   18803
------------------------------------------------
diamonds$color: I
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    334    1120    3730    5092    7202   18823
------------------------------------------------
diamonds$color: J
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    335    1860    4234    5324    7695   18710
``````

# 12. 练习：由颜色表示的每克拉箱线图

``````r
# Box plots of price per carat for each color grade.
# Rows are filtered on `color`, the variable actually plotted on the x axis.
# coord_cartesian() only zooms the y axis; limits set on the scale would drop
# values above 10000 before the box statistics are computed.
ggplot(subset(diamonds, !is.na(color)), aes(x = color, y = price / carat)) +
  geom_boxplot() +
  scale_y_continuous(breaks = seq(0, 10000, 500)) +
  coord_cartesian(ylim = c(0, 10000))
``````

# 13. 练习：克拉频率多边形

``````r
# Frequency polygon of carat weight.
# Bins are 0.01 wide so each point counts a single recorded carat value,
# which keeps the counts comparable with table(diamonds$carat); wider bins
# pool several values and can exceed the 0-3000 count axis.
# coord_cartesian() zooms both axes without censoring any bin, whereas
# limits set on the scales would remove out-of-range points from the line.
ggplot(subset(diamonds, !is.na(carat)), aes(x = carat)) +
  geom_freqpoly(binwidth = 0.01) +
  scale_x_continuous(breaks = seq(0, 5, 0.5)) +
  scale_y_continuous(breaks = seq(0, 3000, 500)) +
  coord_cartesian(xlim = c(0, 5), ylim = c(0, 3000))
``````

``````r
# Count how many diamonds share each distinct carat value.
table(diamonds$carat)
``````

# 14. 练习：用 R 进行数据整理

- tidyr：用于重塑数据布局的包
- dplyr：用于转换整洁表格数据的包