-
Notifications
You must be signed in to change notification settings - Fork 0
/
World_Life_Expectancy_Project.sql
151 lines (130 loc) · 3.47 KB
/
World_Life_Expectancy_Project.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Data Cleaning
SELECT *
FROM worldlifexpectancy;
SELECT country, year, CONCAT(Country, year), count(CONCAT(Country, year))
FROM worldlifexpectancy_backup
GROUP BY country, year, CONCAT(Country, year)
HAVING COUNT(CONCAT(Country, year)) > 1;
SELECT *
FROM (
SELECT row_id,
CONCAT(Country, year),
ROW_NUMBER() OVER(PARTITION BY CONCAT(Country, year) ORDER BY CONCAT(Country, year)) AS row_num
FROM worldlifexpectancy
) AS row_table
WHERE row_num > 1;
DELETE FROM worldlifexpectancy
WHERE row_id IN
(
SELECT row_id
FROM (
SELECT row_id,
CONCAT(Country, year),
ROW_NUMBER() OVER(PARTITION BY CONCAT(Country, year) ORDER BY CONCAT(Country, year)) AS row_num
FROM worldlifexpectancy
) AS row_table
WHERE row_num > 1
);
SELECT *
FROM worldlifexpectancy
WHERE status = '';
SELECT DISTINCT(status)
FROM worldlifexpectancy
WHERE status <> '';
SELECT DISTINCT(country)
FROM worldlifexpectancy
WHERE status ='Developing';
UPDATE worldlifexpectancy t1
JOIN worldlifexpectancy t2
ON t1.country = t2.country
SET t1.status = 'Developing'
WHERE t1.status = ''
AND t2.status <> ''
AND t2.status = 'Developing'
;
SELECT *
FROM worldlifexpectancy
WHERE country = 'United States of America';
UPDATE worldlifexpectancy t1
JOIN worldlifexpectancy t2
ON t1.country = t2.country
SET t1.status = 'Developed'
WHERE t1.status = ''
AND t2.status <> ''
AND t2.status = 'Developed'
;
SELECT t1.country, t1.year, t1.Lifeexpectancy,
t2.country, t2.year, t2.Lifeexpectancy,
t3.country, t3.year, t3.Lifeexpectancy,
ROUND((t2.Lifeexpectancy + t3.Lifeexpectancy)/2, 1)
FROM worldlifexpectancy t1
JOIN worldlifexpectancy t2
ON t1.country = t2.country
AND t1.year = t2.year - 1
JOIN worldlifexpectancy t3
ON t1.country = t3.country
AND t1.year = t3.year +1
WHERE t1.Lifeexpectancy = ''
;
UPDATE worldlifexpectancy t1
JOIN worldlifexpectancy t2
ON t1.country = t2.country
AND t1.year = t2.year - 1
JOIN worldlifexpectancy t3
ON t1.country = t3.country
AND t1.year = t3.year +1
SET t1.Lifeexpectancy = ROUND((t2.Lifeexpectancy + t3.Lifeexpectancy)/2, 1)
WHERE t1.Lifeexpectancy = ''
;
SELECT *
FROM worldlifexpectancy;
# Exploratory Data Analysis
SELECT Country, MIN(Lifeexpectancy), MAX(Lifeexpectancy),
ROUND(MAX(Lifeexpectancy) - MIN(Lifeexpectancy), 1) AS life_increase_15_years
FROM worldlifexpectancy
GROUP BY Country
HAVING MIN(Lifeexpectancy) <> 0 AND MIN(Lifeexpectancy) <> 0
ORDER BY life_increase_15_years ;
SELECT Year, ROUND(AVG(Lifeexpectancy), 1)
FROM worldlifexpectancy
WHERE Lifeexpectancy <> 0
GROUP BY Year
ORDER BY year;
# CORRELATION GDP to Life Expectancy
SELECT Country,
ROUND(AVG(Lifeexpectancy), 1) AS Life_Exp,
ROUND(AVG(GDP), 1) AS GDP
FROM worldlifexpectancy
GROUP BY Country
HAVING Life_Exp > 0
AND GDP > 0
ORDER BY GDP DESC;
SELECT
SUM(CASE
WHEN GDP >= 1500 THEN 1 ELSE 0
END) high_GDP_count,
ROUND(AVG(CASE
WHEN GDP >= 1500 THEN Lifeexpectancy ELSE NULL
END), 1) high_GDP_life_expectancy,
SUM(CASE
WHEN GDP <= 1500 THEN 1 ELSE 0
END) low_GDP_count,
ROUND(AVG(CASE
WHEN GDP <= 1500 THEN Lifeexpectancy ELSE NULL
END), 1) low_GDP_life_expectancy
FROM worldlifexpectancy;
SELECT status, ROUND(AVG(Lifeexpectancy), 1), COUNT(DISTINCT Country)
FROM worldlifexpectancy
GROUP BY status;
SELECT country, ROUND(AVG(Lifeexpectancy), 1) AS life_exp, ROUND(AVG(BMI), 1) AS BMI
FROM worldlifexpectancy
GROUP BY Country
HAVING Life_Exp > 0
AND BMI > 0
ORDER BY BMI DESC;
SELECT Country,
Year,
Lifeexpectancy,
AdultMortality,
SUM(AdultMortality) OVER(PARTITION BY Country ORDER BY Year) AS rolling_total
FROM worldlifexpectancy;