Grouping and Window function in SQL on World Population Dataset

DATA ANALYSIS

SQL SERVER

1.Use ‘regexp_like’ to replace ‘LIKE’ clauses

Database :

MS SQL SERVER

**Window Functions**

**(From - Where - Group BY - Having)**

Sequence

2.Create & Insert Statement

Create Database

				
					sql
create database Country;
use Country;
				
			

Creating Table

				
					sql!
-- Create table
CREATE TABLE Country (
    id INT,
    code VARCHAR(2),
    name VARCHAR(100),
    area INT,
    area_land INT,
    area_water INT,
    population BIGINT,
    population_growth DECIMAL(4, 2),
    birth_rate DECIMAL(5, 2),
    death_rate DECIMAL(5, 2),
    migration_rate DECIMAL(5, 2),
    continent VARCHAR(50) 
);
				
			

Insert Statement

				
					INSERT INTO Country (id, code, name, area, area_land, area_water, population, population_growth, birth_rate, death_rate, migration_rate, continent)
VALUES
    (1, 'af', 'Afghanistan', 652230, 652230, 0, 32564342, 2.32, 38.57, 13.89, 1.51, 'Asia'),
    (2, 'al', 'Albania', 28748, 27398, 1350, 3029278, 0.3, 12.92, 6.58, 3.3, 'Europe'),
    (3, 'ag', 'Algeria', 2381741, 2381741, 0, 39542166, 1.84, 23.67, 4.31, 0.92, 'Africa'),
    (4, 'an', 'Andorra', 468, 468, 0, 85580, 0.12, 8.13, 6.96, 0.0, 'Europe'),
    (5, 'ao', 'Angola', 1246700, 1246700, 0, 19625353, 2.78, 38.78, 11.49, 0.46, 'Africa'),
    (6, 'aq', 'Antarctica', 14000000, 280000, 13720000, 1106, NULL, NULL, NULL, NULL, 'Antarctica'),
    (7, 'ar', 'Argentina', 2780400, 2736690, 43710, 43431886, 0.93, 16.64, 7.33, 0.0, 'South America'),
    (8, 'am', 'Armenia', 29800, 28203, 1597, 3056382, 0.2, 13.61, 9.34, 0.0, 'Asia'),
    (9, 'aw', 'Aruba', 180, 180, 0, 112162, 1.22, 12.67, 8.68, 8.09, 'North America'),
    (10, 'au', 'Australia', 7692024, 7682300, 9724, 22751014, 1.05, 12.15, 7.14, 5.65, 'Oceania'),
    (11, 'at', 'Austria', 83858, 82445, 1413, 8665550, 0.55, 9.41, 9.42, 5.56, 'Europe'),
    (12, 'az', 'Azerbaijan', 86600, 82629, 3971, 9780780, 0.96, 16.64, 7.07, 0.0, 'Asia'),
    (13, 'bs', 'Bahamas', 13880, 10010, 3870, 324597, 0.95, 15.5, 7.29, 2.01, 'North America'),
    (14, 'bh', 'Bahrain', 760, 760, 0, 1346613, 4.51, 15.43, 2.71, 13.31, 'Asia'),
    (15, 'bd', 'Bangladesh', 147570, 130170, 17400, 156186882, 1.6, 19.68, 5.96, 0.0, 'Asia'),
    (16, 'bb', 'Barbados', 430, 430, 0, 290604, 0.31, 11.87, 8.69, 0.0, 'North America'),
    (17, 'by', 'Belarus', 207600, 202900, 4700, 9589689, 0.2, 10.7, 13.36, 0.7, 'Europe'),
    (18, 'be', 'Belgium', 30528, 30280, 248, 11323973, 0.66, 11.98, 9.54, 2.91, 'Europe'),
    (19, 'bz', 'Belize', 22966, 22806, 160, 347369, 1.97, 24.86, 5.29, 0.0, 'North America'),
    (20, 'bj', 'Benin', 112622, 110622, 2000, 10448647, 2.78, 38.85, 8.21, 0.0, 'Africa');
				
			

3.Table Preview

Table Preview

				
					Query - 
```sql!
select * from Country2;
				
			

Output:

Id Code Name Area Area_land Area_water Population Population_growth Birth_rate
1
af
Afghanistan
652230
652230
1
32564342
2.32
38.57
2
af
Afghanistan
652230
652230
1
32564342
2.32
38.57
3
af
Afghanistan
652230
652230
1
32564342
2.32
38.57
4
af
Afghanistan
652230
652230
1
32564342
2.32
38.57
5
af
Afghanistan
652230
652230
1
32564342
2.32
38.57
6
af
Afghanistan
652230
652230
1
32564342
2.32
38.57
7
af
Afghanistan
652230
652230
1
32564342
2.32
38.57

Question 1. Find the total population of each continent.

Query:

				
					select sum(population) total_population, continent
from Country2
group by continent;
				
			

Output:

total_population continent
69616166
Africa
1106
Antarctica
202934999
Asia
32694070
Europe
1074732
North America
22751014
Oceania
43431886
South America

Question 2. Calculate the average area_land for countries in each continent.

Query:

				
					select avg(area_land) avg_land_area, continent 
from country2
group by continent;
				
			

Output:

avg_land_area continent
1246354
Africa
280000
Antarctica
178798
Asia
68698
Europe
8356
North America
7682300
Oceania
2736690
South America

Question 3. List the continents with 5 or more than 5 countries.

Query:

				
					select continent, count(name) as country_count 
from Country2 
group by continent
having count(name) >= 5;
				
			

Output:

continent country_count
Asia
5
Europe
5

Question 4. Show the highest population among countries for each continent.

Query:

				
					select foo.name, foo.population, foo.continent from
(select max(population) over(partition by continent) as max_pop,
population, name, continent
from Country2) foo
where foo.max_pop = foo.population
				
			

Output:

name population continent
Algeria
39542166
Africa
Antarctica
1106
Antarctica
Bangladesh
156186882
Asia
Belgium
11323973
Europe
Belize
347369
North America
Australia
22751014
Oceania
Argentina
43431886
South America

Question 5. Display the continents where the average birth rate is higher than 20.

Query:

				
					select avg(birth_rate) avg_birth_rate, continent
from Country2
group by continent
having avg(birth_rate) > 20
				
			

Output:

avg_birth_rate continent
33.766666
Africa
20.786000
Asia

Question 6. List the continents with at least two country having a death rate above 10.

Query:

				
					select continent from Country2
where death_rate > 10
group by continent
having count(name)>=1
				
			

Output:

continent
Africa
Asia
Europe

Question 7. Show the continents with more than 3 countries and a total population greater than 100 million.

Query:

				
					select continent,sum(population) sum_pop,count(name) country_count
from Country2
group by continent
having sum(population) > 100000000
and
count(name)>3
				
			

Output:

continent sum_pop country_count
Asia
202934999
5

Question 8. Find the average population growth rate for countries in each continent.

Query:

				
					select avg(population_growth) avg_population, continent
from Country2
group by continent
				
			

Output:

avg_land_area continent
2.466666
Africa
NULL
Antarctica
1.918000
Asia
0.366000
Europe
1.112500
North America
1.050000
Oceania
0.930000
South America

Question 9 Find the continents where the average migration rate is less than 1.

Query:

				
					select continent, avg(migration_rate) avg_mig_rate
from Country2
group by continent
having avg(migration_rate)<1
				
			

Output:

continent avg_mig_rate
Africa
0.460000
South America
0.000000

Question 10. Query name of countries where birth_rate is less than avg_birth rate of continent.

Query:

				
					select a.name,a.birth_rate,
a.avg_birthrate,continent 
from(
    select 
    avg(birth_rate) over(partition by continent)
    avg_birthrate, continent,birth_rate,name
from Country2
) a
where a.birth_rate < a.avg_birthrate
				
			

Output:

name birth_rate avg_birthrate continent
Algeria
23.67
33.766666
Africa
Armenia
13.61
20.786000
Asia
Azerbaijan
16.64
20.786000
Asia
Bahrain
15.43
20.786000
Asia
Bangladesh
19.68
20.786000
Asia
Austria
9.41
10.628000
Europe
Andorra
8.13
10.628000
Europe
Aruba
12.67
16.225000
North America
Barbados
11.87
16.225000
North America
Bahamas
15.50
16.225000
North America

Question 11. Query Countries which have maximum population growth in their continent.

Query:

				
					select foo.name, 
foo.Max_pop_growth, 
foo.population_growth,
foo.continent 
from 
(
select max(population_growth) over(partition by continent) Max_pop_growth,
name,continent,
population_growth
from Country2
) foo
where foo.Max_pop_growth = foo.population_growth
				
			

Output:

name Max_pop_growth population_growth continent
Angola
2.78
2.78
Africa
Benin
2.78
2.78
Africa
Bahrain
4.51
4.51
Asia
Belgium
0.66
0.66
Europe
Belize
1.97
1.97
North America
Australia
1.05
1.05
Oceania
Argentina
0.93
0.93
South America
Aruba
12.67
16.225000
North America
Barbados
11.87
16.225000
North America
Bahamas
15.50
16.225000
North America

Question 12. For each country, calculate the percentage of the total population it represents within its continent.

Query:

				
					select foo.name,foo.continent,
foo.population,
foo.sum_pop as continent_tot_pop,
(foo.population* 100)/foo.sum_pop  as percentage_by_continent 
from
(select continent,
name,population,
sum(population) over(partition by continent) sum_pop from Country2) foo;
				
			

Output:

name continent population continent_tot_pop percentage_by_continent
Algeria
Africa
39542166
69616166
56
Angola
Africa
19625353
69616166
28
Benin
Africa
10448647
69616166
15
Antarctica
Antarctica
1106
1106
100
Armenia
Asia
3056382
202934999
1
Afghanistan
Asia
32564342
202934999
16
Azerbaijan
Asia
9780780
202934999
4
Bahrain
Asia
1346613
202934999
-
Bangladesh
Asia
156186882
202934999
76
Austria
Europe
8665550
32694070
26
Belarus
Europe
9589689
32694070
29
Belgium
Europe
11323973
32694070
34
Albania
Europe
3029278
32694070
9
Andorra
Europe
85580
32694070
0
Aruba
North Americ
112162
1074732
10
Belize
North America
347369
1074732
32
Barbados
North America
290604
1074732
27
Bahamas
North America
324597
1074732
30
Australia
Oceania
22751014
22751014
100
Argentina
South America
43431886
43431886
100

Question 13. Rank the countries in each continent based on their population.

Query:

				
					select name,continent,population,
Rank() over(partition by continent order by population) as country_rank 
from Country2;
				
			

Output:

name continent population country_rank percentage_by_continent
Benin
Africa
10448647
1
56
Angola
Africa
19625353
2
28
Algeria
Africa
39542166
3
15
Antarctica
Antarctica
1106
1
100
Bahrain
Asia
3056382
1
1
Armenia
Asia
32564342
2
16
Azerbaijan
Asia
9780780
3
4
Afghanistan
Asia
1346613
4
-
Bangladesh
Asia
156186882
5
76
Andorra
Europe
8665550
1
26
Albania
Europe
9589689
2
29
Austria
Europe
11323973
3
34
Belarus
Europe
3029278
4
9
Belgium
Europe
85580
5
0
Aruba
North Americ
112162
1
10
Barbados
North America
347369
2
32
Bahamas
North America
290604
3
27
Belize
North America
324597
4
30
Australia
Oceania
22751014
1
100
Argentina
South America
43431886
1
100

Question 14. Calculate the cumulative population for each continent in ascending order based on their population.

Query:

				
					select continent,population,
sum(population) over(partition by continent order by population) sum_pop 
from Country2;
				
			

Output:

continent population sum_pop
Africa
10448647
10448647
Africa
19625353
30074000
Africa
39542166
69616166
Antarctica
1106
1106
Asia
1346613
1346613
Asia
3056382
202934999
Asia
9780780
14183775
Asia
156186882
202934999
Europe
85580
85580
Europe
3029278
3114858
Europe
8665550
11780408
Europe
9589689
21370097
Europe
11323973
32694070
North Americ
112162
112162
North America
290604
402766
North America
324597
727363
North America
347369
1074732
Oceania
22751014
22751014
South America
43431886
43431886

Question 15. Lead Lag Questions.

Query:

				
					select name, area,
lag(area) over(order by name) as previous_area,
lead(area) over(order by name) as next_area
from Country2; 
				
			

Output:

name area previous_area next_area
Afghanistan
652230
NULL
28748
Albania
28748
652230
2381741
Algeria
2381741
28748
468
Andorra
468
2381741
1246700
Angola
1246700
468
14000000
Antarctica
14000000
1246700
2780400
Argentina
2780400
14000000
29800
Armenia
29800
2780400
180
Aruba
180
29800
7692024
Australia
7692024
180
83858
Austria
83858
7692024
86600
Azerbaijan
86600
83858
13880
Bahamas
13880
86600
760
Bahrain
760
13880
147570
Bangladesh
147570
760
430
Barbados
430
147570
207600
Belarus
207600
430
30528
Belgium
30528
207600
22966
Belize
22966
30528
112622
Benin
112622
22966
NULL

Ready to Unlock Your Data Potential?

Get Started Today

Subscribe to Newsletter

    Copyright©Dunboxed2024

    Scroll to Top

    Home