A brief overview with examples for Postgres 17. Created using previous Oracle Overview of group, analytical, and hierarchical queries.
CHAPTER 1: "Group functions"
CHAPTER 2: "Popular analytical functions"
CHAPTER 3: "Hierarchical Query"
CHAPTER 1: "Group functions"
Task 1.1. Group functions and group by clause.
Do next operations:
drop table tab1;
create table tab1(id numeric,
n numeric,
v text,
d timestamp);
insert into tab1 (id, n, v, d) values (1, 11, 'A', current_timestamp);
insert into tab1 (id, n, v, d) values (2, 21, 'B', current_timestamp);
insert into tab1 (id, n, v, d) values (3, 22, 'B', current_timestamp);
insert into tab1 (id, n, v, d) values (4, 31, 'C', current_timestamp);
insert into tab1 (id, n, v, d) values (5, null, 'C', current_timestamp);
insert into tab1 (id, n, v, d) values (6, 33, 'C', current_timestamp);
commit;
Then do next:
1. What do group function?
2. What maximum nesting level for aggregate functions?
3. Make result:
A 11 11
B 22 21
C 33 31
4. For which datatypes allowed using SUM and AVG.
5. Demonstrate different COUNT for all rows and for column N (for N also
demonstrate result equal COUNT for all rows).
Solution:
--1
Group functions work with sets of rows to give one result per group.
--2
1
--3
select v,
max(n),
min(n)
from tab1
group by v
order by v
--4
SUM: different numeric types, interval and money
AVG: different numeric types, interval
--5
select count(*),
count(1),
count(coalesce(n,1)),
count(n),
count(distinct n)
from tab1
Task 1.2. ROLLUP.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
Then do next:
1. Demonstrate (ROLLUP and non-ROLLUP) SUM(N) for V and add grand total to the
end of result. Grand total must have name, do not display NULL. Write result.
2. Demonstrate (ROLLUP and non-ROLLUP) SUM(N) for D, V and add grand total to
the end of result. Grand total must have name, do not display NULL. Write result.
Solution:
--1
select v,
sum(n)
from tab1
group by v
union all
select 'grand total',
sum(n)
from tab1
--
select coalesce(v, 'grand total'),
sum(n)
from tab1
group by rollup (v)
--Result
A 80
B 20
C 170
grand total 270
--2
select d,
v,
sum(n)
from tab1
group by d, v
union all
select d,
null, --or '{sub total}'
sum(n)
from tab1
group by d
union all
select 'grand total',
null,
sum(n)
from tab1
order by 1, 2
--Rollup produce n + 1 totals.
select coalesce(d, 'grand total'),
v,
sum(n)
from tab1
group by rollup (d, v)
order by 1, 2
--Result
1_SEPTEMBER A 10
1_SEPTEMBER B 20
1_SEPTEMBER 30
2_OCTOBER C 90
2_OCTOBER 90
3_NOVEMBER A 70
3_NOVEMBER C 80
3_NOVEMBER 150
grand total 270
Task 1.3. CUBE.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
Then do next:
1. Using CUBE demonstrate SUM(N) for V. Total must have name 'Total', do not
display NULL. Which difference between ROLLUP and CUBE. Write result.
2. Using CUBE demonstrate SUM(N) for D, V. All totals must have name 'Total', do
not display NULL. Write result.
Solution:
--1
--CUBE have result at the upper of table without order.
select coalesce(v, 'total'),
sum(n)
from tab1
group by cube (v)
--result
total 270
A 80
B 20
C 170
--2
--CUBE instead ROLLUP produces subtotals for all possible combinations of
--groupings specified in the GROUP BY CUBE clause, and a grand total.
--CUBE totals = 2^(count of columns from group by)
select coalesce(d, 'total'),
coalesce(v, 'total'),
sum(n)
from tab1
group by cube (d, v)
order by 1, 2
--result
1_SEPTEMBER A 10
1_SEPTEMBER B 20
1_SEPTEMBER total 30
2_OCTOBER C 90
2_OCTOBER total 90
3_NOVEMBER A 70
3_NOVEMBER C 80
3_NOVEMBER total 150
total A 80
total B 20
total C 170
total total 270
Task 1.4. GROUPING.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
Then do next:
1. Explain GROUPING.
2. Using COUNT function and ROLLUP (for D and N colummns) demonstrate GROUPING
for TAB1. Totals must have name 'Total'. Write result.
3. Using COUNT function and CUBE (for D and N colummns) demonstrate GROUPING for
TAB1. Totals must have name 'Total'. Write result.
Solution:
--1
GROUPING - is the function returns 1 (for subtotals from ROLLUP or CUBE),
otherwise return 0, also when return 1 this mean that in current row this column
not participate in GROUP BY clause for calculate subtotal. Using for make visual
difference between NULL values and NULL-total from ROLLUP and CUBE.
GROUPING argument or expression must match with argument or expression from GROUP
BY.
--2
select case
when grouping(d) = 1
then 'total'
else cast(d as text)
end d,
case
when grouping(n) = 1
then 'total'
else cast(n as text)
end n,
count(v),
grouping(d),
grouping(n)
from tab1
group by rollup (d, n)
order by 1, 2
1_SEPTEMBER 10 1 0 0
1_SEPTEMBER 20 1 0 0
1_SEPTEMBER total 2 0 1
2_OCTOBER 30 1 0 0
2_OCTOBER 60 1 0 0
2_OCTOBER total 3 0 1
2_OCTOBER 1 0 0
3_NOVEMBER 70 1 0 0
3_NOVEMBER 80 1 0 0
3_NOVEMBER total 2 0 1
total total 7 1 1
--3
select case
when grouping(d) = 1
then 'total'
else cast(d as text)
end d,
case
when grouping(n) = 1
then 'total'
else cast(n as text)
end n,
count(v),
grouping(d),
grouping(n)
from tab1
group by cube (d, n)
order by 1, 2
1_SEPTEMBER 10 1 0 0
1_SEPTEMBER 20 1 0 0
1_SEPTEMBER total 2 0 1
2_OCTOBER 30 1 0 0
2_OCTOBER 60 1 0 0
2_OCTOBER total 3 0 1
2_OCTOBER 1 0 0
3_NOVEMBER 70 1 0 0
3_NOVEMBER 80 1 0 0
3_NOVEMBER total 2 0 1
total 10 1 1 0
total 20 1 1 0
total 30 1 1 0
total 60 1 1 0
total 70 1 1 0
total 80 1 1 0
total total 7 1 1
total 1 1 0
Task 1.5. GROUPING_ID.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
Then do next:
1. Explain GROUPING_ID.
2. Using COUNT function and ROLLUP (for D and N colummns) demonstrate GROUPING_ID
for TAB1. Totals must have name 'Total'. Write result.
3. Using COUNT function and CUBE (for D and N colummns) demonstrate GROUPING_ID
for TAB1. Totals must have name 'Total'. Write result.
Solution:
--1
PostgresSQL has no GROUPING_ID Oracle-function. Use GORUPING to simulate it.
GROUPING_ID - is the function, that return a number for subtotal rows (from
ROLLUP or CUBE) and this number indicate the GROUP BY level. In this function
need write all columns from GROUP BY clause.
--2
--GROUPING_ID displays level for each totals from ROLLUP, where number 0 is first
--detailed total, then 1 is total for first argument from GROUPING_ID and etc.
select case (grouping(d) * 2 + grouping(n))
when 3
then 'total'
else cast(d as text)
end d,
case (grouping(d) * 2 + grouping(n))
when 1
then 'total'
else cast(n as text)
end n,
count(v),
(grouping(d) * 2 + grouping(n))
from tab1
group by rollup (d, n)
order by 1, 2
1_SEPTEMBER 10 1 0
1_SEPTEMBER 20 1 0
1_SEPTEMBER total 2 1
2_OCTOBER 30 1 0
2_OCTOBER 60 1 0
2_OCTOBER total 3 1
2_OCTOBER 1 0
3_NOVEMBER 70 1 0
3_NOVEMBER 80 1 0
3_NOVEMBER total 2 1
total 7 3
--3
--GROUPING_ID displays level for each totals from CUBE, where number 0 is first
--detailed total, then 1 is total for first argument from GROUPING_ID and etc.
select case (grouping(d) * 2 + grouping(n))
when 2
then 'total'
when 3
then 'total'
else cast(d as text)
end d,
case (grouping(d) * 2 + grouping(n))
when 1
then 'total'
when 3
then 'total'
else cast(n as text)
end n,
count(v),
(grouping(d) * 2 + grouping(n))
from tab1
group by cube (d, n)
order by 1, 2
1_SEPTEMBER 10 1 0
1_SEPTEMBER 20 1 0
1_SEPTEMBER total 2 1
2_OCTOBER 30 1 0
2_OCTOBER 60 1 0
2_OCTOBER total 3 1
2_OCTOBER 1 0
3_NOVEMBER 70 1 0
3_NOVEMBER 80 1 0
3_NOVEMBER total 2 1
total 10 1 2
total 20 1 2
total 30 1 2
total 60 1 2
total 70 1 2
total 80 1 2
total total 7 3
total 1 2
Task 1.6. GROUPING SETS.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
Then do next:
1. Explain GROUPING SETS.
2. Write a query (order by D) displaying SUM(N) for D, V and demonstrating
GROUPING SETS where also must be grand total. Write result.
3. Remake query from point 2 and demonstrating the use of GROUPING SETS instead
of ROLLUP, indicates totals by GROUPING_ID(V, D). Write result.
4. Remake query from point 2 and demonstrating the use of GROUPING SETS instead
of CUBE, indicates totals by GROUPING_ID(V, D). Write result.
5. What happens if at a query from point 2 add in GROUPING SETS clause CUBE(D,V)?
Solution:
--1
GROUPING SETS allows create totals for certain columns, column sets or for all
columns. All grouping columns from SELECT clause must be listed at the GROUPING
SETS.
--2
--PostgresSQL has no GROUPING_ID Oracle-function. Use GORUPING to simulate it.
select d,
v,
sum(n)
from tab1
group by grouping sets (
(d, v), ()
)
order by d, v
--
D V SUM(N)
1_SEPTEMBER A 10
1_SEPTEMBER B 20
2_OCTOBER C 90
3_NOVEMBER A 70
3_NOVEMBER C 80
270
--3
--PostgresSQL has no GROUPING_ID Oracle-function. Use GORUPING to simulate it.
select d,
v,
sum(n),
(grouping(v) * 2 + grouping(d))
from tab1
group by grouping sets (
(d, v), d, ()
)
order by d, v
--
D V SUM(N) GROUPING_ID(V,D)
1_SEPTEMBER A 10 0
1_SEPTEMBER B 20 0
1_SEPTEMBER 30 2
2_OCTOBER C 90 0
2_OCTOBER 90 2
3_NOVEMBER A 70 0
3_NOVEMBER C 80 0
3_NOVEMBER 150 2
270 3
--4
select d,
v,
sum(n),
(grouping(v) * 2 + grouping(d))
from tab1
group by grouping sets (
(d, v), (d), (v), ()
)
order by d, v
--
D V SUM(N) GROUPING_ID(V,D)
1_SEPTEMBER A 10 0
1_SEPTEMBER B 20 0
1_SEPTEMBER 30 2
2_OCTOBER C 90 0
2_OCTOBER 90 2
3_NOVEMBER A 70 0
3_NOVEMBER C 80 0
3_NOVEMBER 150 2
A 80 1
B 20 1
C 170 1
270 3
--5
--will be added additional rows with totals as at using the CUBE
select d,
v,
sum(n)
from tab1
group by grouping sets (
(d, v), (), cube(d, v)
)
order by d, v
Task 1.7. GROUPING SETS. Part 2.
Do next operations:
drop table tab1;
create table tab1(year text,
shop text,
product text,
color text,
quantity numeric);
insert into tab1 (year, shop, product, color, quantity)
values (2000, 'shop1', 'a', 'black', 1);
insert into tab1 (year, shop, product, color, quantity)
values (2000, 'shop2', 'a', 'white', 2);
insert into tab1 (year, shop, product, color, quantity)
values (2001, 'shop1', 'a', 'white', 4);
commit;
Then do next:
1. Using several GROUPING SETS write a query displaying all columns +
SUM(QUANTITY), for next groups:
YEAR + PRODUCT,
YEAR + COLOR,
SHOP + PRODUCT,
SHOP + COLOR.
Write result.
Solution:
--1
select coalesce(year, '-'),
coalesce(shop, '-'),
coalesce(product, '-'),
coalesce(color, '-'),
sum(quantity)
from tab1
group by grouping sets (year, shop),
grouping sets (product, color)
--Result = 4 groups = quantity of columns from GROUPING SETS (YEAR, SHOP) *
--quantity of columns from GROUPING SETS (PRODUCT, COLOR)
"YEAR" "SHOP" "PRODUCT" "COLOR" "QUANTITY"
2001 - a - 4
2000 - a - 3
2000 - - white 2
2000 - - black 1
2001 - - white 4
- shop1 a - 5
- shop2 a - 2
- shop1 - black 1
- shop2 - white 2
- shop1 - white 4
Task 1.8. ROLLUP and CUBE with composite columns.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
Then do next:
1. Write a query (order by D) displaying SUM(N) for D, V and demonstrating total
for (D, V), total for D and also grand total using composite columns. Write result.
2. Write a query (order by D) displaying SUM(N) for D, V and demonstrating total
for (D, V) and grand total using composite columns. Write result.
Solution:
--1
--will be 3 totals: for D, for D, V and grand total
select d,
v,
sum(n)
from tab1
group by rollup(
d, (d, v)
)
order by d, v
1_SEPTEMBER A 10
1_SEPTEMBER B 20
1_SEPTEMBER 30
2_OCTOBER C 90
2_OCTOBER 90
3_NOVEMBER A 70
3_NOVEMBER C 80
3_NOVEMBER 150
270
--2
--will be 2 totals: D, V and grand total
select d,
v,
sum(n)
from tab1
group by cube(
(d, v)
)
order by d, v
1_SEPTEMBER A 10
1_SEPTEMBER B 20
2_OCTOBER C 90
3_NOVEMBER A 70
3_NOVEMBER C 80
270
CHAPTER 2: "Popular analytical functions"
Task 2.1. Analytic functions.
1. What difference between analytical and aggregate functions?
2. How to processes analytical function in query? Where is it can be appears?
3. What does "window" mean and where is it defined? What determines a "window"?
What is a default value of the "window"?
4. Write structure of analytic function with clauses.
Solution:
--1
Analytic function return multiple rows for each group.
--2
--analytic functions can appear in SELECT, ORDER BY clause
Process:
1. FROM, JOINS, WHERE, GROUP BY, and HAVING clauses are completed first.
2. Analytic function operates on a query result set. DISTINCT applies after
analytic function to exclude duplicates and additional sorting!
3. Execute ORDER BY (if exists) for additional sorting.
--3
The group of rows is called a window and is defined by the analytic_clause. For
each row, a sliding window of rows is defined.
The window determines the range of rows used to perform the calculations for the
current row. Window is extension of ORDER_BY_CLAUSE in analytic function.
Default value = RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.
--4
Analytic_function(arguments) -> OVER (analytic_clause)
analytic_clause = query_partition_clause -> order_by_clause -> windowing_clause
Task 2.2. RANK, DENSE_RANK, ROWNUM, ROW_NUMBER.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
In any places with ORDER BY use ASC ordering and then do next:
1. What does RANK calculates?
2. Demonstrate RANK for row with N = 60, V = 'Z' and write type of syntax. Write
result.
3. Using different syntax from point 2 write query with:
- RANK for column V;
- DENSE_RANK for column V;
- ROWNUM analog;
- ROW_NUMBER for column V.
Write result.
4. With partition for V using different syntax from point 2 write query with:
- RANK for column V;
- DENSE_RANK for column V;
- ROWNUM analog;
- ROW_NUMBER for column V.
Write result.
Solution:
--1
RANK calculates the rank of a value in a group of values.
--2
--Aggregate Syntax
--result = 5
select rank(60, 'Z') within group (order by n, v)
from tab1
--3
--Analytic Syntax
select v,
rank() over (order by v) "rank",
dense_rank() over (order by v) "dense_rank",
--row_number is analog of rownum
row_number() over (order by v) "row_number"
from tab1
--result
V RANK DENSE_RANK ROW_NUMBER
A 1 1 1
A 1 1 2
B 3 2 3
C 4 3 4
C 4 3 5
C 4 3 6
C 4 3 7
--4
select v,
rank() over (partition by v order by v) "rank",
dense_rank() over (partition by v order by v) "dense_rank",
--row_number is analog of rownum
row_number() over (partition by v order by v) "row_number"
from tab1
--result
V RANK DENSE_RANK ROW_NUMBER
A 1 1 1
A 1 1 2
B 1 1 1
C 1 1 1
C 1 1 2
C 1 1 3
C 1 1 4
Task 2.3. Sorting and RANK, DENSE_RANK, ROWNUM, ROW_NUMBER.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
Then do next:
1. Using analytical function for partition = V and sorting N by ascending
display in result all columns with only first rows from each partition. Write
result.
2. Make 1 query where summarize of N and sort sum by DESC. Result must have 3
rows. Do not write ORDER BY clause at the end of the query. Write result.
3. Make 1 query where display N, RANK for partition N + order by any column.
But sorting in column N must be descending. Do not write ORDER BY clause at the
end of query. Write result.
Solution:
--1
select t.*
from
(
select t1.*,
row_number() over (partition by v order by n asc) first_row
from tab1 t1
) t
where t.first_row = 1
--result
N V D FIRST_ROW
10 A 1_SEPTEMBER 1
20 B 1_SEPTEMBER 1
30 C 2_OCTOBER 1
--2
select v,
sum(n) sumn,
dense_rank() over(order by sum(n) desc) ordering
from tab1
group by v
--result
V SUMN ORDERING
C 170 1
A 80 2
B 20 3
--3
select coalesce(cast(n as text), 'null') n,
rank() over (partition by n * -1 order by v asc) ordering
from tab1
--result
N ORDERING
80 1
70 1
60 1
30 1
20 1
10 1
null 1
Task 2.4. Using analytical functions.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
Then do next:
1. Write query using analytical functions where display all columns and:
- SUM of N;
- SUM of N with order by D ASC;
- SUM of N with order by D, N ASC;
- SUM of N with partition by D and order by D ASC;
- SUM of N with partition of D and order by N ASC;
Solution:
--1
select n,
v,
d,
sum(n) over() s1,
sum(n) over (order by d) s2,
sum(n) over (order by d, n) s3,
sum(n) over (partition by d order by d) s4,
sum(n) over (partition by d order by n) s5
from tab1
--
N V D S1 S2 S3 S4 S5
10 A 1_SEPTEMBER 270 30 10 30 10
20 B 1_SEPTEMBER 270 30 30 30 30
30 C 2_OCTOBER 270 120 60 90 30
60 C 2_OCTOBER 270 120 120 90 90
C 2_OCTOBER 270 120 120 90 90
70 A 3_NOVEMBER 270 270 190 150 70
80 C 3_NOVEMBER 270 270 270 150 150
Task 2.5. LAG, LEAD.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
In any places with ORDER BY use ASC ordering and then do next:
1. Make query where display:
- N;
- using short syntax (is mean use default values and do not write them in query)
display for COALESCE(N, 99) (order by D) previous and next values. Write result.
2. Make query where display:
- N;
- using full syntax display for N partition by D order by V previous (eliminate
NULL rows in N column) and next (do not eliminate NULL rows in N column) values
with step = 1 (if value is out of scope then display 0). Write result.
Solution:
--1
select n,
lag(coalesce(n, 99)) over (order by d) lg,
lead(coalesce(n, 99)) over (order by d) ld
from tab1
--
N LG LD
10 20
20 10 30
30 20 99
30 60
60 99 70
70 60 80
80 70
--2
--Postgres (before version 19) have not RESPECT NULLS, IGNORE NULLS. The
--behaviour is always like RESPECT_NULLS. So just write a query without that.
select n,
lag(n, 1, 0) over (partition by d order by v) lag,
lead(n, 1, 0) over (partition by d order by v) lead
from tab1
--
N LAG LEAD
10 0 20
20 10 0
30 0
30 60
60 0
70 0 80
80 70 0
Task 2.6. FIRST. LAST.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
1. What are aggregate functions can use with the FIRST or LAST?
2. Using aggregate syntax display V, minimum of N for first and last row in group
using ASC sorting for N. Do not use ORDER BY at the end of query. Write result.
3. Remake point 2, but display maximum of N. Also use ASC sorting for V. Do not
use ORDER BY at the end of the query. Write result.
4. Rewrite queries from points 2 and 3 together, but get SUM(N) instead minimum,
maximum and also use analytical syntax. Result must be sorted by V DESC, but do
not use ORDER BY at the end of the query. Write result.
Solution:
--1
Postgres have not Oracle KEEP clause
--2
--because have not KEEP in postgres, we may use something like that
select distinct on (v)
v,
first_value(n) over(partition by v order by n asc) mn_first,
first_value(n) over(partition by v order by n desc) mn_last
from tab1
--result
V MN_FIRST MN_LAST
A 10 70
B 20 20
C 30
--3
select distinct on (v)
v,
max(n) over(partition by v order by v asc) mx_first,
max(n) over(partition by v order by v desc) mx_last
from tab1
--result
V MX_FIRST MX_LAST
A 70 70
B 20 20
C 80 80
--4
select v,
first_value(n) over(partition by (case v
when 'A' then '3'
when 'B' then '2'
when 'C' then '1'
else null
end) order by n asc) sum_first_n,
first_value(n) over(partition by v order by n desc) sum_last_n,
sum(n) over(partition by v order by v asc) sum_first_v,
sum(n) over(partition by v order by v desc) sum_last_v
from tab1
--result
V SUM_FIRST_N SUM_LAST_N SUM_FIRST_V SUM_LAST_V
C 30 170 170
C 30 170 170
C 30 170 170
C 30 170 170
B 20 20 20 20
A 10 70 80 80
A 10 70 80 80
Task 2.7. FIRST_VALUE. LAST_VALUE.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
commit;
In any places with ORDER BY use ASC ordering and then do next:
1. Display N, first (do not eliminate NULL rows of N) and last (eliminate NULL
rows of N) values of N for whole table with ORDER BY N ASC at the end of query.
Write result.
2. Do not eliminate NULL rows display N, first value and last value for partition
D and order by V. Write result.
Solution:
--1
--Postgres (before version 19) have not RESPECT NULLS, IGNORE NULLS. The
--behaviour is always like RESPECT_NULLS. So just write a query without that.
--if do not specify analytic_clause in OVER, then FIRST_VALUE and LAST_VALUE
--returns value after ORDER BY
select n,
first_value(n) over() fv,
last_value(n) over() lv
from tab1
order by n
--result
N FV LV
10 10
20 10
30 10
60 10
70 10
80 10
10
--2
select n,
first_value(n) over(partition by d order by v) fv,
last_value(n) over(partition by d order by v) lv
from tab1
--result may be different for rows wherein v = C
N FV LV
10 10 10
20 10 20
30 30
60 30
30
70 70 70
80 70 80
Task 2.8. NTILE.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
Then do next:
1. Write query where display D column and divide TAB1 by 0.5, 1, 2, 5.5, 10
buckets sorting by column D ASC. Write result.
2. Remake point 1, but use partition for column D.
Solution:
--1
select d,
--ntile(cast(trunc(0.5) as int)) over(order by d) n05,
--will be error, because noninteger
--value truncates to integer and it
--must be positive integer, not a zero
ntile(1) over(order by d) n1,
ntile(2) over(order by d) n2,
ntile(cast(trunc(5.5) as int)) over(order by d) n55,
ntile(10) over(order by d) n10--if number of rows <= ntile argument then
--number of buckets = the number of rows
from tab1
--result
D N1 N2 N55 N10
1_SEPTEMBER 1 1 1 1
1_SEPTEMBER 1 1 1 2
2_OCTOBER 1 1 2 3
2_OCTOBER 1 1 2 4
2_OCTOBER 1 2 3 5
3_NOVEMBER 1 2 4 6
3_NOVEMBER 1 2 5 7
--2
select d,
--ntile(0.5) over(partition by d order by d) n05, --will be error,
--Postgres does not truncate to int ntile argument automatically
ntile(1) over(partition by d order by d) n1,
ntile(2) over(partition by d order by d) n2,
ntile(cast(trunc(5.5) as int)) over(partition by d order by d) n55,
ntile(10) over(partition by d order by d) n10
from tab1
--result
D N1 N2 N55 N10
1_SEPTEMBER 1 1 1 1
1_SEPTEMBER 1 2 2 2
2_OCTOBER 1 1 1 1
2_OCTOBER 1 1 2 2
2_OCTOBER 1 2 3 3
3_NOVEMBER 1 1 1 1
3_NOVEMBER 1 2 2 2
Task 2.9. Finding gaps. Removing duplicates.
Do next operations:
drop table tab1;
create table tab1(id numeric,
n numeric,
v text,
d text);
insert into tab1 (id, n, v, d) values (1, 10, 'A', '1_SEPTEMBER');
insert into tab1 (id, n, v, d) values (2, 20, 'B', '1_SEPTEMBER');
insert into tab1 (id, n, v, d) values (3, 30, 'C', '2_OCTOBER');
insert into tab1 (id, n, v, d) values (4, null, 'C', '2_OCTOBER');
insert into tab1 (id, n, v, d) values (5, 60, 'C', '2_OCTOBER');
insert into tab1 (id, n, v, d) values (6, null, 'A', '3_NOVEMBER');
insert into tab1 (id, n, v, d) values (7, 80, 'C', '3_NOVEMBER');
commit;
Then do next:
1. Without analyt. function find the first value of N (i.e. 30) for which the
next value of N is not equal to current N + 10.
2. Using analyt. function find first value of N, last value of N, all values
of N for which the next value of N is not equal to current N + 10 (except last
row = 80). That is display: the first gap, last gap and all gaps in the sequence.
3. Write a DELETE statement that demonstrates the fastest way to remove duplicate
records based on column D.
Solution:
--1
select min(t1.n) min_n
from tab1 t1
left join tab1 t2
on t1.n + 10 = t2.n
where t2.n is null
--2
select min(n) over() n_first
,max(n) over() n_last
,n n_all
from (select lead(n) over(order by n) ld
,n
from tab1)
where ld - n <> 10
--3
delete from tab1
where id in (select id
from (select id
,row_number()
over(partition by d) rn
from tab1)
where rn <> 1);
Task 2.10. Working with WINDOW.
Do next operations:
drop table tab1;
create table tab1(n numeric,
v text,
d text);
insert into tab1 (n, v, d) values (10, 'A', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (20, 'B', '1_SEPTEMBER');
insert into tab1 (n, v, d) values (30, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (null, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (60, 'C', '2_OCTOBER');
insert into tab1 (n, v, d) values (70, 'A', '3_NOVEMBER');
insert into tab1 (n, v, d) values (80, 'C', '3_NOVEMBER');
commit;
Then do next:
1. Use sorting by N and window clause. Write query to display N and sum of N:
- for previous row, current row and next row;
- for all rows;
- for only current row;
- for current row and next 2 rows;
- since first row to current row.
Write result.
2. Use sorting by D and window clause. Write query to display D, N and sum of N:
- for range since first row to current row;
- for range since current row to last row;
- for range of current row only;
- for range of all rows;
Write result.
Solution:
--1
select n
,sum(n) over(order by n rows between 1 preceding and 1 following) s1
,sum(n) over(order by n rows between unbounded preceding
and unbounded following) s2
,sum(n) over(order by n rows between 0 preceding and 0 following) s3
,sum(n) over(order by n rows between current row and 2 following) s4
,sum(n) over(order by n rows unbounded preceding) s5
from tab1
N S1 S2 S3 S4 S5
10 30 270 10 60 10
20 60 270 20 110 30
30 110 270 30 160 60
60 160 270 60 210 120
70 210 270 70 150 190
80 150 270 80 80 270
80 270 270
--2
select d
,n
,sum(n) over(order by d
range between unbounded preceding and current row) s1
,sum(n) over(order by d
range between current row and unbounded following) s2
,sum(n) over(order by d
range between current row and current row) s3
,sum(n) over(order by d
range between unbounded preceding and unbounded following) s4
from tab1
D N S1 S2 S3 S4
1_SEPTEMBER 10 30 270 30 270
1_SEPTEMBER 20 30 270 30 270
2_OCTOBER 30 120 240 90 270
2_OCTOBER 120 240 90 270
2_OCTOBER 60 120 240 90 270
3_NOVEMBER 70 270 150 150 270
3_NOVEMBER 80 270 150 150 270
CHAPTER 3: "Hierarchical Query"
Task 3.1. Hierarchical query clauses.
1. List the hierarchical query clauses and explain what are they do.
2. Explain recursive query evaluation.
3. What are two clauses valid only in recursive queries?
Solution:
--1
WITH RECURSIVE - it allows a SELECT subquery to reference itself by name, i.e.
using RECURSIVE, a WITH query can refer to its own output. It helps create
recursive query. When there are multiple queries in the WITH clause, RECURSIVE
should be written only once, immediately after WITH.
--
SEARCH - computes a 'search_seq_col_name' column that will be added to the result
of the WITH query and can be used in the outer query to sorting (breadth-first or
depth-first order).
--
CYCLE - detect cycles in recursive queries and produces column
'cycle_mark_col_name' that will be added to the result column list of the WITH
query, this column will be set to boolean 'cycle_mark_value' when a cycle has
been detected, else to 'cycle_mark_default'. Processing of the recursive UNION
will stop when a cycle has been detected.
--2
Firstly Postgres process non-recursive query once. On next step use rows from
non-recursive result in recursive of query and produce result that will be used
each time on next steps by recursive query until recursive query produces rows.
It is mean recursive query may read rows from previous step (not all rows) of
recursive query.
--3
SEARCH, CYCLE. SEARCH columns appears before CYCLE columns.
Task 3.2. Hierarchical query. Part 1.
Do next operations:
drop table tab1;
create table tab1(id numeric,
pid numeric,
name text);
insert into tab1(id, pid, name) values(0, null, 'A');
insert into tab1(id, pid, name) values(1, 0, 'B');
insert into tab1(id, pid, name) values(2, 0, 'C');
insert into tab1(id, pid, name) values(null, 0, 'D');
insert into tab1(id, pid, name) values(4, 2, 'E');
insert into tab1(id, pid, name) values(5, null, 'F');
insert into tab1(id, pid, name) values(6, 5, 'G');
insert into tab1(id, pid, name) values(7, 3, 'H');
commit;
Suppose that ID is child column, PID is parent. Then do next:
1. Write hierarchical query without specifies root rows and also indicate level
of each row. Explain how much rows will be in result.
2. Remake query from point 1, but:
- root row must contain NULL in PID and sort by depth-first order (without
built-in syntax), NAME within hierarchy;
- instead NAME use function that returns the path of a NAME from root to node;
- for each row display NAME from root row.
Write result.
3. Remake query from point 1, but:
- display only leaf rows;
- root row must contain ID = 0 and sort by depth-first order (with built-in
syntax), NAME within hierarchy. Write result.
4. Write query where display column NAME in one row as H,G,F,E,D,C,B,A.
Solution:
--1
--Will be retrieved 14 rows: 8 rows for 1 level, 5 rows for 2 level, 1 row for 3
--level. Postgres does not support Oracle LEVEL, simulate it.
with recursive cte0 as (
select t1.id
,t1.pid
,t1.name
,1 level
from tab1 t1
union all
select t2.id
,t2.pid
,t2.name
,c0.level + 1 level
from tab1 t2
,cte0 c0
where t2.pid = c0.id
)
select c0.*
from cte0 c0
--2
with recursive cte0 as (
select t1.id
,t1.pid
,'/' || t1.name name
,t1.name root
,1 level
,array[t1.id] path
from tab1 t1
where t1.pid is null
union all
select t2.id
,t2.pid
,c0.name || '/' || t2.name name
,c0.root
,c0.level + 1 level
,path || t2.id path
from tab1 t2
,cte0 c0
where t2.pid = c0.id
)
select c0.id
,c0.pid
,c0.name
,c0.root
,c0.level
from cte0 c0
order by c0.path
,c0.name
--will be two root rows: A and F
ID PID NAME ROOT LEVEL
0 /A A 1
1 0 /A/B A 2
2 0 /A/C A 2
4 2 /A/C/E A 3
0 /A/D A 2
5 /F F 1
6 5 /F/G F 2
--3
with recursive cte0 as (
select t1.id
,t1.pid
,t1.name
,1 level
,array[t1.id] path
from tab1 t1
where t1.id = 0
union all
select t2.id
,t2.pid
,t2.name
,c0.level + 1 level
,path || t2.id path
from tab1 t2
,cte0 c0
where t2.pid = c0.id
) search depth first by id set ordercol
select c0.id
,c0.pid
,c0.name
,c0.level
,case
when not exists (
select null
from tab1 t1
where c0.id = t1.pid
)
then 1
else 0
end leaf_is_1
from cte0 c0
where not exists(
select null
from tab1 t1
where c0.id = t1.pid
)
order by c0.ordercol
,c0.name
ID PID NAME LEVEL LEAF_IS_1
1 0 B 2 1
4 2 E 3 1
0 D 2 1
--4
with recursive cte0 as (
(select row_number() over(order by name desc) rn
,name
from tab1 t1
limit 1)
union all
select t2.rn
,c0.name || t2.name name
from cte0 c0
join(
select row_number() over(order by name desc) rn
,name
from tab1 t1
) t2
on c0.rn = t2.rn - 1
) search depth first by rn set ordercol
select c0.name
from cte0 c0
order by c0.rn desc limit 1
Task 3.3. Hierarchical query. Part 2.
Do next operations:
drop table tab1;
create table tab1(id numeric,
pid numeric,
name text);
insert into tab1(id, pid, name) values(0, null, 'A');
insert into tab1(id, pid, name) values(1, 0, 'B');
insert into tab1(id, pid, name) values(2, 0, 'C');
insert into tab1(id, pid, name) values(2, 2, 'D');
insert into tab1(id, pid, name) values(4, 2, 'E');
commit;
Suppose that ID is child column, PID is parent. Then do next:
1. Create hierarchical query where root row must contain ID = 0. Indicate
row that has a child which is also its ancestor. Write result.
2. Create hierarchical query where root row must contain ID = 0 and calculate
count ID for PID, NAME. In result display only row that has a child which is
also its ancestor. Write result.
3. Make query that return dates for year (365 or 366 rows, since 1 JAN to 31 DEC)
depends on leap year.
4. Split string 'a,b,c,,d' into one column with values 'a', 'b', 'c', 'd' using
hierarchical query and REGEXP functions by two ways: display empty value (for
',,') as row with NULL and not.
Solution:
--1
with recursive cte0 as (
select t1.id
,t1.pid
,t1.name
from tab1 t1
where t1.id = 0
union all
select t2.id
,t2.pid
,t2.name
from tab1 t2
,cte0 c0
where t2.pid = c0.id
) cycle id set is_cycle using path
select c0.id
,c0.pid
,c0.name
,c0.is_cycle --similar to Oracle CONNECT_BY_ISCYCLE
,coalesce(c2.is_cycle, false) CONNECT_BY_ISCYCLE
from cte0 c0
left join cte0 c2
on c0.id = c2.id
and c0.id = c2.pid
where not c0.is_cycle
order by c0.path
--
ID PID NAME IS_CYCLE CONNECT_BY_ISCYCLE
0 A false false
1 0 B false false
2 0 C false true
4 2 E false false
--2
with recursive cte0 as (
select t1.id
,t1.pid
,t1.name
from tab1 t1
where t1.id = 0
union all
select t2.id
,t2.pid
,t2.name
from tab1 t2
,cte0 c0
where t2.pid = c0.id
) cycle id set is_cycle using path
select COUNT(c0.id) cnt
,c0.pid
,c0.name
from cte0 c0
join cte0 c2
on c0.id = c2.id
and c0.id = c2.pid
where not c0.is_cycle
group by c0.pid
,c0.name
--
CNT PID NAME
1 0 C
--3
with recursive cte0 as (
select to_date('01.01.'||extract(year from current_date), 'dd.mm.yyyy') dt
,1 level
union all
select c0.dt + 1 dt
,level + 1
from cte0 c0
where level + 1 <= 337 +
to_date('01.03.'||extract(year from current_date), 'dd.mm.yyyy')-
to_date('01.02.'||extract(year from current_date), 'dd.mm.yyyy')
)
select c0.dt
from cte0 c0
--4
--display empty value
with recursive cte0 as (
select null col
,'a,b,c,,d' st
,1 level
union all
select regexp_substr(c0.st, '(.*?)(,|$)', 1, level, '', 1)
,c0.st
,level + 1 level
from cte0 c0
where level <= regexp_count(c0.st, '(,|$)')
)
select c0.col
from cte0 c0
offset 1 row
--do not display empty value
with recursive cte0 as (
select null col
,'a,b,c,,d' st
,1 level
union all
select regexp_substr(c0.st, '[^,]+', 1, level) col
,c0.st
,level + 1 level
from cte0 c0
where regexp_instr(c0.st, '[^,]+', 1, level) <> 0
)
select c0.col
from cte0 c0
offset 1 row