Frontier Software

With Recursive

The Postgresql documentation has various code examples.

The basic structure of a recursive query is

WITH RECURSIVE with_table(init, next, ...) AS (
  SELECT init, next, ... 
  FROM graph_table
    UNION
  SELECT with_table.init, graph_table.next, ... 
  FROM graph_table JOIN with_table ON with_table.next = graph_table.init 
)
SELECT ...
FROM mytable
WHERE init = X
AND next = Y;

The base query can’t reference mytable while the recursive query joins mytable and graph_table to make a transitive closure.

These examples are taken from edX’s Databases: OLAP and Recursion free online course.

I put a Prolog version swish.

The Stanford course makes a digression into how nice it would be if the recursive query could be if the recursive query could be written from mytable R1, mytable R2, but trying that causes Postgresql to barf:

ERROR:  recursive reference to query "mytable" must not appear more than once

Family Tree example

create table ParentOf(parent text, child text);

insert into ParentOf values ('Alice', 'Carol');
insert into ParentOf values ('Bob', 'Carol');
insert into ParentOf values ('Carol', 'Dave');
insert into ParentOf values ('Carol', 'George');
insert into ParentOf values ('Dave', 'Mary');
insert into ParentOf values ('Eve', 'Mary');
insert into ParentOf values ('Mary', 'Frank');

I’ve rewritten the provided example to explicitly use a join between the recursive table’s child and the “normal” table’s parent.

This goes “up” the tree to ancestors (Widden used alias a for parent and alias d for child, but I decluttered by simply using the provided column names).

WITH RECURSIVE ancestor(parent, child) AS (
  SELECT parent, child
  FROM ParentOf
    UNION
  SELECT Ancestor.parent, ParentOf.child
  FROM Ancestor JOIN ParentOf ON Ancestor.child = ParentOf.parent
)
SELECT parent
FROM ancestor
WHERE child = 'Frank';

And this goes “down” the tree:

WITH RECURSIVE ancestor(parent, child) AS (
  SELECT parent, child
  FROM ParentOf
    UNION
  SELECT Ancestor.parent, ParentOf.child
  FROM Ancestor JOIN ParentOf ON Ancestor.child = ParentOf.parent
)
SELECT child
FROM ancestor
WHERE parent = 'Alice';

Paths

Typically we want to get a route. If we know the “start” and “end” nodes, we can get the path like this:

WITH RECURSIVE ancestor(parent, child, path) AS (
  SELECT parent, child, ARRAY[parent, child]
  FROM ParentOf
    UNION
  SELECT Ancestor.parent, ParentOf.child, path || ParentOf.child
  FROM Ancestor JOIN ParentOf ON Ancestor.child = ParentOf.parent
)
SELECT path
FROM ancestor
WHERE parent = 'Alice'
AND child = 'Frank';

If we have a fixed root node, we can hardwire it by moving WHERE parent = 'Alice' to the base case:

WITH RECURSIVE ancestor(parent, child, path) AS (
  SELECT parent, child, ARRAY[parent, child]
  FROM ParentOf
  WHERE parent = 'Alice'
    UNION
  SELECT Ancestor.parent, ParentOf.child, path || ParentOf.child
  FROM Ancestor JOIN ParentOf ON Ancestor.child = ParentOf.parent
)
SELECT path FROM ancestor
WHERE child = 'Frank';

Hierarchy

create table Employee(ID int, salary int);
create table Manager(mID int, eID int);
create table Project(name text, mgrID int);

insert into Employee values (123, 100);
insert into Employee values (234, 90);
insert into Employee values (345, 80);
insert into Employee values (456, 70);
insert into Employee values (567, 60);
insert into Manager values (123, 234);
insert into Manager values (234, 345);
insert into Manager values (234, 456);
insert into Manager values (345, 567);
insert into Project values ('X', 123);
insert into Project values ('Y', 234);
insert into Project values ('Z', 456);

Find total salary cost of project ‘X’

Note the recursive call does not include the starting node (top project manager in this example) so needs to be added.

WITH RECURSIVE superior(mID, eID) AS (
  SELECT mID, eID
  FROM Manager
    UNION
  SELECT superior.mID, Manager.eID
  FROM superior JOIN manager ON superior.eID = manager.mID
)
SELECT sum(salary) FROM (
  SELECT ID, salary FROM Employee WHERE ID = (SELECT mgrID FROM Project WHERE name = 'X')
    UNION
  SELECT eID AS ID, salary
  FROM superior JOIN Employee ON Employee.ID = superior.eID
  WHERE mID = (SELECT mgrID FROM Project WHERE name = 'X')
);

Find total salary cost of project ‘Y’ and ‘Z’

WITH RECURSIVE superior(mID, eID) AS (
  SELECT mID, eID
  FROM Manager
    UNION
  SELECT superior.mID, Manager.eID
  FROM superior JOIN manager ON superior.eID = manager.mID
)
SELECT 'Y-total' AS project_name, sum(salary) FROM (
  SELECT ID, salary FROM Employee WHERE ID = (SELECT mgrID FROM Project WHERE name = 'Y')
    UNION
  SELECT eID AS ID, salary
  FROM superior JOIN Employee ON Employee.ID = superior.eID
  WHERE mID = (SELECT mgrID FROM Project WHERE name = 'Y')
)
UNION
SELECT 'Z-total' AS project_name, sum(salary) FROM (
  SELECT ID, salary FROM Employee WHERE ID = (SELECT mgrID FROM Project WHERE name = 'Z')
    UNION
  SELECT eID AS ID, salary
  FROM superior JOIN Employee ON Employee.ID = superior.eID
  WHERE mID = (SELECT mgrID FROM Project WHERE name = 'Z')
);

Airline flights

create table Flight(orig text, dest text, airline text, cost int);

insert into Flight values ('A', 'ORD', 'United', 200);
insert into Flight values ('ORD', 'B', 'American', 100);
insert into Flight values ('A', 'PHX', 'Southwest', 25);
insert into Flight values ('PHX', 'LAS', 'Southwest', 30);
insert into Flight values ('LAS', 'CMH', 'Frontier', 60);
insert into Flight values ('CMH', 'B', 'Frontier', 60);
insert into Flight values ('A', 'B', 'JetBlue', 195);

First find all costs

WITH RECURSIVE route(orig, dest, total, path) AS (
  SELECT orig, dest, cost, ARRAY[orig, dest]
  FROM Flight 
  UNION
  SELECT route.orig, Flight.dest, total + cost, path || Flight.dest
  FROM Flight JOIN route ON route.dest = flight.orig 
)
SELECT path, total
FROM route
WHERE orig = 'A'
AND dest = 'B'
ORDER BY total;

Guarding against cycles

insert into Flight values ('CMH', 'PHX', 'Frontier', 80);

Using Cycle Detection from the Postgresql documentation as a a template:

WITH RECURSIVE route(orig, dest, is_cycle, total, path) AS (
  SELECT orig, dest, false, cost, ARRAY[orig, dest]
  FROM Flight 
  UNION
  SELECT route.orig, Flight.dest, Flight.dest = ANY(path), total + cost, path || Flight.dest
  FROM Flight JOIN route ON route.dest = flight.orig
  WHERE NOT is_cycle
)
SELECT path, total
FROM route
WHERE orig = 'A'
AND dest = 'B'
ORDER BY total;