1、数据结构

假设我们依据以下表结构存储城市及城市间距相关数据:


IF OBJECT_ID('dbo.Roads') IS NOT NULL
  DROP TABLE dbo.Roads;
GO
IF OBJECT_ID('dbo.Cities') IS NOT NULL
  DROP TABLE dbo.Cities;
GO

CREATE TABLE dbo.Cities
(
  cityid  CHAR(3)     NOT NULL PRIMARY KEY,
  city    VARCHAR(30) NOT NULL,
  region  VARCHAR(30) NULL,
  country VARCHAR(30) NOT NULL
);

CREATE TABLE dbo.Roads
(
  city1       CHAR(3) NOT NULL REFERENCES dbo.Cities,
  city2       CHAR(3) NOT NULL REFERENCES dbo.Cities,
  distance INT     NOT NULL,
  PRIMARY KEY(city1, city2),
  CHECK(city1 < city2),
  CHECK(distance > 0)
);

并插入以下测试数据:

INSERT INTO dbo.Cities(cityid, city, region, country) VALUES
  ('ATL', 'Atlanta', 'GA', 'USA'),
  ('ORD', 'Chicago', 'IL', 'USA'),
  ('DEN', 'Denver', 'CO', 'USA'),
  ('IAH', 'Houston', 'TX', 'USA'),
  ('MCI', 'Kansas City', 'KS', 'USA'),
  ('LAX', 'Los Angeles', 'CA', 'USA'),
  ('MIA', 'Miami', 'FL', 'USA'),
  ('MSP', 'Minneapolis', 'MN', 'USA'),
  ('JFK', 'New York', 'NY', 'USA'),
  ('SEA', 'Seattle', 'WA', 'USA'),
  ('SFO', 'San Francisco', 'CA', 'USA'),
  ('ANC', 'Anchorage', 'AK', 'USA'),
  ('FAI', 'Fairbanks', 'AK', 'USA');

INSERT INTO dbo.Roads(city1, city2, distance) VALUES
  ('ANC', 'FAI',  359),
  ('ATL', 'ORD',  715),
  ('ATL', 'IAH',  800),
  ('ATL', 'MCI',  805),
  ('ATL', 'MIA',  665),
  ('ATL', 'JFK',  865),
  ('DEN', 'IAH', 1120),
  ('DEN', 'MCI',  600),
  ('DEN', 'LAX', 1025),
  ('DEN', 'MSP',  915),
  ('DEN', 'SEA', 1335),
  ('DEN', 'SFO', 1270),
  ('IAH', 'MCI',  795),
  ('IAH', 'LAX', 1550),
  ('IAH', 'MIA', 1190),
  ('JFK', 'ORD',  795),
  ('LAX', 'SFO',  385),
  ('MCI', 'ORD',  525),
  ('MCI', 'MSP',  440),
  ('MSP', 'ORD',  410),
  ('MSP', 'SEA', 2015),
  ('SEA', 'SFO',  815);
GO



2、分析与解决

要计算出Roads的所有路径之间的最短距离,需要先将它转换为有向图,并为每个边生成两个有向边(Roads表中保存的是城市间单边及其间距,这些数据并不能组成整个有效的有向图):  

SELECT city1 AS from_city, city2 AS to_city FROM dbo.Roads
  UNION ALL
  SELECT city2, city1 FROM dbo.Roads

例如:无向图中的边(JFK,ATL)转换出有向图的两条边(JFK,ATL)与(ATL,JFK)。前者表示从纽约(New York)到亚特兰大(Atlanta)的道路,后者表示从亚特兰大(Atlanta)到纽约(New York)的道路。

因为Roads是一个有环图,需要有检测环的逻辑,以避免重复遍历环路径。有了从无向图生成有向图和环检测方法,就有了生成道路的传递闭包所需的全部工具。(传递:如果输入图包含了边(a , b)和(b , c),则a和c具有传递性。)

生成Roads的传递闭包:

;With Roads2 as( --为每对节点生成两条路径(f=>t,t=>f)
 select city1 from_city,city2 to_city,distance
 from Roads(nolock) 
 union all
 select city2,city1,distance from Roads(nolock) 
),
RoadsPaths as(
	--返回所有第1级直接可达的节点对
	select from_city, to_city,
		--用于识别环的路径
		CAST('.' + from_city + '.' + to_city + '.' as varchar(MAX)) as path
	from Roads2
	
	union all
	
	--返回下一级可达的节点对
	select F.from_city,T.to_city,
		CAST(F.path + T.to_city + '.' as varchar(Max))
	from RoadsPaths F	--from
	inner join Roads2 T --to
		--如果在from_city的路径中已存在 to_city ,则检测到环,不应再继续
		on case when F.path like '%.' + T.to_city + '.%' 
			then 1 else 0 end=0
		and F.to_city=T.from_city
)
--返回Roads的传递闭包
select distinct from_city,to_city
from RoadsPaths

Roads2 CTE负责从Roads创建有向图;RoadsPaths CTE返回所有可能的源节点/目标节点对(可能产生较大开销),不会重复返回和遍历检测到环的路径。外部查询返回所有不重复的源节点/目标节点对。


如下代码返回所有路径和距离:

;With Roads2 as( --为每对节点生成两条路径(f=>t,t=>f)
 select city1 from_city,city2 to_city,distance
 from Roads(nolock) 
 union all
 select city2,city1,distance from Roads(nolock) 
),
RoadsPaths as(
	--返回所有第1级直接可达的节点对
	select from_city, to_city,distance,
		--用于识别环的路径
		CAST('.' + from_city + '.' + to_city + '.' as varchar(MAX)) as path
	from Roads2
	
	union all
	
	--返回下一级可达的节点对
	select F.from_city,T.to_city,F.distance + T.distance,
		CAST(F.path + T.to_city + '.' as varchar(Max))
	from RoadsPaths F	--from
	inner join Roads2 T --to
		--如果在from_city的路径中已存在 to_city ,则检测到环,不应再继续
		on case when F.path like '%.' + T.to_city + '.%' 
			then 1 else 0 end=0
		and F.to_city=T.from_city
)
--返回Roads所有路径和距离
select *
from RoadsPaths


最后,要返回Roads最短路径,需要取出所有路径中距离最小的节点对:

;With Roads2 as( --为每对节点生成两条路径(f=>t,t=>f)
 select city1 from_city,city2 to_city,distance
 from Roads(nolock) 
 union all
 select city2,city1,distance from Roads(nolock) 
),
RoadsPaths as(
	--返回所有第1级直接可达的节点对
	select from_city, to_city,distance,
		--用于识别环的路径
		CAST('.' + from_city + '.' + to_city + '.' as varchar(MAX)) as path
	from Roads2
	
	union all
	
	--返回下一级可达的节点对
	select F.from_city,T.to_city,F.distance + T.distance,
		CAST(F.path + T.to_city + '.' as varchar(Max))
	from RoadsPaths F	--from
	inner join Roads2 T --to
		--如果在from_city的路径中已存在 to_city ,则检测到环,不应再继续
		on case when F.path like '%.' + T.to_city + '.%' 
			then 1 else 0 end=0
		and F.to_city=T.from_city
),
RoadsMinDist as(--取出每对节点最短距离
	select p.from_city,p.to_city,MIN(p.distance) as minDist
	from RoadsPaths p
	group by p.from_city,p.to_city
)
--返回Roads最短路径
select RP.*
from RoadsMinDist as RMD
	inner join RoadsPaths RP
	on RMD.from_city=RP.from_city
	and RMD.to_city=RP.to_city
	and RMD.minDist=RP.distance


为满足对两个城市间最小距离或者说最短路径的多次查询请求,可以将结果集保存在一个表中,并创建索引,如下所示:

;With Roads2 as( --为每对节点生成两条路径(f=>t,t=>f)
 select city1 from_city,city2 to_city,distance
 from Roads(nolock) 
 union all
 select city2,city1,distance from Roads(nolock) 
),
RoadsPaths as(
	--返回所有第1级直接可达的节点对
	select from_city, to_city,distance,
		--用于识别环的路径
		CAST('.' + from_city + '.' + to_city + '.' as varchar(MAX)) as path
	from Roads2
	
	union all
	
	--返回下一级可达的节点对
	select F.from_city,T.to_city,F.distance + T.distance,
		CAST(F.path + T.to_city + '.' as varchar(Max))
	from RoadsPaths F	--from
	inner join Roads2 T --to
		--如果在from_city的路径中已存在 to_city ,则检测到环,不应再继续
		on case when F.path like '%.' + T.to_city + '.%' 
			then 1 else 0 end=0
		and F.to_city=T.from_city
),
RoadsMinDist as(--取出每对节点最短距离
	select p.from_city,p.to_city,MIN(p.distance) as minDist
	from RoadsPaths p
	group by p.from_city,p.to_city
)
--返回Roads最短路径
select RP.*
into dbo.RoadPaths --插入创建的表
from RoadsMinDist as RMD
	inner join RoadsPaths RP
	on RMD.from_city=RP.from_city
	and RMD.to_city=RP.to_city
	and RMD.minDist=RP.distance;
	
create unique clustered index idx_uc_from_city_to_city
on dbo.RoadPaths(from_city,to_city);

--查询洛杉矶到纽约之间最短距离
select * from dbo.RoadPaths
where from_city='LAX' and to_city='JFK'



from_city

to_city distance path


LAX

JFK 2945 .LAX.DEN.MCI.ORD.JFK.


参考:SQL Server 2008 技术内幕