1 回答
TA贡献1883条经验 获得超3个赞
您在这里遇到了很多挑战,但我认为它们都是可行的。
我认为您已经解决了第一个问题,即知道给定数据点位于哪些区域中。无论是STContains()或STIntersects()。
第二个是您实际上是在寻找基于时间的连续性集群。假设您具有可靠的数据收集,这也是可以解决的。一旦从上面获得了一组(人,地区,时间戳)元组,这就是一个空白和离岛的问题。玩具解决方案如下:
IF OBJECT_ID('tempdb.dbo.#observations') IS NOT NULL
DROP TABLE #observations;
IF OBJECT_ID('tempdb.dbo.#regions') IS NOT NULL
DROP TABLE #regions;
CREATE TABLE #observations (
ObservationID INT NOT NULL IDENTITY,
CONSTRAINT PK_Observations PRIMARY KEY CLUSTERED (ObservationID),
PersonID INT NOT null,
Point GEOMETRY NOT null,
TS DATETIME2(0) NOT NULL CONSTRAINT DF_Observations_TS DEFAULT SYSUTCDATETIME()
);
CREATE TABLE #regions (
RegionID INT NOT NULL IDENTITY,
CONSTRAINT PK_Regions PRIMARY KEY CLUSTERED (RegionID),
Area GEOMETRY NOT NULL
);
INSERT INTO #regions
(
Area
)
VALUES
( geometry::STGeomFromText('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))', 0) ),
( geometry::STGeomFromText('POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))', 0) ),
( geometry::STGeomFromText('POLYGON((0 0, 4 0, 4 4, 0 4, 0 0))', 0) );
INSERT INTO #observations
(
PersonID ,
Point ,
TS
)
VALUES
( 1 ,
geometry::Point(0.5, 0.5, 0) ,
'2018-01-01 00:00:00'
),
( 1 ,
geometry::Point(1.5, 1.5, 0) ,
'2018-01-01 00:00:05'
),
( 1 ,
geometry::Point(2.5, 2.5, 0) ,
'2018-01-01 00:00:10'
),
( 1 ,
geometry::Point(3.5, 3.5, 0) ,
'2018-01-01 00:00:15'
),
( 1 ,
geometry::Point(4.5, 4.5, 0) ,
'2018-01-01 00:00:20'
),
( 1 ,
geometry::Point(0.5, 0.5, 0) ,
'2018-01-01 01:00:00'
),
( 1 ,
geometry::Point(1.5, 1.5, 0) ,
'2018-01-01 01:00:05'
),
( 1 ,
geometry::Point(2.5, 2.5, 0) ,
'2018-01-01 01:00:10'
),
( 1 ,
geometry::Point(3.5, 3.5, 0) ,
'2018-01-01 01:00:15'
),
( 1 ,
geometry::Point(4.5, 4.5, 0) ,
'2018-01-01 01:00:20'
),
( 2 ,
geometry::Point(3.5, 3.5, 0) ,
'2018-01-01 00:00:00'
),
( 2 ,
geometry::Point(3.5, 3.5, 0) ,
'2018-01-01 00:00:05'
),
( 2 ,
geometry::Point(3.5, 3.5, 0) ,
'2018-01-01 00:00:10'
),
( 2 ,
geometry::Point(3.6, 3.6, 0) ,
'2018-01-01 00:00:15'
),
( 2 ,
geometry::Point(4.5, 4.5, 0) ,
'2018-01-01 00:00:20'
);
WITH cte AS (
SELECT o.ObservationID,
o.PersonID ,
o.TS ,
r.RegionID,
(DATEDIFF(SECOND, '2017-01-01', o.ts)/5) - ROW_NUMBER() OVER (PARTITION BY o.PersonID, r.RegionID ORDER BY o.ts) AS gid,
DATEDIFF(SECOND, '2017-01-01', o.ts)/5 AS diff,
ROW_NUMBER() OVER (PARTITION BY o.PersonID, r.RegionID ORDER BY o.ts) AS rn
FROM #observations AS o
JOIN #regions AS r
ON o.Point.STIntersects(r.Area) = 1
--JOIN #timestamps AS ts
-- ON ts.TS = o.TS
)
SELECT cte.PersonID, cte.RegionID, MIN(ts), MAX(ts)
FROM cte
GROUP BY cte.PersonID ,
cte.RegionID,
cte.gid;
诀窍(如果有的话)是意识到row_number()岛中每个成员的增量为1,并且对于相同的条件,(秒数)/ 5也应该增量为1。因此,对于在同一个岛中具有同等资格的行,它们之间的差异应该是恒定的。这给我们提供了方便的分组依据。
- 1 回答
- 0 关注
- 136 浏览
添加回答
举报