Solution to TSQL Challenge 34 - Search for two keywords within the maximum distance of one word between them By CarlosBenito
WITH
CTE_PHRASE
AS
( SELECT TextID
, ' ' + Data + ' '
AS Phrase
FROM dbo.TC34_Phrases
)
,
CTE_Sword
AS
( SELECT SearchID
, LTRIM(RTRIM(Text1))
AS Sword1
, LTRIM(RTRIM(Text2))
AS Sword2
, LEN(LTRIM(RTRIM(Text1)))
AS Sword1_Len
, LEN(LTRIM(RTRIM(Text2)))
AS Sword2_Len
, ' ' + LTRIM(RTRIM(Text1)) + ' '
AS Cword1
, ' ' + LTRIM(RTRIM(Text2)) + ' '
AS Cword2
FROM dbo.TC34_Searches
)
,
CTE_1
AS
( SELECT TextID
-- , Phrase
, SearchID
, Sword1
, Sword2
, Sword1_Len
, Sword2_Len
, SUBSTRING(Phrase, CHARINDEX(Sword1, Phrase, 1) + Sword1_Len + 1, Len(Phrase))
AS Phrase1
, SUBSTRING(Phrase, CHARINDEX(Sword2, Phrase, 1) + Sword2_Len + 1, Len(Phrase))
AS Phrase2
, 1
AS Seq
, 'N'
AS Match
FROM ( SELECT *
FROM CTE_PHRASE
CROSS APPLY ( SELECT *
FROM CTE_Sword
WHERE CHARINDEX(Cword1, Phrase, 1) > 0
AND CHARINDEX(Cword2, Phrase, 1) > 0
) S
) T1
UNION ALL
SELECT TextID
-- , Phrase
, SearchID
, Sword1
, Sword2
, Sword1_Len
, Sword2_Len
, CASE WHEN Seq = 2
THEN CASE WHEN CHARINDEX(Sword1, Phrase1, 1) > 0
THEN SUBSTRING(Phrase1, CHARINDEX(Sword1, Phrase1, 1) + Sword1_Len + 1, Len(Phrase1))
ELSE ''
END
ELSE SUBSTRING(Phrase1, Word1_Start + Word1_Len + 1, Len(Phrase1))
END
AS Phrase1
, CASE WHEN Seq = 2
THEN CASE WHEN CHARINDEX(Sword2, Phrase2, 1) > 0
THEN SUBSTRING(Phrase2, CHARINDEX(Sword2, Phrase2, 1) + Sword2_Len + 1, Len(Phrase2))
ELSE ''
END
ELSE SUBSTRING(Phrase2, Word2_Start + Word2_Len + 1, Len(Phrase2))
END
AS Phrase2
, CASE WHEN Seq = 2
THEN 1
ELSE Seq + 1
END
AS Seq
, CASE WHEN Sword1 = SUBSTRING(Phrase2, Word2_Start, Word2_Len)
OR Sword2 = SUBSTRING(Phrase1, Word1_Start, Word1_Len)
THEN 'Y'
ELSE 'N'
END
AS Match
FROM ( SELECT *
, PATINDEX('%[^ ]%', Phrase1)
AS Word1_Start
, CHARINDEX(' ', Phrase1, PATINDEX('%[^ ]%', Phrase1)) - PATINDEX('%[^ ]%', Phrase1)
AS Word1_Len
, PATINDEX('%[^ ]%', Phrase2)
AS Word2_Start
, CHARINDEX(' ', Phrase2, PATINDEX('%[^ ]%', Phrase2)) - PATINDEX('%[^ ]%', Phrase2)
AS Word2_Len
FROM CTE_1
WHERE Match = 'N'
AND ( Phrase1 <> '' OR Phrase2 <> '' )
) T2
)
SELECT SearchID
, TextID
FROM CTE_1
WHERE Match = 'Y'
ORDER BY SearchID
, TextID
OPTION (MAXRECURSION 32767)
Sr# StartTime Reads Writes CPU Duration
--- ------------------- ----------- ----------- ----------- --------
1 Dec 10 2010 11:08AM 735744 6209 2496 2.758
2 Dec 10 2010 12:19PM 735885 6218 2465 2.672
3 Dec 10 2010 1:30PM 735858 6220 2418 2.751
4 Dec 10 2010 2:42PM 735848 6214 2402 2.704
5 Dec 10 2010 3:52PM 735888 6214 2340 2.658