SQL Server – Query – Like Clause

Introduction

In Database Applications, the SQL Clause (like) is often used to match on partial terms.  The Application developer can allow end-users to search on entries starting on search tags without having to enter the entire item to be searched for.

It is thus a time and keyboard saver.

Puzzling in Microsoft SQL Server v2008-R2, it seems the system sometimes does not utilize an index search even when it seemed that it would have being a better choice.

Create Entities


use [tempdb]
go

if OBJECT_id('dbo.v_student') is not null
begin
	drop view dbo.v_student
end	
go

if OBJECT_id('dbo.student') is not null
begin
	drop table dbo.student
end	
go

if OBJECT_id('dbo.fn_student_zipcode') is not null
begin
	drop function dbo.fn_student_zipcode
end	
go

create table dbo.student
(
	  [id] int not null identity(1,1) 
	, firstname sysname not null
	, lastname sysname not null
	, zipcode varchar(10) null

	, fullname as firstname + ' ' + lastname

	, constraint PK_Student primary key ([id])

)
go

create index idx_Zipcode 
on dbo.student
([zipcode])
go

create index idx_Zipcode_id
on dbo.student
([zipcode])
include
([id])
go

create view dbo.v_student
as

	select 
			  id
			, firstname
			, lastname
			, fullName
			, zipcode
	from   dbo.student
go	

create function dbo.fn_student_zipcode
(
	@zipcode varchar(10)
)
returns table
as
	return
	(
		select 
				  id
				, firstname
				, lastname
				, fullName
				, zipcode
		from   dbo.student
		where  zipcode like @zipcode
	)		
go	

Populate Entities


set nocount on
go

use [tempdb]
go

truncate table dbo.student
go

declare @id int
declare @idMax int

declare @izipcode bigint
declare @strZipCode varchar(10)

set @id =1
set @idMax = 1E7

while (@id < @idMax)
begin

	set @izipcode = RAND() * 10000000
	set @strZipCode = cast(@izipcode as varchar(10))

	insert into dbo.student
	([firstname], [lastname], [zipcode]) 
        values 
        ('bob', 'smith', @strZipcode)

	set @id = @id +1

end	

update statistics dbo.student with fullscan
go

update statistics dbo.student idx_Zipcode with fullscan
go

Queries (Combined)


set nocount on
set statistics io on
set statistics time on
go

use [tempdb]
go

declare @zipcode varchar(10)
declare @zipcodeEnd varchar(11)

set @zipcode = '91819'
set @zipcodeEnd = @zipcode + CHAR(0)

select *
from   dbo.student (nolock)
where  zipcode like '91819' --@zipcode

select *
from   dbo.student (nolock)
where  zipcode like @zipcode

select *
from   dbo.v_student with (nolock)
where  zipcode > @zipcode  
and    zipcode < @zipcodeEnd

select *
from   dbo.fn_student_zipcode(@zipcode)

select *
from   dbo.student (nolock)
where  zipcode like '' + @zipcode
option (OPTIMIZE FOR (@zipcode UNKNOWN))

Query Plan (Combined)

LikeClauseAndIndexes

Hard Coded Valued (column = 'xxxxx') 


set nocount on
set statistics io on
set statistics time on
go

use [tempdb]
go

declare @zipcode varchar(10)
declare @zipcodeEnd varchar(11)

set @zipcode = '91819'
set @zipcodeEnd = @zipcode + CHAR(0)

select *
from   dbo.student (nolock)
where  zipcode like '91819' 

Result: 

zipcode equal-to hardcode value
Explanation:

  • Uses index – Index seek on zipCode

Column like Variable 


set nocount on
set statistics io on
set statistics time on
go

use [tempdb]
go

declare @zipcode varchar(10)
declare @zipcodeEnd varchar(11)

set @zipcode = '91819'
set @zipcodeEnd = @zipcode + CHAR(0)

select *
from   dbo.student (nolock)
where  zipcode like @zipcode

Result: 

zipcode like variable
Explanation:

  • Does not use Index; it is performing a Clustered Index Scan
  • Spending time on Parallelism, as well

Against Database View, Column like Variable 


set nocount on
set statistics io on
set statistics time on
go

use [tempdb]
go

declare @zipcode varchar(10)
declare @zipcodeEnd varchar(11)

set @zipcode = '91819'
set @zipcodeEnd = @zipcode + CHAR(0)

select *
from   dbo.v_student (nolock)
where  zipcode like @zipcode

Result: 

zipcode like variable (view) 

Explanation:

  • As a view, our query is not fairing any better
  • Does not use Index; it is performing a Clustered Index Scan
  • Spending time on Parallelism, as well

Against Table Value Function, Column like Variable 


set nocount on
set statistics io on
set statistics time on
go

use [tempdb]
go

declare @zipcode varchar(10)

set @zipcode = '91819'

select *
from   dbo.fn_student_zipcode(@zipCode)
where  zipcode like @zipcode

Result: 

zipcode like variable (TVF) 
Explanation:

  • As a Table Value Function, the proverbial needle did not move any
  • Does not use Index; it is performing a Clustered Index Scan
  • Spending time on Parallelism, as well

Using a Query Hint (Optimize for)


set nocount on
set statistics io on
set statistics time on
go

use [tempdb]
go

declare @zipcode varchar(10)

set @zipcode = '91819'

select *
from   dbo.student
where  zipcode like @zipcode
option ( optimize for (@zipcode UNKNOWN) )

Result: 

zipcode like variable (optimize for) -v2

Explanation:

  • Next tried a query hint; specifically OPTIMIZE for unknown
  • Does not use Index; it is still performing a Clustered Index Scan
  • Spending time on Parallelism, as well

Obviously, nothing we tried worked!

Checked Statistics

I thought may be it is Statistics; I thus used Management Studio to check our two statistics

Statistics -- idx_Zipcode

2nd stats

Statistics -- idx_Zipcode

Statistics -- idx_Zipcode_id

The statistics looked well distributed enough.

Stored Procedures

Stored Procedures Code:


use [tempdb]
go

if OBJECT_ID('dbo.usp_student_zipcode') is null
begin

   exec ('create procedure dbo.usp_student_zipcode as 
               select 1/0 as [undeclared] ')

end
go

alter procedure dbo.usp_student_zipcode
(
	@zipcode varchar(10)
)
as

     select 
	  id
	, firstname
	, lastname
	, fullName
	, zipcode
     from   dbo.student
     where  zipcode like @zipcode + '%'

go	

</code></pre>
<h3>Execute Stored Procedure:</h3>
<pre><code>
use [tempdb]
go

declare @zipcode varchar(10)
set @zipcode = 
exec  dbo.usp_student_zipcode
	@zipcode varchar(10)

Output:

zipcode like variable (Stored Procedure)

Explanation:

  • As a Stored Procedure, we returned to the joys of our youth; that is our initial try with hard-coded values

Here is the Statistics I/O Output:

SQLLike -- Statistics IO

Alternatives

There are alternatives that you can consider:

  • Full Text Search

Next

Not really sure what it is going on yet.

I need some more work with sqlcmd and may be other programming languages. As it stands now only stored procedures is using our index.

References:

References (Stored Procedures with recompile):

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s