SQL Server – Nice Heaps?

Preface

Taking a look at Query Plans and found an interesting one that I wanted to talk about.

It has to do with Heaps.

 

Data model

Here is our nice and simple data-model.

Diagram_Worktable

Code

DDL

DDL – WorkTable.Person


if schema_id('worktable') is null
begin

  exec ('create schema [worktable] authorization [dbo]')
	
end
go

/*
   drop table [worktable].[person];
   exec sp_help '[worktable].[person]'
	
*/

if object_id('[worktable].[person]') is not null
begin

   set noexec on
	
end
go

create table [worktable].[person]
(
   [id] bigint not null
     identity(1,1)

  , [name] nvarchar(200) not null
		
  , [email] nvarchar(200) not null

  , [addedBy] sysname not null
	constraint [defaultWorktablePersonAddedBy] 
	  default SYSTEM_USER

  , [addedOn] smalldatetime not null
	constraint [defaultWorktableEmployeeAddedOn] 
	  default getdate()
			
  , constraint PK_WORKTABLE_PERSON
	primary key
	  (
	    [id]
	  )					

)
go

set noexec off
go

DDL – WorkTable.Attribute



if schema_id('worktable') is null
begin

   exec ('create schema [worktable] authorization [dbo]')
	
end
go

/*
  drop table [worktable].[attribute];
  exec sp_help '[worktable].[attribute]'
*/

if object_id('[worktable].[attribute]') is not null
begin

   set noexec on
	
end
go

create table [worktable].[attribute]
(

     [id] int not null
 	identity(1,1)

   , [name] nvarchar(200) not null

	
   , [datatype] int not null
	
	/*
  	  select * 
	  from sys.types
	  where  name = 'nvarchar'
	*/
	constraint [defaultWorktableAttributeDatatype] 
		default 231
			
  , [addedBy] sysname not null
	constraint [defaultWorktableAttributeAddedBy] 
	   default SYSTEM_USER

  , [addedOn] smalldatetime not null
	constraint [defaultWorktableAttributeAddedOn] 
	   default getdate()
			
   , constraint PK_WORKTABLE_ATTRIBUTE
	primary key
	  (
	     [id]
	   )					

   , constraint uniqueName
	unique
	  (
	    [name]
	  )					

)
go

set noexec off
go


DDL – WorkTable.PersonAttribute


if schema_id('worktable') is null
begin

	exec ('create schema [worktable] authorization [dbo]')
	
end
go

/*
	drop table [worktable].[personAttribute];
	exec sp_help '[worktable].[personAttribute]'
	
*/

if object_id('[worktable].[personAttribute]') is not null
begin

   set noexec on
	
end
go

create table [worktable].[personAttribute]
(

    [personID] bigint not null
	
  , [attributeID] int not null
	
  , [value] nvarchar(4000)
			
  , [addedBy] sysname not null
	constraint [defaultWorktablePersonAttributeAddedBy] 
		default SYSTEM_USER

  , [addedOn] smalldatetime not null
	constraint [defaultWorktablePersonAttributeAddedOn] 
	  default getdate()
			
  , constraint PK_WORKTABLE_PERSONATTRIBUTE
	primary key 
	  nonclustered
	    (
	        [personID]
	      , [attributeID]
	    )					

   , constraint FK_WORKTABLE_PERSON
	foreign key
	  (
	     [personID]
	  )
	references [worktable].[person]
	  (
	    [id]
	  )

  , constraint FK_WORKTABLE_PERSONATTRIBUTE
	foreign key
	  (
	    [attributeID]
	  )
	references [worktable].[attribute]
 	 (
	    [id]
	 )
												
)
go

set noexec off
go

Populate Tables


set nocount on;
go

truncate table [worktable].[personAttribute]
delete from [worktable].[attribute]
delete from [worktable].[person]
go

/*
  http://stackoverflow.com/questions/472578/dbcc-checkident-sets-identity-to-0
  Answer by Zyphrax
*/
IF EXISTS 
(
	SELECT last_value, * 
	FROM   sys.identity_columns 
	WHERE  OBJECT_ID = object_id('[worktable].[person]')
	AND    last_value IS NOT NULL
) 
begin

    DBCC CHECKIDENT ('[worktable].[person]', RESEED, 0)
		with no_infomsgs

end    

IF EXISTS 
(
   SELECT last_value, * 
   FROM   sys.identity_columns 
   WHERE  OBJECT_ID = object_id('[worktable].[attribute]')
   AND    last_value IS NOT NULL
) 
begin

    DBCC CHECKIDENT ('[worktable].[attribute]', RESEED, 0)
		with no_infomsgs    
		
end    

insert into [worktable].[attribute]
(
    [name]
)
select 'First Car'
UNION
select 'Favorite Color'
UNION
select 'Favorite City'
UNION
select 'Name of Favorite Cousin'
UNION
select 'Childhood Street Name'
UNION
select 'Childhood Nickname'
UNION
select 'Movie Actor Crush'
UNION
select 'Action Europe'
UNION
select 'Email Provider'
go

/* Add 10,000 entries using go 10000 */	
insert into [worktable].[person]
(
    [name]
  , [email]
)
select
   replace(newid(), '-', '')
   , replace(newID(), '-', '')
go 10000


declare @iPersonID bigint
declare @iNumberofPersons bigint

declare @iAttributeID int
declare @iNumberofAttributes int


set @iPersonID = 1
set @iNumberofPersons = 
	( 
		select max([id]) 
		from [worktable].[person] 
	)

set @iAttributeID = 1
set @iNumberofAttributes = 
	( 
		select max([id]) 
		from [worktable].[attribute] 
	)


while (@iPersonID <= @iNumberofPersons)
begin

   set @iAttributeID 
	= cast(
		rand() * @iNumberofAttributes
		 as int
	      )

	if (
               (@iAttributeID < 1) or (@iAttributeID > @iNumberofAttributes)
	   )
	begin
	   set @iAttributeID = 1
	end					
	
	insert into worktable.personAttribute
	(
	    [personID]
	  , [attributeID]
	  , [value]
	)		
	select 
   	     @iPersonID
	   , @iAttributeID
	   , replace(newid(), '-', '')
		
	set @iPersonID = @iPersonID + 1

end

go

Get Data


set nocount on
go
	
declare @personID bigint
declare @personIDMax bigint

set @personID = 500

if (@personID is null)
begin
	
   set @personIDMax
	=
	   (
	      SELECT cast(last_value as bigint)
	      FROM   sys.identity_columns 
	      WHERE  OBJECT_ID = object_id('[worktable].[person]')
	   )					

	set @personID = cast(rand() * @personIDMax as int)

	print replicate('*', 80)

end	

select 
	  tblP.[id]
	, tblPA.[value]
		
from   [worktable].[person] tblP

         inner join [worktable].[personAttribute] tblPA
         
	    on tblP.[id] = tblPA.[personID]

where  tblP.[id] = @personID


Query Plan

NonClustered-QueryPlan

Statistics I/O

NonClustered-IOStats

 

Quick Explanation

  1. Query Plan
    • There are two Nested Loops
      • The first joins the result of the Index Seek and RID Lookup on the personAttribute table
      • The second joins our two tables, person and personAttribute
  2. Statistics I/O
    1. On the personAttribute table, we have a single “scan count” and 3 logical reads
    2. On the person table, we have 2 logical reads

 

Simplify Query Plan

The query is quite simple, a straight forward join.

Let us see how we can simplify it.

The things that stand out are:

  1. Multiple Nested Loops
  2. RID Lookup

 

Covering Index

Let us add a covering index.

Basically a covering index adds often mentioned columns as “included columns” to the main keys of an index.

 

DDL – Add


if 
 (
      object_id('[worktable].[personAttribute]') is not null
      and  not exists
           (
	     select 1
	     from   sys.indexes tblSI
	     where  tblSI.object_id 
	       = object_id('[worktable].[personAttribute]')
	     and    tblSI.name	
	       = 'INDX_PERSONID_ATTRIBUTEID_IncCoveringCols'							
           )
 )
 begin
	
    create index [INDX_PERSONID_ATTRIBUTEID_IncCoveringCols]
    on [worktable].[personAttribute]
    (
        [personID]
      , [attributeID]
    )
    include
    (
       [value]
    )
 end			  

go


Review Index

Let us review our indexes.

sp_helpindex ( Microsoft )


exec sp_helpindex '[worktable].[personAttribute]'

Output:

AddedCoveringIndex

 

sp_helpindex2 ( Percy Reyes)

Microsoft’s implementation of sp_helpindex is dated as it does not reflect included columns nor the size of each index.

I much prefer Percy Reyes’s implementation:

SQL Server Index Report with Included Columns, Storage and more for all Tables in a Database
https://www.mssqltips.com/sqlservertip/3450/sql-server-index-report-with-included-columns-storage-and-more-for-all-tables-in-a-database/


   exec sp_helpindex2 
	 @schemaName = 'worktable'
	,@tableName = 'personAttribute'


Output:

AddedCoveringIndex-spHelpIndex2

 

Explanation:
  1. For Index INDX_PERSONID_ATTRIBUTEID_IncCoveringCols
    • We see the Included Column noted as value in the the “IncludedColumns” column
    • The Index’s size is 0.9 MB while the primary key is only 0.2 MB

 

 

Convert Non-Clustered Index to Clustered Index

Our primary key was defined as non-clustered.

We can drop existing primary key/non-clustered index and replace it with a clustered index/primary key.

As this is only a practice session and we are prejudiced towards comparative analysis, we will not throw away the original design, but create a parallel one.

DDL – Add

Let us create our clustered table.


if schema_id('worktable') is null
begin
  exec ('create schema [worktable] authorization [dbo]')
end
go

/*
  drop table [worktable].[personAttributeClustered];
*/

if object_id( '[worktable].[personAttributeClustered]')
      is not null
begin
  set noexec on
end
go

create table [worktable].[personAttributeClustered]
(

  [personID] bigint not null
	
 , [attributeID] int not null
	
, [value] nvarchar(4000)
			
  , [addedBy] sysname not null
	constraint
         [defaultWorktablePersonClusAttributeAddedBy] 
	 default SYSTEM_USER

  , [addedOn] smalldatetime not null
        constraint
         [defaultWorktablePersonClusAttributeAddedOn] 
	 default getdate()
			
  , constraint PK_WORKTABLE_PERSONATTRIBUTEClustered
	primary key 
		--nonclustered
		clustered
		(
		    [personID]
		  , [attributeID]
		)					

	, constraint FK_WORKTABLE_PERSONClustered
		foreign key
		(
		  [personID]
		)
		references [worktable].[person]
		(
		  [id]
		)

	, constraint FK_WORKTABLE_PERSONATTRIBUTEClustered
		foreign key
		(
		  [attributeID]
		)
		references [worktable].[attribute]
		(
		  [id]
		)
							
)
go

set noexec off
go

<span style="font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; line-height: 1.5;">

DDL – Add Data

Let us add data to our clustered table.

We can do so via code similar to our original sample, but it is likely simpler to just your insert into/select from.


    set nocount on
    go

    truncate table [worktable].[personAttributeClustered]
    go

    insert into [worktable].[personAttributeClustered]
    (
      [personID]
     ,[attributeID]
     ,[value]
     ,[addedBy]
     ,[addedOn]
    )
    select 
      [personID]
     ,[attributeID]
     ,[value]
     ,[addedBy]
     ,[addedOn]
   from   worktable.personAttribute
   go

Re-issue Query



set nocount on
go

declare @personID bigint
declare @personIDMax bigint

set @personID = 500

if (@personID is null)
begin
		
   set @personIDMax
	=
	  (
	    SELECT cast(last_value as bigint)
	    FROM   sys.identity_columns 
	    WHERE  OBJECT_ID 
                    = object_id('[worktable].[person]')
	  )					

	set @personID = cast(rand() * @personIDMax as int)

	print replicate('*', 80)

end	


select /* non-clustered */
	  tblP.[id]
	, tblPA.[value]
		
from   [worktable].[person] tblP

         inner join [worktable].[personAttribute] tblPA
		with (INDEX = PK_WORKTABLE_PERSONATTRIBUTE)
         
	      on tblP.[id] = tblPA.[personID]

where  tblP.[id] = @personID



print replicate('*', 80)

select  /* non-clustered coverig index*/
	  tblP.[id]
	, tblPA.[value]
		
from   [worktable].[person] tblP

         inner join [worktable].[personAttribute] tblPA
         
		on tblP.[id] = tblPA.[personID]

where  tblP.[id] = @personID



print replicate('*', 80)

select  /* clustered */
	  tblP.[id]
	, tblPAC.[value]
		
from   [worktable].[person] tblP

         inner join [worktable].[personAttributeClustered] 
            tblPAC

           on tblP.[id] = tblPAC.[personID]

where  tblP.[id] = @personID


Query Plan

Query Plan – Non-Clustered/Index Hint-Primary Key

NonClustered-PrimaryKey

 

Query Plan – Non-Clustered/Index Covering

NonClustered-CoveringIndex

Query Plan – Clustered Index

Clustered-PrimaryKey

 

Statistics I/O

StatisticsIO

 

Quick Explanation

  1. Query Plan
    • The Query Cost for our original query is 43% of the total
    • While the subsequent queries are both at 29%
  2. Statistics I/O
    • The query that we have targeting ( through Index hint) the original Index is taking 3 up logical reads from personAttribute table
    • And, the other queries are only using 2 logical reads

 

Index Metadata

Let us quickly review the size\cost of our indexes.


exec sp_helpindex2 
		  @schemaName = 'worktable'
		, @tableName = 'personAttribute'
		
exec sp_helpindex2 
		  @schemaName = 'worktable'
		, @tableName = 'personAttributeClustered'	

Output:

IndexMetadata

 

Quick Explanation

  1. Clustered Indexes (CI)
    • Clustered Keys are far more use useful than non-clustered indexes.
  2. Non-Clustered Indexes (NCI)
    • NCIs have to use “RID Lookup” to access data outside of their core and included columns

Please read more on your own…

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s