DATA SELECTIVITY ANALYSIS FOR INDEX SELECTION

 

If column returns more than 5% of the table rows,

then query optimiser may not use the index.

 

This stored procedure, will find the selectivity of your data.

Use this to test whether a column is a good candidate for an index.

 

---------------------------------

drop proc usp_IndexSelectivity

go

 

--drop table ##tmpRowHits

--drop table ##tmpRowCount

--usp_IndexSelectivity head_blockers, spid

 

--usp_IndexSelectivity journal_history, custid

 

create proc usp_IndexSelectivity @pvcTblName varchar(100), @pvcColumn varchar(20) as

 

--Author: Keith Biddle

--www.sqlserverportal.com

--Desc:

--Find selectivity of data for a potential index column

--If column rtns > 5% of table rows, then qry optimiser may not use the idx.

--Params:

--Table & column to be assessed.

----------------------------------------------------------------------

 

print 'usp_IndexSelectivity'

print ''

print '--------------------------------------------------'

print 'DATA SELECTIVITY ANALYSIS FOR INDEX SELECTION'

print '--------------------------------------------------'

print ''

 

set nocount on

declare @vcSQL Nvarchar(255)

declare @iTotRows int, @iUniqueValues int

declare @iOnePercent decimal(10,2), @iAvRowsRtnd decimal(10,2)

DECLARE @vcdbname varchar(100)

 

select             @vcdbname = db_name()

 

print 'DATABASE:   ' + @vcdbname

print 'TABLE:      ' + @pvcTblName

print 'COLUMN:     ' + @pvcColumn

print '----------------------------------------'

print ''

 

select @vcSQL = 'select ' + @pvcColumn + ', count(*) as RowHits into ##tmpRowHits from ' + @pvcTblName + ' group by ' + @pvcColumn + ' order by RowHits'

exec sp_executesql @vcSQL

 

print 'Row hits are the number of rows per different column value.'

print 'TotalRowHits has summed all the row hit sub totals, it should be the same as the total table row count.'

print ''

select sum(RowHits) as TotalRowHits from ##tmpRowHits

 

select @vcsql = 'select count(*) as rowcnt into ##tmpRowCount from ' + @pvcTblName

execute( @vcsql )

 

select @iTotRows=rowcnt from ##tmpRowCount

print 'Total table rows'

print '-------------------'

print @iTotRows

print ''

 

print '1% of table'

print '--------------'

set @iOnePercent = @iTotRows /100

print convert(varchar(100),@iOnePercent) + ' rows'

print ''

 

select @iUniqueValues=count(*) from ##tmpRowHits

print 'Number of different values in ' + @pvcTblName

print '---------------------------------------------------------------'

print @iUniqueValues

print ''

 

select @iAvRowsRtnd = @iTotRows / @iUniqueValues

print 'Average return set'

print '--------------------'

print @iAvRowsRtnd

print ''

 

print 'Average return set % of table'

print '------------------------------------'

print @iAvRowsRtnd /@iOnePercent

print ''

 

print 'If different values are > 50,000, then skip full print out'

if (@iUniqueValues > 50000)

            begin

                        print 'Skipped full print out'

                        goto endit

            end

print ''

 

print 'HitPercentOfTable represents the % of the table used by the rows from the specified value.'

print 'Values returning 5% or more of the table, may cause the qry optimiser not to use the index.'

print 'After index creation, check performance using a value which returns a high % of the table.'

print 'Check the execution plan to see if the index is used.'

print ''

select @vcsql = 'select ' + @pvcColumn + ', RowHits , RowHits / ' + CONVERT(varchar(25),@iOnePercent) + ' as HitPercentOfTable from ##tmpRowHits'

execute (@vcsql)

 

endit:

 

drop table ##tmpRowHits

drop table ##tmpRowCount