SQL Server 2005 Emergency Diagnostic and Performance Queries Part 1

Below is a set of troubleshooting and diagnostic queries that will help you quickly identify some common performance and other issues you may run into with a SQL Server 2005 OLTP workload.  I plan on enhancing this with more steps and information in the near future, but this is a good starting point. Many of the DMV queries require VIEW SERVER STATE permission on the server.


-- SQL Server 2005 Emergency Diagnostic and Performance Queries
-- Glenn Berry 3-17-2008

-- Step 1 - Check Task Manager. Are all CPUs above 90-95% for an extended period of time?
-- If yes, run HIGH CPU queries below:

-- Step 2 - Check Performance Monitor
-- SQL Server Buffer Manager: Buffer Cache Hit Ratio and Page Life Expectancy
-- SQL Server Memory Manager: Memory Grants Pending and Memory Grants Pending
-- Physical Disk: Avg disk sec/Read and Avg disk sec/Write

-- Step 3 - Check for locking, blocking and missing indexes
-- Run the BLOCKING queries below: 

-- Step 4 - Is the transaction log full?
-- Run the TRANSACTION LOG FULL queries below:

-- Step 5 - Check for IO Problems
-- Run the IO ISSUES queries below

    -- HIGH CPU *******
    -- Get the most CPU intensive queries

    DECLARE @SpID smallint
    SELECT TOP 25 spid
    FROM master..sysprocesses
    WHERE status = 'runnable'
    AND spid > 50   -- Eliminate system SPIDs
    AND spid <> @@SPID

    OPEN spID_Cursor

    INTO @spID
        WHILE @@FETCH_STATUS = 0
            PRINT 'Spid #:' + STR(@spID)
            EXEC ('DBCC INPUTBUFFER (' + @spID + ')')

               FETCH NEXT FROM spID_Cursor
            INTO @spID

    -- Close and deallocate the cursor
    CLOSE spID_Cursor
    DEALLOCATE spID_Cursor

      -- HIGH CPU *******
      -- Isolate top waits for server instance
      WITH Waits AS
            wait_time_ms / 1000. AS wait_time_s,
            100. * wait_time_ms / SUM(wait_time_ms) OVER() AS pct,
            ROW_NUMBER() OVER(ORDER BY wait_time_ms DESC) AS rn
        FROM sys.dm_os_wait_stats
        WHERE wait_type NOT LIKE '%SLEEP%'
        CAST(W1.wait_time_s AS DECIMAL(12, 2)) AS wait_time_s,
        CAST(W1.pct AS DECIMAL(12, 2)) AS pct,
        CAST(SUM(W2.pct) AS DECIMAL(12, 2)) AS running_pct
      FROM Waits AS W1
      INNER JOIN Waits AS W2
      ON W2.rn <= W1.rn
      GROUP BY W1.rn, W1.wait_type, W1.wait_time_s, W1.pct
      HAVING SUM(W2.pct) - W1.pct < 90 -- percentage threshold
      ORDER BY W1.rn;

      -- HIGH CPU *******
      -- Total waits are wait_time_ms (high signal waits indicates CPU pressure)
      SELECT '%signal (cpu) waits' = CAST(100.0 * SUM(signal_wait_time_ms) / SUM (wait_time_ms) AS NUMERIC(20,2)),
           '%resource waits'= CAST(100.0 * SUM(wait_time_ms - signal_wait_time_ms) / SUM (wait_time_ms) AS NUMERIC(20,2))
      FROM sys.dm_os_wait_stats;

      -- HIGH CPU *******
      -- Check SQL Server Schedulers to see if they are waiting on CPU (runnable tasks above 10 is very bad)
      SELECT scheduler_id, current_tasks_count, runnable_tasks_count
      FROM sys.dm_os_schedulers
      WHERE scheduler_id < 255

      -- HIGH CPU *******
      --  Who is running what at this instant 
      SELECT st.text AS [Command text], login_time, [host_name], 
      [program_name], sys.dm_exec_requests.session_id, client_net_address,
      sys.dm_exec_requests.status, command, db_name(database_id) AS DatabaseName
      FROM sys.dm_exec_requests 
      INNER JOIN sys.dm_exec_connections 
      ON sys.dm_exec_requests.session_id = sys.dm_exec_connections.session_id
      INNER JOIN sys.dm_exec_sessions 
      ON sys.dm_exec_sessions.session_id = sys.dm_exec_requests.session_id
      CROSS APPLY sys.dm_exec_sql_text(sql_handle) AS st
      WHERE sys.dm_exec_requests.session_id >= 51
      AND sys.dm_exec_requests.session_id <> @@spid
      ORDER BY sys.dm_exec_requests.status

      -- HIGH CPU *******
      -- Get a snapshot of current activity
      SELECT LTRIM (st.text) AS 'Command Text',[host_name], der.session_id AS 'SPID',
      der.status, db_name(database_id) AS DatabaseName, ISNULL(der.wait_type, 'None')AS 'Wait Type', der.logical_reads 
      FROM sys.dm_exec_requests AS der
      INNER JOIN sys.dm_exec_connections AS dexc
      ON der.session_id = dexc.session_id
      INNER JOIN sys.dm_exec_sessions AS dexs
      ON dexs.session_id = der.session_id
      CROSS APPLY sys.dm_exec_sql_text(sql_handle) AS st
      WHERE der.session_id >= 51
      AND der.session_id <> @@spid
      ORDER BY der.status

      -- BLOCKING ************
      -- Detect blocking
      SELECT blocked_query.session_id AS blocked_session_id,
      blocking_query.session_id AS blocking_session_id,
      sql_text.text AS blocked_text, sql_btext.text AS blocking_text, waits.wait_type AS blocking_resource
      FROM sys.dm_exec_requests AS blocked_query
      INNER JOIN sys.dm_exec_requests AS blocking_query 
      ON blocked_query.blocking_session_id = blocking_query.session_id
      (SELECT * FROM sys.dm_exec_sql_text(blocking_query.sql_handle)
      ) sql_btext
      (SELECT * FROM sys.dm_exec_sql_text(blocked_query.sql_handle)
      ) sql_text
      INNER JOIN sys.dm_os_waiting_tasks AS waits 
      ON waits.session_id = blocking_query.session_id

      -- BLOCKING ************
      -- Index Contention
      SELECT dbid=database_id, objectname=object_name(s.object_id),
      indexname=i.name, i.index_id, row_lock_count, row_lock_wait_count,
      [block %]= CAST (100.0 * row_lock_wait_count / (1 + row_lock_count) AS NUMERIC(15,2)),
      [avg row lock waits in ms]= CAST (1.0 * row_lock_wait_in_ms / (1 + row_lock_wait_count) AS NUMERIC(15,2))
      FROM sys.dm_db_index_operational_stats (db_id(), NULL, NULL, NULL) AS s
      INNER JOIN sys.indexes AS i
      ON i.object_id = s.object_id
      WHERE objectproperty(s.object_id,'IsUserTable') = 1
      AND i.index_id = s.index_id
      ORDER BY row_lock_wait_count DESC

      -- Find the log reuse description for the transaction log
      SELECT name, database_id, log_reuse_wait_desc 
      FROM sys.databases

      -- Individual File Size query
      SELECT name AS 'File Name' , physical_name AS 'Physical Name', size/128 AS 'Total Size in MB',
      size/128.0 - CAST(FILEPROPERTY(name, 'SpaceUsed') AS int)/128.0 AS 'Available Space In MB'--, *
      FROM sys.database_files;

      -- IO ISSUES *****************
      -- Analyze DB IO, ranked by IO Stall %
            DB_NAME(IVFS.database_id) AS db,
            CASE WHEN MF.type = 1 THEN 'log' ELSE 'data' END AS file_type,
            SUM(IVFS.num_of_bytes_read + IVFS.num_of_bytes_written) AS io,
            SUM(IVFS.io_stall) AS io_stall
        FROM sys.dm_io_virtual_file_stats(NULL, NULL) AS IVFS
            JOIN sys.master_files AS MF
              ON IVFS.database_id = MF.database_id
              AND IVFS.file_id = MF.file_id
        GROUP BY DB_NAME(IVFS.database_id), MF.type
      SELECT db, file_type, 
        CAST(1. * io / (1024 * 1024) AS DECIMAL(12, 2)) AS io_mb,
        CAST(io_stall / 1000. AS DECIMAL(12, 2)) AS io_stall_s,
        CAST(100. * io_stall / SUM(io_stall) OVER()
               AS DECIMAL(10, 2)) AS io_stall_pct,
        ROW_NUMBER() OVER(ORDER BY io_stall DESC) AS rn
      ORDER BY io_stall DESC;

      -- HIGH CPU ************
      -- Get Top 100 executed SP's ordered by execution count
      SELECT TOP 100 qt.text AS 'SP Name', qs.execution_count AS 'Execution Count',  
      qs.execution_count/DATEDIFF(Second, qs.creation_time, GetDate()) AS 'Calls/Second',
      qs.total_worker_time/qs.execution_count AS 'AvgWorkerTime',
      qs.total_worker_time AS 'TotalWorkerTime',
      qs.total_elapsed_time/qs.execution_count AS 'AvgElapsedTime',
      qs.max_logical_reads, qs.max_logical_writes, qs.total_physical_reads, 
      DATEDIFF(Minute, qs.creation_time, GetDate()) AS 'Age in Cache'
      FROM sys.dm_exec_query_stats AS qs
      CROSS APPLY sys.dm_exec_sql_text(qs.sql_handle) AS qt
      WHERE qt.dbid = db_id() -- Filter by current database
      ORDER BY qs.execution_count DESC

      -- HIGH CPU *************
      -- Get Top 20 executed SP's ordered by total worker time (CPU pressure)
      SELECT TOP 20 qt.text AS 'SP Name', qs.total_worker_time AS 'TotalWorkerTime', 
      qs.total_worker_time/qs.execution_count AS 'AvgWorkerTime',
      qs.execution_count AS 'Execution Count', 
      ISNULL(qs.execution_count/DATEDIFF(Second, qs.creation_time, GetDate()), 0) AS 'Calls/Second',
      ISNULL(qs.total_elapsed_time/qs.execution_count, 0) AS 'AvgElapsedTime', 
      qs.max_logical_reads, qs.max_logical_writes, 
      DATEDIFF(Minute, qs.creation_time, GetDate()) AS 'Age in Cache'
      FROM sys.dm_exec_query_stats AS qs
      CROSS APPLY sys.dm_exec_sql_text(qs.sql_handle) AS qt
      WHERE qt.dbid = db_id() -- Filter by current database
      ORDER BY qs.total_worker_time DESC


This entry was posted in SQL Server 2005. Bookmark the permalink.

1 Response to SQL Server 2005 Emergency Diagnostic and Performance Queries Part 1

  1. guru charan says:

    Trying to formalize process of troubleshooting methodology.. feel free to review this article and comment if time permitshttp://www.sqlfundas.com/post/2009/10/06/Troubleshoot-High-CPU-performance-issue-in-SQL-Server-2005-SQL-Server-2008-(Part-3).aspx

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s