parsetds.py: Extract SQL commands and login credentials from TDS (MS-SQL) TCP data

########################################################
# Python script to parse (MS-SQL) TDS data streams and #
# extract SQL statements and TDS7 login credentials.   #
#                                                      #
# parsetds.py v2013.03.25                              #
# https://malwaremusings.com/scripts/parsetds-py        #
#                                                      #
# usage: parsetds.py <tcpflowfilename>                 #
#   where tcpflowfilename is a file containing the TCP #
#   data from an MS-SQL (or other) TDS based           #
#   network connection.                                #
########################################################

#
# Microsoft's TDS implementation documentation
# http://msdn.microsoft.com/en-us/library/dd304523.aspx
#
# Description of the TDS packet type 0x17 packets
# http://social.msdn.microsoft.com/Forums/da-DK/os_windowsprotocols/thread/bfb8b14e-3696-4c3d-848f-068b6918b70f
#

import sys
import struct

tdsfile = open(sys.argv[1],"r")
tdsdata = tdsfile.read()

# Offset from the start of the TDS stream
off = 0

# Variable to build the SQLBatch SQL command string
# as the command string may span TDS packets
d = ""

# Until we reach the end of the TDS data stream...
while (off < len(tdsdata)):
  # get the packet type of the next packet
  pkttype = ord(tdsdata[off])

  ###
  # A. Check for TLS packet.
  #    If we find one, skip it, as we won't be able to decode the data
  ###

  if (pkttype == 0x17):
    tlsver = struct.unpack(">H",tdsdata[off + 1:off + 1 + 2])[0]
    pktlen = struct.unpack(">H",tdsdata[off + 3:off + 3 + 2])[0] + 5  # + 5 as len doesn't include TLS packet header

    #print("TLS Pkt Ver: %x" % tlsver)
  else:
    ###
    # B. Get the various packet header variables
    #    (packet status, length, spid (TDS documentation)/channel number (Wireshark), packet ID, and window)
    ###

    pktstat = ord(tdsdata[off + 1])
    pktlen = struct.unpack(">H",tdsdata[off + 2:off + 2 + 2])[0]
    pktspid = struct.unpack(">H",tdsdata[off + 4:off + 4 + 2])[0]
    pktpktn = ord(tdsdata[off + 6])
    pktwin = ord(tdsdata[off + 7])

    #print("TDS Pkt Type: %d" % pkttype)

    ###
    # C. Get the TDS packet's data payload and check the packet type
    ###

    tdspkt = tdsdata[off + 8:off + pktlen]

    ###
    # D. Handle SQL Batch packet
    ###

    if (pkttype == 0x01):
      # SQL Batch
      #  ALL_HEADERS
      #  Data

      if (pktpktn == 1):
        # ALL_HEADERS
        ah_totlen = struct.unpack("<L",tdspkt[0:4])[0]
        ahoff = 4
        while (ahoff < ah_totlen):
          ah_hdrlen = struct.unpack("<L",tdspkt[ahoff:ahoff + 4])[0]
          ah_hdrtyp = struct.unpack("<H",tdspkt[ahoff + 4:ahoff + 6])[0]
          ah_hdrdat = tdspkt[ahoff + 6:ahoff + ah_hdrlen]

          if (ah_hdrtyp == 0x02):
            transdesc = struct.unpack("<Q",ah_hdrdat[0:8])[0]
            outreqcnt = struct.unpack("<L",ah_hdrdat[8:12])[0]
          ahoff += ah_hdrlen
        d = tdspkt[ah_totlen:].decode('utf-16')
      else:
        d += tdspkt[0:].decode('utf-16')
      if (pktstat & 0x01): print "SQLBatch:%s" % d

    ###
    # E. Handle TDS7 Login packet
    ###

    if (pkttype == 0x10):

      ###
      # E.1 get variables from TDS7 Login packet header
      ###

      login_totlen = struct.unpack("<L",tdspkt[0:4])[0]
      login_tdsver = struct.unpack("<L",tdspkt[4:8])[0]
      login_clntver = struct.unpack("<L",tdspkt[12:16])[0]
      login_tz = struct.unpack("<L",tdspkt[28:32])[0]

      #print("DBG: totlen %d\ttdsver %x\tclntver %x\ttz %d" % (login_totlen,login_tdsver,login_clntver,login_tz))

      ###
      # E.2 extract each of the variable length values
      #     (client name, user name, password, application name, server name, library name, database name)
      ###

      login_lenoff = tdspkt[36:86]
      login_clntnameoff = struct.unpack("<H",login_lenoff[0:2])[0]
      login_clntnamelen = struct.unpack("<H",login_lenoff[2:4])[0]
      #print("DBG: clntname %d:%d" % (login_clntnameoff,login_clntnamelen))
      login_clntname = tdspkt[login_clntnameoff:login_clntnameoff + (login_clntnamelen * 2)].decode('utf-16').encode(errors = 'backslashreplace')

      login_usernameoff = struct.unpack("<H",login_lenoff[4:6])[0]
      login_usernamelen = struct.unpack("<H",login_lenoff[6:8])[0]
      #print("DBG: username %d:%d" % (login_usernameoff,login_usernamelen))
      login_username = tdspkt[login_usernameoff:login_usernameoff + (login_usernamelen * 2)].decode('utf-16').encode(errors = 'backslashreplace')

      login_passwdoff = struct.unpack("<H",login_lenoff[8:10])[0]
      login_passwdlen = struct.unpack("<H",login_lenoff[10:12])[0]
      #print("DBG: passwd %d:%d" % (login_passwdoff,login_passwdlen))
      login_passwd = tdspkt[login_passwdoff:login_passwdoff + (login_passwdlen * 2)].decode('utf-16').encode(errors = 'backslashreplace')

      login_appnameoff = struct.unpack("<H",login_lenoff[12:14])[0]
      login_appnamelen = struct.unpack("<H",login_lenoff[14:16])[0]
      #print("DBG: appname %d:%d" % (login_appnameoff,login_appnamelen))
      login_appname = tdspkt[login_appnameoff:login_appnameoff + (login_appnamelen * 2)].decode('utf-16').encode(errors = 'backslashreplace')

      login_svrnameoff = struct.unpack("<H",login_lenoff[16:18])[0]
      login_svrnamelen = struct.unpack("<H",login_lenoff[18:20])[0]
      #print("DBG: svrname %d:%d" % (login_svrnameoff,login_svrnamelen))
      login_svrname = tdspkt[login_svrnameoff:login_svrnameoff + (login_svrnamelen * 2)].decode('utf-16').encode(errors = 'backslashreplace')

      login_libnameoff = struct.unpack("<H",login_lenoff[24:26])[0]
      login_libnamelen = struct.unpack("<H",login_lenoff[26:28])[0]
      #print("DBG: libname %d:%d" % (login_libnameoff,login_libnamelen))
      login_libname = tdspkt[login_libnameoff:login_libnameoff + (login_libnamelen * 2)].decode('utf-16').encode(errors = 'backslashreplace')

      login_dbnameoff = struct.unpack("<H",login_lenoff[32:34])[0]
      login_dbnamelen = struct.unpack("<H",login_lenoff[34:36])[0]
      #print("DBG: dbname %d:%d" % (login_dbnameoff,login_dbnamelen))
      login_dbname = tdspkt[login_dbnameoff:login_dbnameoff + (login_dbnamelen * 2)].decode('utf-16').encode(errors = 'backslashreplace')

      print("Login7:%s|%s|%s|%s|%s|%s|%s" % (login_clntname,login_username,login_passwd,login_appname,login_svrname,login_libname,login_dbname))

    ###
    # F. Handle large slab of null bytes caused by tcpflow's handling of
    #    TCP connections reusing the same IP address/TCP port pairs
    ###

    #
    # The TDS documentation does not define a packet type of 0
    # so we *shouldn't* see one in a connection. Hence we'll use this
    # and a packet length of 0, to indicate the problem condition.
    #
    # Unfortunately, this skipping process is awfully slow
    # (it would be better to stop tcpflow from creating such output files)
    #

    if (pkttype == 0x00 and pktlen == 0x00):
      print("Warning: Found tcpflow reused ports problem. Searching file for next TDS message...")
      while (struct.unpack(">Q",tdsdata[off:off + 8]) == 0): off += 8
      while (ord(tdsdata[off]) == 0x00): off += 1
      print("Warning: Resuming")

  ###
  # G. Increment offset to point to start of next TDS packet
  ###

  off += int(pktlen)

2 comments on “parsetds.py: Extract SQL commands and login credentials from TDS (MS-SQL) TCP data

  1. Pingback: Extracting Data From MS-SQL TDS Streams in a libpcap File | Malware Musings

  2. Pingback: A look at some MS-SQL attacks (overview) | Malware Musings

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s