Screen Capture for Fun and Profit

In Screen Sharing from a Browser I wrote about how relatively easy it is to display a continuous snapshot of a remote screen, and even send mouse and keyboard events back to it.  That was the essence of modern day browser based screen sharing.  Everything else is about compression for bandwidth management.

In this article, I’ll present the “server” side of the equation.  Since I’ve discovered the ‘sourcecode’ bracket in WordPress, I can even present the code with line numbers.  So, here in its entirety is the server side:

 


local ffi = require "ffi"

local WebApp = require("WebApp")

local HttpRequest = require "HttpRequest"
local HttpResponse = require "HTTPResponse"
local URL = require("url")
local StaticService = require("StaticService")

local GDI32 = require ("GDI32")
local User32 = require ("User32")
local BinaryStream = require("core.BinaryStream")
local MemoryStream = require("core.MemoryStream")
local WebSocketStream = require("WebSocketStream")
local Network = require("Network")

local utils = require("utils")
local zlib = require ("zlib")

local UIOSimulator = require("UIOSimulator")

--[[
	Application Variables

	Everything below is evaluated once, when the script is loaded, and is
	then shared (as upvalues) by the capture and serving functions that
	follow.
--]]
-- Screen dimensions as reported by User32.GetSystemMetrics for the
-- CXSCREEN / CYSCREEN metrics.
local ScreenWidth = User32.GetSystemMetrics(User32.FFI.CXSCREEN);
local ScreenHeight = User32.GetSystemMetrics(User32.FFI.CYSCREEN);

-- Size of the source rectangle grabbed from the screen on each capture;
-- here the whole screen.
local captureWidth = ScreenWidth;
local captureHeight = ScreenHeight;

-- Size and bits-per-pixel of the bitmap the capture is blitted into and
-- that is later serialized as a .bmp. Currently the same size as the
-- capture rectangle, so the StretchBlt in captureScreen() does no scaling.
local ImageWidth = captureWidth;
local ImageHeight = captureHeight;
local ImageBitCount = 16;

-- hbmScreen is the DIB section captureScreen() draws into; hdcScreen is
-- the display DC it copies from.
-- NOTE(review): GDIDIBSection is not declared as a local in this file;
-- presumably it is a global exported by the GDI32 module -- confirm.
local hbmScreen = GDIDIBSection(ImageWidth, ImageHeight, ImageBitCount);
local hdcScreen = GDI32.CreateDCForDefaultDisplay();

-- Used by handleStartupRequest() to obtain the local address that is
-- substituted into the startup page.
local net = Network();

--[[
	Application Functions
--]]
--- Copy a rectangle of the screen into the shared DIB section.
-- The source rectangle is read from the display DC (hdcScreen) and
-- stretch-blitted with SRCCOPY onto the full ImageWidth x ImageHeight
-- area of hbmScreen, starting at its (0, 0) corner.
--
-- NOTE: this function is intentionally left as a global (it is declared
-- without `local`), so that any external code calling it keeps working.
--
-- @param nWidthSrc   width of the source rectangle; defaults to captureWidth
-- @param nHeightSrc  height of the source rectangle; defaults to captureHeight
-- @param nXOriginSrc left edge of the source rectangle; defaults to 0
-- @param nYOriginSrc top edge of the source rectangle; defaults to 0
function captureScreen(nWidthSrc, nHeightSrc, nXOriginSrc, nYOriginSrc)
  -- All four arguments are optional; previously only the origins were,
  -- and an omitted size was handed to StretchBlt as nil.
  nWidthSrc = nWidthSrc or captureWidth;
  nHeightSrc = nHeightSrc or captureHeight;
  nXOriginSrc = nXOriginSrc or 0;
  nYOriginSrc = nYOriginSrc or 0;

  GDI32.Lib.StretchBlt(hbmScreen.hDC.Handle,
    0, 0, ImageWidth, ImageHeight,
    hdcScreen.Handle,
    nXOriginSrc, nYOriginSrc, nWidthSrc, nHeightSrc,
    GDI32.FFI.SRCCOPY);

  -- NOTE(review): presumably forces pending GDI drawing to complete so
  -- the pixel buffer can be read directly afterwards -- confirm.
  hbmScreen.hDC:Flush();
end

-- Serve the screen up as a bitmap image (.bmp)
--- Compute the total byte size of a .bmp file for the given image.
-- The result is the 54 bytes of headers written by writeImage() plus
-- one aligned row of pixels per scan line.
--
-- @param width     image width in pixels
-- @param height    image height in pixels; only the magnitude is used
-- @param bitcount  bits per pixel
-- @param alignment row alignment passed to GDI32.GetAlignedByteCount;
--                  defaults to 4
-- @return total file size in bytes
local getContentSize = function(width, height, bitcount, alignment)
  local headerBytes = 54;
  local stride = GDI32.GetAlignedByteCount(width, bitcount, alignment or 4);

  return headerBytes + stride * math.abs(height);
end

-- Byte size of one complete .bmp file for the capture image; memstream is
-- the reusable buffer writeImage() serializes each frame into.
local filesize = getContentSize(ImageWidth, ImageHeight, ImageBitCount);
local memstream = MemoryStream.new(filesize);

-- zstream receives the zlib-compressed frame. zlib's compress() requires
-- the destination to hold at least compressBound(sourceLen) bytes, which is
-- slightly LARGER than the input; a buffer of exactly `filesize` makes
-- compress() fail on frames that do not compress. The expression below is
-- zlib's own compressBound() formula:
--   n + (n >> 12) + (n >> 14) + (n >> 25) + 13
local zstream = MemoryStream.new(
  filesize
  + math.floor(filesize / 4096)
  + math.floor(filesize / 16384)
  + math.floor(filesize / 33554432)
  + 13);

--- Serialize a DIB section into a stream as a complete .bmp file.
-- Rewinds the stream to position 0, writes the 14-byte file header and the
-- 40-byte bitmap information header (54 bytes in total), then appends the
-- raw pixel rows.
--
-- @param dibsec    DIB section providing Info.bmiHeader and Pixels
-- @param memstream stream to write into; after the call its position is
--                  where writing stopped
local writeImage = function(dibsec, memstream)
  local header = dibsec.Info.bmiHeader;

  -- Rows are padded to a 4-byte boundary; only the magnitude of biHeight
  -- is used for the row count.
  local rowsize = GDI32.GetAlignedByteCount(header.biWidth, header.biBitCount, 4);
  local pixelarraysize = rowsize * math.abs(header.biHeight);
  local pixeloffset = 54;
  local filesize = pixeloffset + pixelarraysize;

  memstream:Seek(0);

  local bs = BinaryStream.new(memstream);

  -- Write File Header
  bs:WriteByte(string.byte('B'))
  bs:WriteByte(string.byte('M'))
  bs:WriteInt32(filesize);
  bs:WriteInt16(0);            -- reserved
  bs:WriteInt16(0);            -- reserved
  bs:WriteInt32(pixeloffset);  -- offset from file start to the pixel data

  -- Bitmap information header
  bs:WriteInt32(40);           -- size of this header in bytes
  bs:WriteInt32(header.biWidth);
  bs:WriteInt32(header.biHeight);
  bs:WriteInt16(header.biPlanes);
  bs:WriteInt16(header.biBitCount);
  bs:WriteInt32(header.biCompression);
  bs:WriteInt32(header.biSizeImage);
  bs:WriteInt32(header.biXPelsPerMeter);
  bs:WriteInt32(header.biYPelsPerMeter);
  bs:WriteInt32(header.biClrUsed);
  bs:WriteInt32(header.biClrImportant);

  -- Write the actual pixel data straight into the underlying stream.
  -- NOTE(review): the trailing 0 is presumably the offset into
  -- dibsec.Pixels to start copying from -- confirm against MemoryStream.
  memstream:WriteBytes(dibsec.Pixels, pixelarraysize, 0);
end

--- Capture the screen and send it as a zlib-compressed .bmp response.
-- Captures into hbmScreen, serializes it into memstream, compresses that
-- into zstream, and writes the compressed bytes with
-- "Content-Encoding: deflate".
--
-- @param response   HTTP response object to write to
-- @param compressed currently ignored: the image is always compressed
-- @return whatever the final write returns (the caller treats a truthy
--         first value as "keep the connection")
local getSingleShot = function(response, compressed)
  captureScreen(captureWidth, captureHeight);

  writeImage(hbmScreen, memstream);

  -- compressedLen is in/out: capacity of zstream going in, number of
  -- bytes produced coming out. The source length is the stream position
  -- writeImage() stopped at.
  zstream:Seek(0);
  local compressedLen = ffi.new("int[1]", zstream.Length);
  local err = zlib.compress(zstream.Buffer, compressedLen, memstream.Buffer, memstream:GetPosition());

  -- zlib reports success as Z_OK (0). Anything else means zstream does not
  -- hold a valid image, so answer with an error instead of sending it.
  -- Only numeric results are checked, in case the zlib binding in use
  -- returns something other than the raw zlib status code.
  if type(err) == "number" and err ~= 0 then
    response:writeHead("500");
    return response:writeEnd();
  end

  zstream.BytesWritten = compressedLen[0];

  local contentlength = zstream.BytesWritten;
  local headers = {
    ["Content-Length"] = tostring(contentlength);
    ["Content-Type"] = "image/bmp";
    ["Content-Encoding"] = "deflate";
  }

  response:writeHead("200", headers);
  response:WritePreamble();
  return response.DataStream:WriteBytes(zstream.Buffer, zstream.BytesWritten);
end

--- Replay one remote user-input command on the local machine.
-- The command is parsed with utils.parseparams into a table of values.
-- Recognized actions:
--   mousemove / mousedown / mouseup  -- need numeric "x" and "y"
--   keydown / keyup                  -- need numeric "which"
-- The command arrives from a remote websocket client, so anything that
-- fails to parse, has an unknown action, or lacks a numeric argument is
-- ignored rather than being passed on to the input simulator as nil.
--
-- @param command command string as received from the client
local handleUIOCommand = function(command)
  local values = utils.parseparams(command)
  if not values then
    return
  end

  local action = values["action"]

  if action == "mousemove" or action == "mousedown" or action == "mouseup" then
    local x = tonumber(values["x"])
    local y = tonumber(values["y"])
    if not (x and y) then
      return
    end

    if action == "mousemove" then
      UIOSimulator.MouseMove(x, y)
    elseif action == "mousedown" then
      UIOSimulator.MouseDown(x, y)
    else
      UIOSimulator.MouseUp(x, y)
    end
  elseif action == "keydown" or action == "keyup" then
    local which = tonumber(values["which"])
    if not which then
      return
    end

    if action == "keydown" then
      UIOSimulator.KeyDown(which)
    else
      UIOSimulator.KeyUp(which)
    end
  end
end

-- Startup page with all placeholders already substituted. Built on the
-- first successful request and reused for every later one.
local startupContent = nil

--- Serve the browser-side viewer page.
-- On first use, reads "viewscreen2.htm" and replaces every placeholder of
-- the form <?name?> (name made of letters only) whose name is a key in the
-- replacement table; placeholders with other names are left untouched.
-- Responds with 500 if the file cannot be opened, otherwise with the
-- page as text/html.
--
-- @param request  the parsed HTTP request (not used)
-- @param response HTTP response object to write to
-- @return true in every case
local handleStartupRequest = function(request, response)
  if not startupContent then
    local template = io.open("viewscreen2.htm")

    if not template then
      response:writeHead("500")
      response:writeEnd();

      return true
    end

    local page = template:read("*all")
    template:close();

    -- Values the page needs to size itself and to connect back here.
    local replacements = {}
    replacements["frameinterval"] = 300
    replacements["hostip"] = net:GetLocalAddress()
    replacements["capturewidth"] = captureWidth
    replacements["captureheight"] = captureHeight
    replacements["imagewidth"] = ImageWidth
    replacements["imageheight"] = ImageHeight
    replacements["screenwidth"] = ScreenWidth
    replacements["screenheight"] = ScreenHeight
    replacements["serviceport"] = Runtime.config.port

    -- Keep only the substituted text; the replacement count is dropped.
    local substituted = string.gsub(page, "%<%?(%a+)%?%>", replacements)
    startupContent = substituted
  end

  response:writeHead("200",{["Content-Type"]="text/html"})
  response:writeEnd(startupContent);

  return true
end

--[[
  Responding to remote user input
]]--
--- Read input commands from a websocket until it stops delivering frames.
-- Each frame's payload is converted to a Lua string and handed to
-- handleUIOCommand(). The loop ends the first time ReadFrame() returns a
-- falsy first value.
--
-- @param ws websocket stream to read frames from
local handleUIOSocketData = function(ws)
  while true do
    local bytes, bytesread = ws:ReadFrame()

    if not bytes then
      -- The original printed an undefined global `err` here, which is always
      -- nil. Report ReadFrame's second result instead.
      -- NOTE(review): presumably that is the error value on failure -- confirm
      -- against WebSocketStream.
      print("handleUIOSocketData() - END: ", bytesread);
      break
    end

    local command = ffi.string(bytes, bytesread);
    handleUIOCommand(command);
  end
end

--- Upgrade a request to a websocket and start reading input from it.
-- Performs the server side of the handshake, then spawns
-- handleUIOSocketData on the runtime's scheduler with the new websocket.
--
-- @param request  the parsed HTTP request
-- @param response HTTP response object used for the handshake
-- @return false, so the caller does not put the stream back on its
--         pending queue
local handleUIOSocket = function(request, response)
  local socket = WebSocketStream();

  socket:RespondWithServerHandshake(request, response);
  Runtime.Scheduler:Spawn(handleUIOSocketData, socket);

  return false;
end

--[[
  Primary Service Response routine
]]--
--- Parse one HTTP request from a stream and dispatch it by path.
-- Routes:
--   /uiosocket    websocket for remote mouse/keyboard input
--   /screen.bmp   compressed snapshot of the screen
--   /screen       the viewer page
--   /favicon.ico, /jquery.js   static files
-- Anything else gets a 404. If the request cannot be parsed, nothing is
-- written and the stream is not re-queued.
--
-- @param stream       connection stream to read the request from
-- @param pendingqueue queue the stream is returned to when the handler
--                     reports success
-- @return the result of pendingqueue:Enqueue(stream) on success,
--         otherwise nothing
local HandleSingleRequest = function(stream, pendingqueue)
  local request = HttpRequest.Parse(stream);
  if not request then
    return
  end

  local path = URL.parse(request.Resource).path
  local response = HttpResponse.Open(stream)

  -- Request path -> file name for content served as-is.
  local staticFiles = {
    ["/favicon.ico"] = "favicon.ico",
    ["/jquery.js"] = "jquery.js",
  }

  local keepStream
  if path == "/uiosocket" then
    keepStream = handleUIOSocket(request, response)
  elseif path == "/screen.bmp" then
    keepStream = getSingleShot(response, true);
  elseif path == "/screen" then
    keepStream = handleStartupRequest(request, response)
  elseif staticFiles[path] then
    keepStream = StaticService.SendFile(staticFiles[path], response)
  else
    response:writeHead("404");
    keepStream = response:writeEnd();
  end

  if not keepStream then
    return
  end

  return pendingqueue:Enqueue(stream)
end

--[[
  Start running the service
--]]
-- Listening port: the first command-line argument when it parses as a
-- number, otherwise 8080.
local serviceport = tonumber(arg[1]) or 8080

-- Runtime is deliberately a global. handleStartupRequest() reads
-- Runtime.config.port and handleUIOSocket() uses Runtime.Scheduler, and
-- both are defined above this line, so a `local` declared here would not
-- be visible to them.
Runtime = WebApp({port = serviceport, backlog=100})

-- Start the service with HandleSingleRequest as the request handler.
-- NOTE(review): presumably this call does not return while the service is
-- running -- confirm against WebApp.
Runtime:Run(HandleSingleRequest);

As a ‘server’ this code is responsible for handling a couple of things. First, it needs to act as a basic http server, serving up relatively static content to get things started. When the user specifies the url http://localhost/screen, the server will respond by sending back the browser code that I showed in the previous article. The function “handleStartupRequest()” performs this operation. The file ‘viewscreen2.htm’ is HTML, but it’s a bit of a template as well. You can delimit a piece to be replaced by enclosing it in a tag such as: <?hostip?>. This tag can be replaced by any bit of code that you choose. In this case, I’m doing replacements for the size of the image, the size of the screen, the frame interval, and the host IP and port. These last two are the most important because without them, you won’t be able to set up the websocket.

The other parts are fairly straightforward. Of particular note is the ‘captureScreen()’ function. In Windows, since the dawn of man, there has been GDI for graphics. Good ol’ GDI still has the ability to capture the screen, or a single window, or a portion of the screen. This still works in Windows 8 as well. So, capturing the screen is nothing more than drawing into a DIBSection, and that’s that. Just one line of code.

The magic happens after that. Rather than handing the raw image back to the client, I want to send it out as a compressed BMP image. I could choose PNG, or JPG, or any other format browsers are capable of handling, but BMP is the absolute easiest to deal with, even if it is the most bulky. I figure that since I’m using zlib to deflate it before sending it out, that will be somewhat helpful, and it turns out this works just fine.

The rest of the machinery there is just to deal with being an http server. A lot is hidden behind the ‘WebApp’ and the ‘WebSocket’ classes. Those are good for another discussion.

So, all in, this is about 300 lines of code. Not too bad for a rudimentary screen sharing service. Of course, there’s a supporting cast that runs into the thousands of lines of code, but I’m assuming this as a given since frameworks such as Node and various others exist.

I could explain each and every line of code here, but I think it’s small enough and easy enough to read that it won’t be necessary. I will point out that there’s not much difference between sending single snapshots one at a time vs having an open stream and presenting the screen as h.264 or WebM. For that scenario, you just need a library that can capture snapshots of the screen and turn them into the properly encoded video stream. Since you have the WebSocket, it could easily be put to use for that purpose, rather than just receiving the mouse and keyboard events.

Food for thought.

Advertisements

5 Comments on “Screen Capture for Fun and Profit”

  1. Pasi says:

    Hello.

    Where did you get WebSocketStream?

    • I made it myself. I’ll do a writeup on that in a little while after I’ve completed some testing on the client side of it.

      • Pasi says:

        Also a native Lua version in here:

        https://github.com/lipp/lua-websockets

      • Thanks for the pointer. I did have a peek at your code before doing my implementation. Mine is a bit more tailored to my particular environment. I try not to do any table allocations for example, and I write to a generic stream interface, so I can actually create test cases by writing stream exchanges to files and comparing them.

        There is a larger project this bit fits into which has a more specialized socket management scheme as well, so I don’t use luasocket, or copas, or msgpack and the like.

        I will use your code as an interop test though. If my implementation can work with yours, then that will be at least one more proof point that it is relatively correct.

      • Pasi says:

        That is not my code, but I have tested it a little this new Copas version because I need also Windows. Copas version works, but not all the time. Maybe lua-ev version works better.

        I have been using older libwebsockets binding for a year, but I need native ffi binding because compiling libraries is horror in windows and was not easy in OSX either.

        Actually what I need is simple way to do websockets with my own native ffi-based tcp server.
        I have started to learn Luajit in this project: https://github.com/stuta/Luajit-Tcp-Server


Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s