HTML ASCII webcam

Ascii art photo

We want to capture the webcam stream with an HTML page and convert it in real time into an ASCII stream that resembles the grayscale version of the video. If you are interested, we also present a similar C program that outputs the webcam in ASCII in a terminal, and a 3D webcam (you’ll need two webcams and filter glasses).

The final result of the video stream will be something like this picture:

ASCII frame of the video
ASCII frame of the video

In this article we assume that the basic concepts about color spaces are already known, so we’ll use them without much introduction; color spaces (RGB vs YUV vs Lab and others) can be a very interesting topic for further study.

We need to code three steps:

  1. get the video stream
  2. copy a frame of the stream in a canvas
  3. transform pixels in characters

The first two steps are straightforward and purely technical; you can easily google how to do them. The third step is the fun one. Each pixel is a triplet (r,g,b) with 0 ≤ r,g,b ≤ 255. Characters, on the other hand, have no color but only luminosity (for example, a character like “M” looks darker than “-”, and the space can be used as white), so we want to consider the luminosity of the triplet. We’ll use the formula Y = 0.299*r + 0.587*g + 0.114*b from the YUV color space (see YUV; it is not a simple average because each RGB component has a different perceived luminosity to our eyes: blue looks darker and green lighter).

We then need to define a “palette” of characters that we’ll use to replace different ranges of gray (some examples of palettes can be found here: http://paulbourke.net/dataformats/asciiart/).

In particular, in my code I have defined a palette where each gray range is replaced by one of 4 possible characters, so the stream will not show long runs of the same character where the luminosity is flat. The array with the palette is already ordered by increasing luminosity, so in the algorithm I just need to divide the luminosity by the width of a gray range to get the index of the right element of the palette. My palette is palette = [ "MBHW", "OV5T", "aeLo", "i=:;", "-,.'", "    "] (the last entry is four spaces, so that every entry has four characters).

Working code

<!-- Placeholder where the script writes getUserMedia error messages. -->
<p><span id="message"></span></p>

<!-- The raw webcam stream is kept hidden; it only feeds the canvas below. -->
<div style="display:none">
    <video id="video" playsinline autoplay></video>
</div>

<!-- Down-scaled copy of the current video frame; the script reads its pixels. -->
<canvas id="canvas"></canvas>
<br />
<!-- Receives the generated ASCII text, one line per canvas row. -->
<pre id="output" style="font-size: 6pt">
</pre>

<script>
'use strict';

// DOM elements used by the script:
//   video           - hidden <video> that plays the webcam stream
//   canvas          - receives a down-scaled copy of each frame
//   output          - <pre> that displays the generated ASCII text
//   errorMsgElement - <span> where capture errors are reported
const video = document.getElementById('video');
const canvas = document.getElementById('canvas');
const output = document.getElementById('output');
const errorMsgElement = document.querySelector('span#message');

// Media constraints passed to getUserMedia(). Only the camera is requested:
// the page never uses sound, so asking for the microphone would only add a
// needless permission prompt and make the request fail on machines that have
// no audio input device.
const constraints = {
    audio: false,
    video: {
        // NOTE(review): this asks for a 16:9 frame while the canvas is sized
        // from a 640x480 (4:3) frame, so the picture is squeezed horizontally
        // when the camera honours the request - confirm this is intended.
        width: 1280, height: 720
    }
};

// Size, in pixels of a 640x480 frame, of the cell represented by one output
// character. The cell is taller than wide, like the characters themselves.
const CHAR_W = 3;
const CHAR_H = 6;

/**
 * Size the sampling canvas and start the webcam capture.
 *
 * The canvas gets one pixel per output character: a 640x480 frame divided
 * into cells of CHAR_W x CHAR_H pixels. On success the stream is handed to
 * handleSuccess(); on failure the error is shown in the message element and
 * nothing is thrown.
 *
 * @returns {Promise<void>} settles once the capture has started or failed
 */
async function init() {
    // Canvas dimensions are integers; floor explicitly rather than relying on
    // the implicit conversion done by the width/height setters.
    canvas.width = Math.floor(640 / CHAR_W);
    canvas.height = Math.floor(480 / CHAR_H);

    try {
        const stream = await navigator.mediaDevices.getUserMedia(constraints);
        handleSuccess(stream);
    } catch (e) {
        // textContent, not innerHTML: the error text is plain data and must
        // not be parsed as markup.
        errorMsgElement.textContent = `navigator.getUserMedia error:${e.toString()}`;
    }
}

/**
 * Wire a freshly obtained media stream into the page and start converting.
 *
 * The stream is published as window.stream, attached to the hidden <video>
 * element, and then the periodic frame processing is launched.
 *
 * @param {MediaStream} stream - stream resolved by getUserMedia()
 */
function handleSuccess(stream) {
    Object.assign(window, { stream });
    Object.assign(video, { srcObject: stream });
    launchInterval();
}

// Start the capture as soon as the script runs; init() reports getUserMedia
// failures itself, so the returned promise is not awaited here.
init();

/**
 * Start the periodic conversion loop: at every tick one frame is copied from
 * the webcam to the canvas and turned into text by processOneFrame().
 *
 * @param {number} [periodMs=100] - delay between two frames, in milliseconds
 * @returns {*} the handle returned by setInterval(), so that the caller can
 *     stop the loop with clearInterval()
 */
function launchInterval(periodMs = 100) {
    return setInterval(processOneFrame, periodMs);
}

// 2D drawing context shared by processOneFrame() and frameToText().
// willReadFrequently hints the browser that getImageData() is called on every
// frame, so it can keep the canvas in a form that is cheap to read back.
const context = canvas.getContext('2d', { willReadFrequently: true });
/**
 * Convert one frame: draw the current video image scaled to the canvas size,
 * turn the canvas pixels into text and display the text in the output element.
 */
function processOneFrame() {
    context.drawImage(video, 0, 0, canvas.width, canvas.height);

    // textContent, not innerHTML: the frame is plain text, and a palette
    // containing '<', '>' or '&' must not be interpreted as markup.
    output.textContent = frameToText(false);
}

/**
 * Transform the frame currently drawn on the canvas into a string.
 *
 * Every canvas pixel becomes one character chosen by its luminance
 * (Y = 0.299*r + 0.587*g + 0.114*b); every canvas row becomes one line
 * terminated by "\n".
 *
 * @param {boolean} forceFirstChar - when true, only the first character of
 *     each palette entry is used, so a gray range always maps to the same
 *     character; when false, the four characters of the entry are cycled
 *     along the row, with odd rows shifted by two positions, so that flat
 *     areas do not show runs of a single repeated character
 * @returns {string} the ASCII rendering of the frame
 */
function frameToText(forceFirstChar) {
    // Entries go from the darkest-looking characters to the lightest; each
    // entry holds four interchangeable characters (the last is four spaces).
    const palette = ["MBHW", "OV5T", "aeLo", "i=:;", "-,.'", "    "];
    const lastLevel = palette.length - 1;

    // Width of the luminance range covered by one palette entry.
    const t = 255.0 / palette.length;

    // Read the dimensions once instead of at every loop iteration.
    const width = canvas.width;
    const height = canvas.height;
    const pixels = context.getImageData(0, 0, width, height).data;

    const rows = [];
    for (let i = 0; i < height; i++) {
        const rowShift = i % 2 === 0 ? 0 : 2;
        let line = "";
        for (let j = 0; j < width; j++) {
            // Four bytes per pixel (r, g, b, a); alpha is ignored.
            const idx = (i * width + j) * 4;
            const luma = 0.299 * pixels[idx] + 0.587 * pixels[idx + 1] + 0.114 * pixels[idx + 2];

            // A luminance of 255 would index one past the end, hence the clamp.
            const y = Math.min(Math.max(Math.floor(luma / t), 0), lastLevel);
            const variant = forceFirstChar ? 0 : (rowShift + j) % 4;
            line += palette[y][variant];
        }
        rows.push(`${line}\n`);
    }

    return rows.join("");
}

</script>

Code in action