Getting binary content in node.js with http.request

node.jsHttpBinary

node.js Problem Overview


I would like to retrieve binary data from an https request.

I found a similar question that uses the request method, https://stackoverflow.com/questions/14855015/getting-binary-content-in-node-js-using-request; it says setting encoding to null should work, but it doesn't.

// NOTE(review): `encoding` is not a supported option of the core https.request();
// it belongs to the third-party `request` package and is silently ignored here,
// which is why this snippet still delivers corrupted (string-coerced) data.
options = {
    hostname: urloptions.hostname,
    path: urloptions.path,
    method: 'GET',
    rejectUnauthorized: false, // accepts any TLS certificate -- unsafe outside testing
    encoding: null             // ignored by https.request (see note above)
};

req = https.request(options, function(res) {
    var data;
    // Accumulating into a string coerces each Buffer chunk to UTF-8,
    // which mangles binary payloads -- the root cause of the question.
    data = "";
    res.on('data', function(chunk) {
        return data += chunk;
    });
    res.on('end', function() {
        return loadFile(data);
    });
    res.on('error', function(err) {
        console.log("Error during HTTP request");
        console.log(err.message);
    });
})

Edit: setting encoding to 'binary' doesn't work either

node.js Solutions


Solution 1 - node.js

The accepted answer did not work for me (i.e., setting encoding to binary), even the user who asked the question mentioned it did not work.

Here's what worked for me, taken from: http://chad.pantherdev.com/node-js-binary-http-streams/

// Collect raw Buffer chunks and join them once at the end. Because
// setEncoding() is never called, every 'data' chunk arrives as a Buffer,
// so no bytes are lost to string decoding.
http.get(url.parse('http://myserver.com:9999/package'), function(res) {
    var data = [];

    res.on('data', function(chunk) {
        data.push(chunk);
    }).on('end', function() {
        //at this point data is an array of Buffers
        //so Buffer.concat() can make us a new Buffer
        //of all of them together
        var buffer = Buffer.concat(data);
        console.log(buffer.toString('base64'));
    });
});

Edit: Update answer following a suggestion by Semicolon

Solution 2 - node.js

Running on NodeJS 6.10 (and 8.10, tested in Feb 2019) in the AWS Lambda environment, none of the solutions above worked for me.

What did work for me was the following:

https.get(opt, (res) => {
    // 'binary' is an alias for 'latin1': each 'data' chunk will be a string.
    res.setEncoding('binary');
    let chunks = [];

    res.on('data', (chunk) => {
        // Re-encode each latin1 string chunk back into a Buffer so the
        // original bytes are recovered losslessly.
        chunks.push(Buffer.from(chunk, 'binary'));
    });

    res.on('end', () => {
        let binary = Buffer.concat(chunks);
        // binary is now a Buffer that can be used as Uint8Array or as
        // any other TypedArray for data processing in NodeJS or 
        // passed on via the Buffer to something else.
    });
});

Take note of the res.setEncoding('binary'); and Buffer.from(chunk, 'binary') lines. One sets the response encoding and the other creates a Buffer object from the string provided, in the encoding specified previously.

Solution 3 - node.js

You need to set encoding to response, not request:

req = https.request(options, function(res) {
    // 'binary' (alias for 'latin1') makes each 'data' chunk a string.
    res.setEncoding('binary');

    var data = [ ];

    res.on('data', function(chunk) {
        // FIX: after setEncoding() each chunk is a string, but Buffer.concat()
        // below only accepts an array of Buffers and would throw a TypeError.
        // Re-encode every chunk back into a Buffer before collecting it.
        data.push(Buffer.from(chunk, 'binary'));
    });
    res.on('end', function() {
        var binary = Buffer.concat(data);
        // binary is your data
    });
    res.on('error', function(err) {
        console.log("Error during HTTP request");
        console.log(err.message);
    });
});

Here is useful answer: https://stackoverflow.com/questions/5294470/node-js-writing-image-to-local-server

Solution 4 - node.js

Pärt Johanson I wish I could comment just to thank you for saving me from the recursive loop I've been in all day of ripping my hair out and then reading the (incredibly unhelpful) node docs on this, over, and over. Upon finding your answer, I went to dig into the docs, and I can't even find the res.setEncoding method documented anywhere! It's just shown as part of two examples, wherein they call res.setEncoding('utf8'); Where did you find this or how did you figure it out!?

Since I don't have enough reputation to comment, I'll at least contribute something useful with my answer: Pärt Johanson's answer worked 100% for me, I just tweaked it a bit for my needs because I'm using it to download and eval a script hosted on my server (and compiled with nwjc) using nw.Window.get().evalNWBin() on NWJS 0.36.4 / Node 11.11.0:

// Placeholder: fill in your https.request() options (host, path, headers, ...).
// FIX: the original `let opt = {...};` is itself a syntax error.
let opt = { /* ... your request options ... */ };
let req = require('https').request(opt, (res) => {
  // server error returned
  if (200 !== res.statusCode) {
    // Error bodies are textual, so UTF-8 decoding is appropriate here.
    res.setEncoding('utf8');
    let data = '';
    res.on('data', (strData) => {
      data += strData;
    });
    res.on('end', () => {
      if (!res.complete) {
        console.log('Server error, incomplete response: ' + data);
      } else {
        console.log('Server error, response: ' + data);
      }
    });
  }
  // expected response
  else {
    res.setEncoding('binary');
    let data = [];
    res.on('data', (binData) => {
      // 'binary' chunks are latin1 strings; re-encode each into a Buffer.
      data.push(Buffer.from(binData, 'binary'));
    });
    res.on('end', () => {
      data = Buffer.concat(data);
      if (!res.complete) {
        console.log('Request completed, incomplete response, ' + data.length + ' bytes received');
      } else {
        console.log('Request completed, ' + data.length + ' bytes received');
        nw.Window.get().evalNWBin(null, data);
      }
    });
  }
}); // FIX: was `};` -- the request(...) call was never closed (syntax error)
req.end(); // FIX: without end() the request is never actually sent

Edit: P.S. I posted this just in case anyone wanted to know how to handle a non-binary response -- my actual code goes a little deeper and checks response content type header to parse JSON (intended failure, i.e. 400, 401, 403) or HTML (unexpected failure, i.e. 404 or 500)

Solution 5 - node.js

  1. Don't call setEncoding() method, because by default, no encoding is assigned and stream data will be returned as Buffer objects
  2. Call Buffer.from() in the 'data' event callback to convert each chunk to a Buffer object.
// With no setEncoding() call, each 'data' chunk is already a Buffer;
// wrapping it in Buffer.from() makes a cheap defensive copy.
http.get('my_url', (response) => {
  const chunks = [];
  response.on('data', chunk => chunks.push(Buffer.from(chunk))) // Convert `chunk` to a `Buffer` object.
    .on('end', () => {
      const buffer = Buffer.concat(chunks);
      console.log(buffer.toString('base64'));
    });
});

Solution 6 - node.js

As others here, I needed to process binary data chunks from Node.js HTTP response (aka http.IncomingMessage).

None of the existing answers really worked for my Electron 6 project (bundled with Node.js 12.4.0, at the time of posting), besides Pärt Johanson's answer and its variants.

Still, even with that solution, the chunks were always arriving at the response.on('data', ondata) handler as string objects (rather than expected and desired Buffer objects). That incurred extra conversion with Buffer.from(chunk, 'binary'). I was getting strings regardless of whether I explicitly specified binary encoding with response.setEncoding('binary') or response.setEncoding(null).

The only way I managed to get the original Buffer chunks was to pipe the response to an instance of stream.Writable where I provide a custom write method:

const https = require('https');
const { Writable } = require('stream');

/**
 * Downloads `url` over HTTPS and returns the response body as an ArrayBuffer.
 *
 * The response is piped into a custom stream.Writable so every chunk arrives
 * as a raw Buffer, sidestepping environments where the 'data' event handler
 * receives strings (see surrounding discussion).
 *
 * @param {string} url - resource to fetch
 * @returns {Promise<ArrayBuffer>} exactly the bytes of the response body
 */
async function getBinaryDataAsync(url) {
  // start HTTP request, get binary response
  const { request, response } = await new Promise((resolve, reject) => {
    const request = https.request(url, {
      method: 'GET',
      headers: {
        'Accept': 'application/pdf',
        // 'identity' disables compression so the raw bytes arrive unmodified.
        'Accept-Encoding': 'identity'
      }
    });

    request.on('response', response =>
      resolve({ request, response }));
    request.on('error', reject);
    request.end();
  });

  // read the binary response by piping it to stream.Writable
  const buffers = await new Promise((resolve, reject) => {
    response.on('aborted', reject);
    response.on('error', reject);

    const chunks = [];

    const stream = new Writable({
      // Custom write() receives chunks untouched (as Buffers) from pipe().
      write: (chunk, encoding, notifyComplete) => {
        try {
          chunks.push(chunk);
          notifyComplete();
        }
        catch (error) {
          notifyComplete(error);
        }
      }
    });

    stream.on('error', reject);
    stream.on('finish', () => resolve(chunks));
    response.pipe(stream);
  });

  const buffer = Buffer.concat(buffers);
  // FIX: `buffer.buffer` is the *backing* ArrayBuffer; for pooled Buffers it
  // can be larger than the payload and shared with unrelated data. Slice out
  // exactly this Buffer's bytes instead of exposing the whole pool.
  return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
}

/** Demo entry point: download a sample PDF and report its size in bytes. */
async function main() {
  const sampleUrl =
    'https://download.microsoft.com/download/8/A/4/8A48E46A-C355-4E5C-8417-E6ACD8A207D4/VisualStudioCode-TipsAndTricks-Vol.1.pdf';
  const arrayBuff = await getBinaryDataAsync(sampleUrl);
  console.log(arrayBuff.byteLength);
}

main().catch((error) => console.error(error));

Update: as it turns out, this behavior only manifests with our own Web API server. So response.on('data') actually works well for the sample URL used in the above code snippet, and the stream is not needed there. It's odd, though, that the behavior is server-specific; I'm investigating it further.

Solution 7 - node.js

Everyone here is on the right track, but to put the issue to bed: you should never call .setEncoding().

If you call .setEncoding(), it will create a StringDecoder and set it as the default decoder. If you try to pass null or undefined, then it will still create a StringDecoder with its default decoder of UTF-8. Even if you call .setEncoding('binary'), it's the same as calling .setEncoding('latin1'). Yes, seriously.

I wish I could say you set ._readableState.encoding and _readableState.decoder back to null, but when you call .setEncoding() buffer gets wiped and replaced with a binary encoding of the decoded string of what was there before. That means your data has already been changed.

If you want to "undo" the decoding, you have to re-encode the data stream back into binary like so:

  req.on('data', (chunk) => {
      let buffer;
      // After setEncoding(), chunks arrive as strings; re-encode each one with
      // the stream's current encoding to recover the raw bytes.
      if (typeof chunk === 'string') {
        buffer = Buffer.from(chunk, req.readableEncoding);
      } else {
        // No encoding was set, so the chunk is already a Buffer.
        buffer = chunk;
      }
      // Handle chunk
  });

Of course, if you never call .setEncoding(), then you don't have to worry about the chunk being returned as a string.


After you have your chunk as a Buffer, you can work with it as you choose. In the interest of thoroughness, here's how to read a whole request with a preset buffer size, while also checking Content-Length:

const BUFFER_SIZE = 4096;

/**
 * Drains an entire IncomingMessage body into one Buffer, growing a
 * preallocated buffer geometrically instead of concatenating chunks.
 *
 * @param {IncomingMessage} req - request (or response) stream to read
 * @return {Promise<Buffer>} resolves with exactly the bytes received
 */
function readEntireRequest(req) {
  return new Promise((resolve, reject) => {
    // Content-Length is only a sizing hint; absent/zero falls back to the default.
    const expectedSize = Number.parseInt(req.headers['content-length'], 10) || null;
    let buf = Buffer.alloc(Math.min(BUFFER_SIZE, expectedSize || BUFFER_SIZE));
    let written = 0;

    req.on('data', (chunk) => {
      if (written + chunk.length > buf.length) {
        // Not enough room: double capacity until the chunk fits, then copy over.
        let capacity = buf.length * 2;
        while (capacity < chunk.length + buf.length) {
          capacity *= 2;
        }
        const bigger = Buffer.alloc(capacity);
        buf.copy(bigger);
        buf = bigger;
      }
      written += chunk.copy(buf, written);
      if (written === expectedSize) {
        // If we trusted Content-Length, we could resolve immediately here.
      }
    });

    req.on('end', () => {
      // Trim unused capacity by returning a view over the filled region.
      resolve(written < buf.length ? buf.subarray(0, written) : buf);
    });

    req.on('error', (err) => {
      reject(err);
    });
  });
}

The choice to use a buffer size here is to avoid immediately reserving a large amount of memory, but instead only fetch RAM as needed. The Promise functionality is just for convenience.

Attributions

All content for this solution is sourced from the original question on Stackoverflow.

The content on this page is licensed under the Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.

Content TypeOriginal AuthorOriginal Content on Stackoverflow
Questionedi9999View Question on Stackoverflow
Solution 1 - node.jsGuaycuruView Answer on Stackoverflow
Solution 2 - node.jsPärt JohansonView Answer on Stackoverflow
Solution 3 - node.jsmokaView Answer on Stackoverflow
Solution 4 - node.jscaffeinatedbitsView Answer on Stackoverflow
Solution 5 - node.jsNaijia LiuView Answer on Stackoverflow
Solution 6 - node.jsnoseratioView Answer on Stackoverflow
Solution 7 - node.jsShortFuseView Answer on Stackoverflow