Downloading images with node.js


node.js Problem Overview


I'm trying to write a script to download images using node.js. This is what I have so far:

var http = require('http')
var fs = require('fs')

var maxLength = 10 // 10mb
var download = function(uri, callback) {
  http.request(uri)
    .on('response', function(res) {
      if (res.headers['content-length'] > maxLength*1024*1024) {
        callback(new Error('Image too large.'))
      } else if (!~[200, 304].indexOf(res.statusCode)) {
        callback(new Error('Received an invalid status code.'))
      } else if (!res.headers['content-type'].match(/image/)) {
        callback(new Error('Not an image.'))
      } else {
        var body = ''
        res.setEncoding('binary')
        res
          .on('error', function(err) {
            callback(err)
          })
          .on('data', function(chunk) {
            body += chunk
          })
          .on('end', function() {
            // What about Windows?!
            var path = '/tmp/' + Math.random().toString().split('.').pop()
            fs.writeFile(path, body, 'binary', function(err) {
              callback(err, path)
            })
          })
      }
    })
    .on('error', function(err) {
      callback(err)
    })
    .end();
}

I, however, want to make this more robust:

  1. Are there libraries that do this and do this better?
  2. Is there a chance that response headers lie (about length, about content type)?
  3. Are there any other status codes I should care about? Should I bother with redirects?
  4. I think I read somewhere that binary encoding is going to be deprecated. What do I do then?
  5. How can I get this to work on Windows? (see the sketch below for points 4 and 5)
  6. Any other ways you can make this script better?

Why: for a feature similar to imgur, where users can give me a URL, I download that image and rehost it in multiple sizes.
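For points 4 and 5, a minimal sketch of how the accumulation and temp-file steps could look without the 'binary' encoding or the hard-coded /tmp path; saveResponse is a hypothetical helper name, not something from the question:

var fs = require('fs')
var os = require('os')
var path = require('path')

// Hypothetical helper: collect Buffer chunks instead of a 'binary' string,
// then write them to the platform's temp directory (works on Windows too).
function saveResponse(res, callback) {
  var chunks = []
  res
    .on('error', callback)
    .on('data', function(chunk) {
      chunks.push(chunk) // each chunk is already a Buffer
    })
    .on('end', function() {
      var file = path.join(os.tmpdir(), Math.random().toString().slice(2))
      fs.writeFile(file, Buffer.concat(chunks), function(err) {
        callback(err, file)
      })
    })
}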

node.js Solutions


Solution 1 - node.js

I'd suggest using the request module. Downloading a file is as simple as the following code:

var fs = require('fs'),
    request = require('request');

var download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    console.log('content-type:', res.headers['content-type']);
    console.log('content-length:', res.headers['content-length']);

    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};

download('https://www.google.com/images/srpr/logo3w.png', 'google.png', function(){
  console.log('done');
});
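The answer above only logs the HEAD response. A hedged variation (not part of the original answer) could use those headers to reject oversized or non-image URLs before starting the download, keeping in mind that headers can still lie:

var fs = require('fs'),
    request = require('request');

var maxLength = 10 * 1024 * 1024; // 10mb

var downloadChecked = function(uri, filename, callback) {
  request.head(uri, function(err, res, body) {
    if (err) return callback(err);
    if (!/image/.test(res.headers['content-type'] || ''))
      return callback(new Error('Not an image.'));
    if (Number(res.headers['content-length']) > maxLength)
      return callback(new Error('Image too large.'));

    // Headers look sane; stream the body straight to disk
    request(uri)
      .on('error', callback)
      .pipe(fs.createWriteStream(filename))
      .on('close', callback);
  });
};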

Solution 2 - node.js

I ran into this problem a few days ago. For a pure Node.js answer, I would suggest using a Stream to merge the chunks together.

var http = require('http'),
    Stream = require('stream').Transform,
    fs = require('fs');

var url = 'http://www.google.com/images/srpr/logo11w.png';

http.request(url, function(response) {
  var data = new Stream();

  response.on('data', function(chunk) {
    data.push(chunk);
  });

  response.on('end', function() {
    fs.writeFileSync('image.png', data.read());
  });
}).end();

Newer Node versions don't work well with binary strings, so concatenating chunks into a string is not a good idea when working with binary data.

Just be careful when using data.read(): it empties the stream, so a second read() returns nothing. If you want to use the data more than once, store it somewhere.
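If the Transform-stream trick feels opaque, a minimal alternative sketch is to collect the Buffer chunks in a plain array and concatenate them once at the end:

var http = require('http'),
    fs = require('fs');

http.get('http://www.google.com/images/srpr/logo11w.png', function(response) {
  var chunks = [];

  response.on('data', function(chunk) {
    chunks.push(chunk); // each chunk is a Buffer
  });

  response.on('end', function() {
    fs.writeFileSync('image.png', Buffer.concat(chunks));
  });
});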

Solution 3 - node.js

You can use Axios (a promise-based HTTP client for Node.js) to download images in the order of your choosing in an asynchronous environment:

npm i axios

Then, you can use the following basic example to begin downloading images:

const fs = require('fs');
const axios = require('axios');

/* ============================================================
  Function: Download Image
============================================================ */

const download_image = (url, image_path) =>
  axios({
    url,
    responseType: 'stream',
  })
    .then(
      response =>
        new Promise((resolve, reject) => {
          response.data
            .pipe(fs.createWriteStream(image_path))
            .on('finish', () => resolve())
            .on('error', e => reject(e));
        }),
    )
    // Resolve with a status object so the calling code below never throws
    .then(() => ({ status: true, error: '' }))
    .catch(error => ({ status: false, error: error.toString() }));

/* ============================================================
  Download Images in Order
============================================================ */

(async () => {
  let example_image_1 = await download_image('https://example.com/test-1.png', 'example-1.png');
  
  console.log(example_image_1.status); // true
  console.log(example_image_1.error); // ''
  
  let example_image_2 = await download_image('https://example.com/does-not-exist.png', 'example-2.png');
  
  console.log(example_image_2.status); // false
  console.log(example_image_2.error); // 'Error: Request failed with status code 404'
  
  let example_image_3 = await download_image('https://example.com/test-3.png', 'example-3.png');
  
  console.log(example_image_3.status); // true
  console.log(example_image_3.error); // ''
})();
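If streaming to disk isn't required, a hedged variant can ask axios for the whole body as a buffer instead; download_image_buffered is an illustrative name, not part of the original answer:

const fs = require('fs');
const axios = require('axios');

// Loads the whole image into memory, so it is only suitable for
// reasonably small files.
const download_image_buffered = async (url, image_path) => {
  const response = await axios({ url, responseType: 'arraybuffer' });
  await fs.promises.writeFile(image_path, response.data); // response.data is a Buffer in Node
};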

Solution 4 - node.js

If you want to show download progress, try this:

var fs = require('fs');
var request = require('request');
var progress = require('request-progress');

module.exports = function (uri, path, onProgress, onResponse, onError, onEnd) {
    progress(request(uri))
    .on('progress', onProgress)
    .on('response', onResponse)
    .on('error', onError)
    .on('end', onEnd)
    .pipe(fs.createWriteStream(path))
};

How to use it:

var download = require('../lib/download');

download("https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png", "~/download/logo.png", function (state) {
    console.log("progress", state);
}, function (response) {
    console.log("status code", response.statusCode);
}, function (error) {
    console.log("error", error);
}, function () {
    console.log("done");
});

Note: you should install both the request and request-progress modules first:

npm install request request-progress --save
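As an illustration, an onProgress handler might look like the following; the field names assume request-progress 3.x, where state.percent is a fraction between 0 and 1 (or null when the server sends no content-length):

// Illustrative onProgress handler; field names assume request-progress 3.x.
function onProgress(state) {
    if (state.percent != null) {
        console.log('progress: ' + Math.round(state.percent * 100) + '%');
    }
    console.log(state.size.transferred + ' of ' + (state.size.total || 'unknown') + ' bytes');
}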

Solution 5 - node.js

var fs = require('fs'),
    http = require('http'),
    https = require('https');

var Stream = require('stream').Transform;

var downloadImageToUrl = (url, filename, callback) => {

    // Pick the right client for the protocol
    var client = http;
    if (url.toString().indexOf("https") === 0) {
        client = https;
    }

    client.request(url, function(response) {
        var data = new Stream();

        response.on('data', function(chunk) {
            data.push(chunk);
        });

        response.on('end', function() {
            fs.writeFileSync(filename, data.read());
            if (callback) callback(filename);
        });
    }).end();
};

downloadImageToUrl('https://www.google.com/images/srpr/logo11w.png', 'public/uploads/users/abc.jpg');

Solution 6 - node.js

This is an extension of Cezary's answer. If you want to download the image to a specific directory, use this. Also, use const instead of var; it's safer that way.

const fs = require('fs');
const request = require('request');
const download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){    
    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};

download('https://www.google.com/images/srpr/logo3w.png', './images/google.png', function(){
  console.log('done');
});
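One assumption worth spelling out: fs.createWriteStream will not create missing directories, so './images' has to exist before calling download. A short sketch (Node 10.12+) to make sure it does:

const fs = require('fs');
const path = require('path');

const filename = './images/google.png';
// recursive: true creates the whole directory chain if it is missing
fs.mkdirSync(path.dirname(filename), { recursive: true });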

Solution 7 - node.js

Building on the above, if anyone needs to handle errors in the write/read streams, I used this version. Note the stream.read() call in the write-error handler: it is required so we can finish reading and trigger 'close' on the read stream.

var fs = require('fs'),
    request = require('request');

var download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    if (err) callback(err, filename);
    else {
      var stream = request(uri);
      stream.pipe(
        fs.createWriteStream(filename)
          .on('error', function(err){
            callback(err, filename);
            stream.read();
          })
        )
        .on('close', function() {
          callback(null, filename);
        });
    }
  });
};
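Example usage of this version (URL and filename are just placeholders):

download('https://www.google.com/images/srpr/logo3w.png', 'google.png', function(err, filename) {
  if (err) return console.error('download failed:', err);
  console.log('saved to', filename);
});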

Attributions

All content for this solution is sourced from the original question on Stackoverflow.

The content on this page is licensed under the Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.

Question: Jonathan Ong (View Question on Stackoverflow)
Solution 1 - node.js: Cezary Wojtkowski (View Answer on Stackoverflow)
Solution 2 - node.js: Nihey Takizawa (View Answer on Stackoverflow)
Solution 3 - node.js: Grant Miller (View Answer on Stackoverflow)
Solution 4 - node.js: Fareed Alnamrouti (View Answer on Stackoverflow)
Solution 5 - node.js: Chandan Chhajer (View Answer on Stackoverflow)
Solution 6 - node.js: Ahsan Ahmed (View Answer on Stackoverflow)
Solution 7 - node.js: VladFr (View Answer on Stackoverflow)