Parse XLSX with Node and create json

Javascript, Json, node.js, Excel, Xlsx

Javascript Problem Overview


Ok so I found this really well documented node_module called js-xlsx

Question: How can I parse an xlsx to output json?

Here is what the excel sheet looks like:

enter image description here

In the end the json should look like this:

[
   {
   "id": 1,
   "Headline": "Team: Sally Pearson",
   "Location": "Australia",
   "BodyText": "...",
   "Media": "..."
   },
   {
   "id": 2,
   "Headline": "Team: Rebeca Andrade",
   "Location": "Brazil",
   "BodyText": "...",
   "Media": "..."
   }
]

index.js:

// Declared explicitly so the assignment below is not an implicit global
// (which throws in strict mode / ES modules). A bare `var` re-declaration
// leaves any pre-existing global `XLSX` untouched.
var XLSX;
if (typeof require !== 'undefined') {
	console.log('hey');
	XLSX = require('xlsx');
}

// Read the workbook from disk and walk every cell of every sheet.
var workbook = XLSX.readFile('./assets/visa.xlsx');
var sheet_name_list = workbook.SheetNames;
sheet_name_list.forEach(function (sheetName) { /* iterate through sheets */
  var worksheet = workbook.Sheets[sheetName];
  for (var cellAddress in worksheet) {
    /* all keys that do not begin with "!" correspond to cell addresses */
    if (cellAddress[0] === '!') continue;
    // console.log(sheetName + "!" + cellAddress + "=" + JSON.stringify(worksheet[cellAddress].v));
  }
});

// Write the (unmodified) workbook back out.
XLSX.writeFile(workbook, 'out.xlsx');

Javascript Solutions


Solution 1 - Javascript

You can also use

// Convert the first sheet of Master.xlsx to JSON with the built-in helper
// and print the result.
const XLSX = require('xlsx');

const workbook = XLSX.readFile('Master.xlsx');
const firstSheetName = workbook.SheetNames[0];
const firstSheet = workbook.Sheets[firstSheetName];

console.log(XLSX.utils.sheet_to_json(firstSheet));

Solution 2 - Javascript

An improved version of Josh Marinacci's answer: it also reads columns beyond Z (e.g. AA1).

// For every sheet in test.xlsx, build an array of row objects keyed by the
// header text found in row 1, then print it.
const XLSX = require('xlsx');
const workbook = XLSX.readFile('test.xlsx');
const sheet_name_list = workbook.SheetNames;
sheet_name_list.forEach(function (sheetName) {
    const worksheet = workbook.Sheets[sheetName];
    const headers = {};
    const data = [];
    for (const address in worksheet) {
        // keys starting with "!" are sheet metadata, not cells
        if (address[0] === '!') continue;

        // Split the address at its first digit so multi-letter columns
        // (AA1, AB1, ...) are handled: "AA12" -> col "AA", row 12.
        const firstDigit = address.search(/\d/);
        const col = address.substring(0, firstDigit);
        const row = parseInt(address.substring(firstDigit), 10);
        const value = worksheet[address].v;

        // Row 1 holds the header names; it never contributes a data row.
        if (row === 1) {
            if (value) headers[col] = value;
            continue;
        }

        if (!data[row]) data[row] = {};
        data[row][headers[col]] = value;
    }
    // Rows are 1-based, so index 0 is unused and index 1 is the header row:
    // drop both so the array starts at the first data row.
    data.shift();
    data.shift();
    console.log(data);
});

Solution 3 - Javascript

I think this code will do what you want. It stores the first row as a set of headers, then stores the rest in a data object which you can write to disk as JSON.

// For every sheet in test.xlsx, store the first row as headers and the
// remaining rows as objects keyed by those headers, then print the result.
const XLSX = require('xlsx');
const workbook = XLSX.readFile('test.xlsx');
const sheet_name_list = workbook.SheetNames;
sheet_name_list.forEach(function (sheetName) {
    const worksheet = workbook.Sheets[sheetName];
    const headers = {};
    const data = [];
    for (const address in worksheet) {
        // keys starting with "!" are sheet metadata, not cells
        if (address[0] === '!') continue;

        // Parse out the column letters and row number. Taking only the first
        // character as the column would misread addresses such as "AA1".
        const parts = /^([A-Za-z]+)(\d+)$/.exec(address);
        if (!parts) continue;
        const col = parts[1];
        const row = parseInt(parts[2], 10);
        const value = worksheet[address].v;

        // store header names
        if (row === 1) {
            headers[col] = value;
            continue;
        }

        if (!data[row]) data[row] = {};
        data[row][headers[col]] = value;
    }
    // Rows are 1-based, so index 0 is unused and index 1 is the header row:
    // drop both so the array starts at the first data row.
    data.shift();
    data.shift();
    console.log(data);
});

prints out

[ { id: 1,
    headline: 'team: sally pearson',
    location: 'Australia',
    'body text': 'majority have…',
    media: 'http://www.youtube.com/foo' },
  { id: 2,
    headline: 'Team: rebecca',
    location: 'Brazil',
    'body text': 'it is a long established…',
    media: 'http://s2.image.foo/' } ]

Solution 4 - Javascript

**podria ser algo asi en react y electron**

 // Converts the first sheet of `workbook` with XLSX.utils.sheet_to_json and
 // returns the result. Only the first sheet name is looked at.
 xslToJson = workbook => {
        const firstSheetName = workbook.SheetNames[0];
        const firstSheet = workbook.Sheets[firstSheetName];
        const conversionOptions = {
            raw: false,
            dateNF: "DD-MMM-YYYY",
            header: 1,
            defval: ""
        };
        return XLSX.utils.sheet_to_json(firstSheet, conversionOptions);
    };

    // Reads `file` with a FileReader, parses it with XLSX.read, converts it
    // through this.xslToJson and logs the rows. Nothing is returned.
    handleFile = (file /*:File*/) => {
        const reader = new FileReader();
        // Use readAsBinaryString when the reader has it, otherwise fall back
        // to an ArrayBuffer; the XLSX.read `type` option follows that choice.
        const canReadBinaryString = Boolean(reader.readAsBinaryString);

        reader.onload = ({ target }) => {
            const readOptions = { type: canReadBinaryString ? "binary" : "array" };
            const workbook = XLSX.read(target.result, readOptions);
            const rows = this.xslToJson(workbook);

            console.log("arr ", rows);

            // NOTE(review): dataNueva is never filled in; the state update
            // that would consume it is still disabled:
            // this.setState({ DataEESSsend: dataNueva })
            const dataNueva = [];
            rows.forEach((row) => {
                console.log("data renaes ", row);
            });
            console.log("dataNueva ", dataNueva);
        };

        if (canReadBinaryString) {
            reader.readAsBinaryString(file);
        } else {
            reader.readAsArrayBuffer(file);
        }
    };

    // Change handler for a file input: forwards the first selected file,
    // if there is one, to this.handleFile.
    handleChange = e => {
        const selectedFiles = e.target.files;
        if (!selectedFiles || !selectedFiles[0]) {
            return;
        }
        this.handleFile(selectedFiles[0]);
    };

Solution 5 - Javascript

Here's an Angular 5 method version of this with unminified (descriptive) names, for those who are struggling with the y, z, tt variables in the accepted answer. Usage: parseXlsx().subscribe((data)=> {...})

/**
 * Downloads ./assets/input.xlsx and converts every worksheet into an array of
 * row objects keyed by the header text found in row 1.
 *
 * The returned observable emits one object whose keys are the camel-cased
 * sheet names and whose values are those row arrays, then completes. The same
 * object is also stored on `this.parsed`. Download and parsing failures are
 * forwarded as an error notification, and unsubscribing cancels the request.
 *
 * Usage: parseXlsx().subscribe((data) => {...})
 */
parseXlsx() {
    return Observable.create(observer => {
        const request = this.http
            .get('./assets/input.xlsx', { responseType: 'arraybuffer' })
            .subscribe(
                (data: ArrayBuffer) => {
                    const allLists: { [sheet: string]: Array<any> } = {};

                    try {
                        const XLSX = require('xlsx');
                        const workbook = XLSX.read(new Uint8Array(data), { type: 'array' });

                        workbook.SheetNames.forEach((sheetName) => {
                            const worksheet = workbook.Sheets[sheetName];
                            const currentWorksheetHeaders: { [column: string]: any } = {};
                            const rows: Array<any> = [];

                            // cellName examples: !ref, !margins, A1, B1, C1
                            for (const cellName in worksheet) {
                                // skip service keys such as !margins and !ref
                                if (cellName[0] === '!') {
                                    continue;
                                }

                                // split the address into column letters and row number
                                // NOTE(review): getCellNumberPosition is defined elsewhere on
                                // this class; presumably it returns the index of the first digit.
                                const numberPosition = this.getCellNumberPosition(cellName);
                                const colName = cellName.substring(0, numberPosition);
                                const rowNumber = parseInt(cellName.substring(numberPosition), 10);
                                // .w is the formatted text of the cell; it is not guaranteed
                                // to be present, so guard before calling string methods on it
                                const cellValue = worksheet[cellName].w;

                                // treat '-' cells as empty (Spot Indices worksheet)
                                if (typeof cellValue === 'string' && cellValue.trim() === '-') {
                                    continue;
                                }

                                // row 1 holds the header names and never yields a data row
                                // NOTE(review): toCamelCase is not a built-in String method;
                                // presumably it is added to String.prototype elsewhere.
                                if (rowNumber === 1) {
                                    if (cellValue) {
                                        currentWorksheetHeaders[colName] =
                                            typeof cellValue === 'string' ? cellValue.toCamelCase() : cellValue;
                                    }
                                    continue;
                                }

                                // create the placeholder object for this row on first use
                                if (!rows[rowNumber]) {
                                    rows[rowNumber] = {};
                                }
                                rows[rowNumber][currentWorksheetHeaders[colName]] = cellValue;
                            }

                            // rows are 1-based: index 0 is unused and index 1 is the header row
                            rows.shift();
                            rows.shift();
                            allLists[sheetName.toCamelCase()] = rows;
                        });
                    } catch (err) {
                        observer.error(err);
                        return;
                    }

                    this.parsed = allLists;

                    observer.next(allLists);
                    observer.complete();
                },
                // without this, a failed download would leave subscribers waiting forever
                (err) => observer.error(err)
            );

        // teardown: cancel the HTTP request when the consumer unsubscribes
        return () => request.unsubscribe();
    });
}

Solution 6 - Javascript

I found a better way of doing this

  /**
   * Reads test.xlsx and converts every sheet into an array of row objects
   * keyed by the values of the sheet's first row.
   *
   * @returns {Object} map of sheet name to its array of row objects
   */
  function genrateJSONEngine() {
    var XLSX = require('xlsx');
    var workbook = XLSX.readFile('test.xlsx');
    var sheet_name_list = workbook.SheetNames;
    var result = {};

    sheet_name_list.forEach(function (y) {
      // A worksheet object is keyed by cell address and cannot be indexed
      // like an array; `header: 1` asks sheet_to_json for an array of row
      // arrays instead. Working on the arrays directly (rather than
      // join()/split(',')) keeps cell values that contain commas intact.
      var array = XLSX.utils.sheet_to_json(workbook.Sheets[y], { header: 1 });
      var headers = array[0] || [];

      var jsonData = [];
      for (var i = 1, length = array.length; i < length; i++) {
        var row = array[i];

        var data = {};
        for (var x = 0; x < row.length; x++) {
          data[headers[x]] = row[x];
        }
        jsonData.push(data);
      }

      result[y] = jsonData;
    });

    return result;
  }

Solution 7 - Javascript

Here is my solution (in TypeScript) using some of Ramda's helpers. It supports multiple sheets and returns an object with the sheet name as the key.

// Reads `file` with a FileReader, parses it with XLSX.read and calls `cb`
// with an object that maps each sheet name to its rows. Every row is an
// object keyed by the header text taken from row 1 of that sheet.
const parseXLSX = (file: File, cb: callback) => {
  const reader = new FileReader()
  const rABS = !!reader.readAsBinaryString

  reader.onload = (e) => {
    const wb = XLSX.read(e?.target?.result, { type: rABS ? 'binary' : 'array' })

    const sheetsData = wb.SheetNames.reduce((acc, sheetName) => {
      const worksheet = wb.Sheets[sheetName]
      const headers: Record<string, string> = {}
      const data: Record<string, unknown>[] = []

      for (const key of keys(worksheet)) {
        const address = String(key)

        // keys such as !ref are sheet metadata, not cells
        if (address[0] === '!') {
          continue
        }

        // stripping digits / non-digits supports wide tables, e.g. AA1
        const column = address.replace(/[0-9]/g, '')
        const row = parseInt(address.replace(/\D/g, ''), 10)
        const value = worksheet[key].v

        if (row === 1) {
          headers[column] = value
        }

        // this solution does not support a header that is not on the first
        // row: cells in a column without a known header are ignored
        if (headers[column] === undefined) {
          continue
        }

        if (!data[row]) {
          data[row] = {}
        }
        data[row][headers[column]] = value
      }

      // filter(Boolean) removes the unused slots; drop(1, ...) then removes
      // the header row, which was stored like any other row
      const rows = drop(1, data.filter(Boolean))

      return {
        ...acc,
        [sheetName]: rows,
      }
    }, {})

    cb(sheetsData)
  }

  if (rABS) {
    reader.readAsBinaryString(file)
  } else {
    reader.readAsArrayBuffer(file)
  }
}

Solution 8 - Javascript

Just improving @parijat answer a little.

// For every sheet in test.xlsx, build an array of row objects keyed by the
// header text found in row 1. Rows are stored at index (row - 2), so the
// array starts at the first data row and nothing has to be dropped afterwards.
const XLSX = require('xlsx');
const workbook = XLSX.readFile('test.xlsx');
const sheet_name_list = workbook.SheetNames;
sheet_name_list.forEach(function (sheetName) {
    const worksheet = workbook.Sheets[sheetName];
    const headers = {};
    const data = [];
    for (const address in worksheet) {
        // keys starting with "!" are sheet metadata, not cells
        if (address[0] === '!') continue;

        // Split the address at its first digit so multi-letter columns
        // (AA1, AB1, ...) are handled: "AA12" -> col "AA", row 12.
        const firstDigit = address.search(/\d/);
        const col = address.substring(0, firstDigit);
        const row = parseInt(address.substring(firstDigit), 10);
        const value = worksheet[address].v;

        // Row 1 holds the header names. It must always be skipped here:
        // letting an empty header cell fall through would write to data[-1].
        if (row === 1) {
            if (value) headers[col] = value;
            continue;
        }

        if (!data[row - 2]) data[row - 2] = {};
        data[row - 2][headers[col]] = value;
    }
    // No need to drop leading elements: data[0] is already the first data row.
    console.log(data);
});

Attributions

All content for this solution is sourced from the original question on Stackoverflow.

The content on this page is licensed under the Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.

Content Type | Original Author | Original Content on Stackoverflow
Question | Armeen Harwood | View Question on Stackoverflow
Solution 1 - Javascript | aksanoble | View Answer on Stackoverflow
Solution 2 - Javascript | Parijat | View Answer on Stackoverflow
Solution 3 - Javascript | Josh Marinacci | View Answer on Stackoverflow
Solution 4 - Javascript | AN German | View Answer on Stackoverflow
Solution 5 - Javascript | godblessstrawberry | View Answer on Stackoverflow
Solution 6 - Javascript | mad Man | View Answer on Stackoverflow
Solution 7 - Javascript | ricopella | View Answer on Stackoverflow
Solution 8 - Javascript | kaushik_pm | View Answer on Stackoverflow