Calculating a directory's size using Python?


Before I re-invent this particular wheel, has anybody got a nice routine for calculating the size of a directory using Python? It would be very nice if the routine would format the size nicely in Mb/Gb etc.

This walks all sub-directories; summing file sizes:

import os

def get_size(start_path = '.'):
    """Return the combined size in bytes of the files below start_path.

    Every sub-directory is visited with os.walk. Entries that are symbolic
    links are left out of the total.
    """
    def _file_paths():
        # Flatten the walk into one stream of full file paths.
        for folder, _subdirs, names in os.walk(start_path):
            for name in names:
                yield os.path.join(folder, name)

    return sum(
        os.path.getsize(candidate)
        for candidate in _file_paths()
        if not os.path.islink(candidate)
    )

print(get_size(), 'bytes')

And a oneliner for fun using os.listdir (Does not include sub-directories):

import os
sum(os.path.getsize(f) for f in os.listdir('.') if os.path.isfile(f))


Updated to use os.path.getsize, which is clearer than using the os.stat().st_size method.

Thanks to ghostdog74 for pointing this out!

os.stat - st_size Gives the size in bytes. Can also be used to get file size and other file related information.

import os

nbytes = sum(d.stat().st_size for d in os.scandir('.') if d.is_file())

Update 2018

If you use Python 3.4 or earlier, consider using the more efficient walk method provided by the third-party scandir package. In Python 3.5 and later, this package has been incorporated into the standard library and os.walk has received the corresponding increase in performance.

Update 2019

Recently I've been using pathlib more and more, here's a pathlib solution:

from pathlib import Path

root_directory = Path('.')
sum(f.stat().st_size for f in root_directory.glob('**/*') if f.is_file())

Some of the approaches suggested so far implement a recursion, others employ a shell or will not produce neatly formatted results. When your code is one-off for Linux platforms, you can get formatting as usual, recursion included, as a one-liner. Except for the print in the last line, it will work for current versions of python2 and python3:
import subprocess

def du(path):
    """Return the disk usage of path in human readable format (e.g. '2,1GB').

    The work is delegated to the external ``du -sh`` command, so this only
    works where that command is installed.

    Raises subprocess.CalledProcessError if ``du`` exits with a non-zero
    status, and OSError if the command cannot be started.
    """
    # '--' ends option parsing so a path starting with '-' is not read as a flag.
    output = subprocess.check_output(['du', '-sh', '--', path])
    # The first whitespace-separated field is the size; the rest is the path.
    return output.split()[0].decode('utf-8')


if __name__ == "__main__":
    print(du('.'))

This is simple, efficient and will work for files and multilevel directories:

$ chmod 750
$ ./

Here is a recursive function (it recursively sums up the size of all subfolders and their respective files) which returns exactly the same bytes as when running "du -sb ." in linux (where the "." means "the current folder"):

import os

def getFolderSize(folder):
    """Return the size in bytes of folder, its files and all nested folders.

    The size reported for each directory entry itself is included. Entries
    that are neither regular files nor directories contribute nothing.
    """
    def _entry_size(entry):
        if os.path.isfile(entry):
            return os.path.getsize(entry)
        if os.path.isdir(entry):
            return getFolderSize(entry)
        return 0

    own_size = os.path.getsize(folder)
    children = [os.path.join(folder, name) for name in os.listdir(folder)]
    return own_size + sum(_entry_size(child) for child in children)

print "Size: " + str(getFolderSize("."))

Python 3.5 recursive folder size using os.scandir

def folder_size(path='.'):
    """Return the total size in bytes of all files under path, recursively.

    Entries are read with os.scandir. Files add their st_size, directories
    add the result of a recursive call, and anything else is ignored.
    """
    return sum(
        entry.stat().st_size if entry.is_file() else folder_size(entry.path)
        for entry in os.scandir(path)
        if entry.is_file() or entry.is_dir()
    )

Using pathlib I came up with this one-liner to get the size of a folder:

sum(file.stat().st_size for file in Path(folder).rglob('*'))

And this is what I came up with for a nicely formatted output:

from pathlib import Path

def get_folder_size(folder):
    """Return the summed st_size of every entry below folder as a ByteSize."""
    total = 0
    for entry in Path(folder).rglob('*'):
        total += entry.stat().st_size
    return ByteSize(total)

class ByteSize(int):
    """An int holding a byte count that can present itself in larger units.

    Each instance exposes the value converted to every unit, under a long and
    a short name (``bytes``/``B``, ``kilobytes``/``kB``, ``megabytes``/``MB``,
    ``gigabytes``/``GB``, ``terabytes``/``TB``, ``petabytes``/``PB``), using
    1024 as the step between units.

    ``readable`` is a ``(suffix, value)`` pair for the largest unit in which
    the magnitude is at least 1, falling back to bytes for smaller values.
    ``str()`` and ``format()`` render that pair, e.g. ``'5.81 GB'``.

    Addition, subtraction and multiplication with ints return a ByteSize.
    """

    _kB = 1024
    _suffixes = 'B', 'kB', 'MB', 'GB', 'TB', 'PB'

    def __new__(cls, *args, **kwargs):
        return super().__new__(cls, *args, **kwargs)

    def __init__(self, *args, **kwargs):
        self.bytes = self.B = int(self)
        self.kilobytes = self.kB = self / self._kB**1
        self.megabytes = self.MB = self / self._kB**2
        self.gigabytes = self.GB = self / self._kB**3
        self.terabytes = self.TB = self / self._kB**4
        self.petabytes = self.PB = self / self._kB**5
        # Scan from the largest unit down. ">= 1" keeps exact unit boundaries
        # (e.g. 1024 bytes) in the unit they belong to.
        suffix = next(
            (
                candidate
                for candidate in reversed(self._suffixes)
                if abs(getattr(self, candidate)) >= 1
            ),
            self._suffixes[0],
        )
        self.readable = suffix, getattr(self, suffix)

    def __str__(self):
        return self.__format__('.2f')

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__, super().__repr__())

    def __format__(self, format_spec):
        # format_spec applies to the numeric part only; the suffix is appended.
        suffix, val = self.readable
        return '{val:{fmt}} {suf}'.format(val=val, fmt=format_spec, suf=suffix)

    def _wrap(self, result):
        # Pass NotImplemented through so Python can try the other operand.
        if result is NotImplemented:
            return result
        return self.__class__(result)

    def __sub__(self, other):
        return self._wrap(super().__sub__(other))

    def __add__(self, other):
        return self._wrap(super().__add__(other))

    def __mul__(self, other):
        return self._wrap(super().__mul__(other))

    def __rsub__(self, other):
        # Reflected subtraction is other - self, not self - other.
        return self._wrap(super().__rsub__(other))

    def __radd__(self, other):
        return self._wrap(super().__radd__(other))

    def __rmul__(self, other):
        return self._wrap(super().__rmul__(other))


>>> size = get_folder_size("c:/users/tdavis/downloads")
>>> print(size)
5.81 GB
>>> size.GB
>>> size.gigabytes
>>> size.PB
>>> size.MB
>>> size

I also came across this question, which has some more compact and probably more performant strategies for printing file sizes.

for python3.5+

from pathlib import Path

def get_size(folder: str) -> int:
    """Return the summed st_size of every entry found below folder.

    Entries come from a recursive ``rglob('*')``.
    """
    total = 0
    for entry in Path(folder).rglob('*'):
        total += entry.stat().st_size
    return total


In [6]: get_size('/etc/not-exist-path')
Out[6]: 0
In [7]: get_size('.')
Out[7]: 12038689
In [8]: def filesize(size: int) -> str:
   ...:     for unit in ("B", "K", "M", "G", "T"):
   ...:         if size < 1024:
   ...:             break
   ...:         size /= 1024
   ...:     return f"{size:.1f}{unit}"

In [9]: filesize(get_size('.'))
Out[9]: '11.5M'

monknut's answer is good, but it fails on broken symlinks, so you also have to check that the path really exists:

if os.path.exists(fp):
	total_size += os.stat(fp).st_size

The accepted answer doesn't take into account hard or soft links, and would count those files twice. You'd want to keep track of which inodes you've seen, and not add the size for those files.

import os
def get_size(start_path='.'):
    """Return the total size in bytes of the files under start_path.

    Each underlying file is counted once: hard links and symlinks that
    resolve to an already counted file are skipped. Files that cannot be
    stat'ed (for example broken symlinks) are ignored.
    """
    total_size = 0
    # An inode number is only unique within one device, so key on both.
    seen = set()
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            try:
                info = os.stat(fp)
            except OSError:
                continue

            key = (info.st_dev, info.st_ino)
            if key in seen:
                continue
            seen.add(key)

            total_size += info.st_size

    return total_size

print get_size()

a recursive one-liner:

def getFolderSize(p):
    """Return the total size in bytes of the files below directory p.

    Regular files add their own size. Every other entry is treated as a
    directory and descended into.
    """
    total = 0
    for name in os.listdir(p):
        child = os.path.join(p, name)
        if os.path.isfile(child):
            total += os.path.getsize(child)
        else:
            total += getFolderSize(child)
    return total

Chris' answer is good but could be made more idiomatic by using a set to track the inodes already seen, which also avoids using an exception for control flow:

def directory_size(path):
    """Return the size in bytes of the files under path, each counted once.

    A set of (device, inode) pairs records the files already counted, so
    hard links and symlinks to a counted file add nothing. Files that cannot
    be stat'ed are skipped.
    """
    total_size = 0
    seen = set()

    for dirpath, dirnames, filenames in os.walk(path):
        for f in filenames:
            fp = os.path.join(dirpath, f)

            try:
                info = os.stat(fp)
            except OSError:
                continue

            # Inode numbers repeat across devices, so the device is part of the key.
            identity = (info.st_dev, info.st_ino)
            if identity in seen:
                continue

            seen.add(identity)
            total_size += info.st_size

    return total_size  # size in bytes

A little late to the party but in one line provided that you have glob2 and humanize installed. Note that in Python 3, the default iglob has a recursive mode. How to modify the code for Python 3 is left as a trivial exercise for the reader.

>>> import os
>>> from humanize import naturalsize
>>> from glob2 import iglob
>>> naturalsize(sum(os.path.getsize(x) for x in iglob('/var/**'))))
'546.2 MB'

For the second part of the question

def human(size):
    """Format a byte count with the largest fitting unit, e.g. '1.500000 KB'.

    The value is divided by 1024 until it drops below 1024 or the last unit
    (TB) is reached. Anything larger is still reported in TB.
    """
    UNITS = ["B", "KB", "MB", "GB", "TB"]
    HUMANFMT = "%f %s"
    HUMANRADIX = 1024.0

    for u in UNITS[:-1]:
        if size < HUMANRADIX:
            return HUMANFMT % (size, u)
        size /= HUMANRADIX

    return HUMANFMT % (size, UNITS[-1])

You can do something like this :

import commands   
size = commands.getoutput('du -sh /path/').split()[0]

in this case I have not tested the result before returning it, if you want you can check it with commands.getstatusoutput.

One-liner you say... Here is a one liner:

sum([sum(map(lambda fname: os.path.getsize(os.path.join(directory, fname)), files)) for directory, folders, files in os.walk(path)])

Although I would probably split it out and it performs no checks.

To convert to kb see and work it in

The following script prints directory size of all sub-directories for the specified directory. It also tries to benefit (if possible) from caching the calls of a recursive functions. If an argument is omitted, the script will work in the current directory. The output is sorted by the directory size from biggest to smallest ones. So you can adapt it for your needs.

PS: I've used recipe 578019 for showing the directory size in a human-friendly format.

from __future__ import print_function
import os
import sys
import operator

def null_decorator(ob):
    """Return ob unchanged; a no-op decorator used when caching is unavailable."""
    return ob

# functools.lru_cache exists from Python 3.2 on; older interpreters get the
# no-op decorator instead.
if sys.version_info >= (3,2,0):
    import functools
    my_cache_decorator = functools.lru_cache(maxsize=4096)
else:
    my_cache_decorator = null_decorator

start_dir = os.path.normpath(os.path.abspath(sys.argv[1])) if len(sys.argv) > 1 else '.'

def get_dir_size(start_path = '.'):
    """Return the total size in bytes of the files under start_path, recursively.

    Uses os.scandir when the interpreter provides it and falls back to
    os.listdir otherwise. Exactly one of the two strategies runs, so every
    file is counted once. The scandir branch does not follow symlinks; the
    listdir branch relies on os.path.isdir/isfile, which do.
    """
    total_size = 0
    if hasattr(os, 'scandir'):
        # using fast 'os.scandir' method (new in version 3.5)
        for entry in os.scandir(start_path):
            if entry.is_dir(follow_symlinks = False):
                total_size += get_dir_size(entry.path)
            elif entry.is_file(follow_symlinks = False):
                total_size += entry.stat().st_size
    else:
        # using slow, but compatible 'os.listdir' method
        for entry in os.listdir(start_path):
            full_path = os.path.abspath(os.path.join(start_path, entry))
            if os.path.isdir(full_path):
                total_size += get_dir_size(full_path)
            elif os.path.isfile(full_path):
                total_size += os.path.getsize(full_path)
    return total_size

def get_dir_size_walk(start_path = '.'):
    """Return the total size in bytes of all files under start_path.

    The tree is traversed with os.walk and every file name it reports is
    measured with os.path.getsize.
    """
    return sum(
        os.path.getsize(os.path.join(folder, name))
        for folder, _subdirs, names in os.walk(start_path)
        for name in names
    )

def bytes2human(n, format='%(value).0f%(symbol)s', symbols='customary'):
    """
    Convert n bytes into a human readable string based on format.
    symbols can be either "customary", "customary_ext", "iec" or "iec_ext".

    n is truncated to an int first. format is a %-style template that may
    use the keys ``value`` and ``symbol``.

    Raises ValueError if n is negative and KeyError if symbols names an
    unknown symbol set.

      >>> bytes2human(0)
      '0B'
      >>> bytes2human(1.9)
      '1B'
      >>> bytes2human(1024)
      '1K'
      >>> bytes2human(1048576)
      '1M'

      >>> bytes2human(9856, format="%(value).1f %(symbol)s", symbols="customary_ext")
      '9.6 kilo'
      >>> bytes2human(9856, format="%(value).1f %(symbol)s", symbols="iec")
      '9.6 Ki'
      >>> bytes2human(9856, format="%(value).1f %(symbol)s", symbols="iec_ext")
      '9.6 kibi'

      >>> bytes2human(10000, "%(value).1f %(symbol)s/sec")
      '9.8 K/sec'

      >>> # precision can be adjusted by playing with %f operator
      >>> bytes2human(10000, format="%(value).5f %(symbol)s")
      '9.76562 K'
    """
    SYMBOLS = {
        'customary'     : ('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'),
        'customary_ext' : ('byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa',
                           'zetta', 'iotta'),
        'iec'           : ('Bi', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'),
        'iec_ext'       : ('byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi',
                           'zebi', 'yobi'),
    }
    n = int(n)
    if n < 0:
        raise ValueError("n < 0")
    symbols = SYMBOLS[symbols]
    # Each symbol after the first stands for the next power of 1024.
    prefix = {}
    for i, s in enumerate(symbols[1:]):
        prefix[s] = 1 << (i+1)*10
    # Try the largest unit first so the biggest one that fits is used.
    for symbol in reversed(symbols[1:]):
        if n >= prefix[symbol]:
            value = float(n) / prefix[symbol]
            return format % dict(symbol=symbol, value=value)
    return format % dict(symbol=symbols[0], value=n)

###  main ()
if __name__ == '__main__':
    dir_tree = {}
    ### version, that uses 'slow' [os.walk method]
    #get_size = get_dir_size_walk
    ### this recursive version can benefit from caching the function calls (functools.lru_cache)
    get_size = get_dir_size

    # Measure every sub-directory found below start_dir.
    for root, dirs, files in os.walk(start_dir):
        for d in dirs:
            dir_path = os.path.join(root, d)
            if os.path.isdir(dir_path):
                dir_tree[dir_path] = get_size(dir_path)

    # Largest directories first.
    for d, size in sorted(dir_tree.items(), key=operator.itemgetter(1), reverse=True):
        print('%s\t%s' %(bytes2human(size, format='%(value).2f%(symbol)s'), d))

    print('-' * 80)
    # Cache statistics exist only when get_size is wrapped by functools.lru_cache.
    cache_info = getattr(get_size, 'cache_info', None)
    if cache_info is not None:
        print(cache_info())

Sample output:

37.61M  .\subdir_b
2.18M   .\subdir_a
2.17M   .\subdir_a\subdir_a_2
4.41K   .\subdir_a\subdir_a_1
CacheInfo(hits=2, misses=4, maxsize=4096, currsize=4)

EDIT: moved null_decorator above, as user2233949 recommended

use library sh: the module du does it:

pip install sh

import sh
print( sh.du("-s", ".") )
91154728        .

If you want to pass an asterisk (wildcard), use glob as described here.

To convert the values into a human-readable form, use humanize:

pip install humanize

import humanize
print( humanize.naturalsize( 91157384 ) )
91.2 MB

Get directory size

Properties of the solution:

  • returns both: the apparent size (number of bytes in the file) and the actual disk space the files uses.
  • counts hard linked files only once
  • counts symlinks the same way du does
  • does not use recursion
  • uses st.st_blocks for disk space used, thus works only on Unix-like systems

The code:

import os

def du(path):
    """Return (apparent size, disk space used) in bytes for path.

    The apparent size is the sum of st_size values. The disk space used is
    derived from st_blocks * 512, so this only works on systems that report
    st_blocks.

    Symbolic links contribute only the size of the link itself and are never
    followed. A file reachable through several hard links is counted once.
    Directories contribute their own lstat size as well.
    """
    if os.path.islink(path):
        return (os.lstat(path).st_size, 0)
    if os.path.isfile(path):
        st = os.lstat(path)
        return (st.st_size, st.st_blocks * 512)
    apparent_total_bytes = 0
    total_bytes = 0
    # (device, inode) pairs of files already counted.
    have = set()
    for dirpath, dirnames, filenames in os.walk(path):
        dir_st = os.lstat(dirpath)
        apparent_total_bytes += dir_st.st_size
        total_bytes += dir_st.st_blocks * 512
        for f in filenames:
            fp = os.path.join(dirpath, f)
            if os.path.islink(fp):
                apparent_total_bytes += os.lstat(fp).st_size
                continue  # the link is accounted for; do not count it again below
            st = os.lstat(fp)
            file_id = (st.st_dev, st.st_ino)
            if file_id in have:
                continue  # skip hardlinks which were already counted
            have.add(file_id)
            apparent_total_bytes += st.st_size
            total_bytes += st.st_blocks * 512
        for d in dirnames:
            dp = os.path.join(dirpath, d)
            # os.walk does not descend into symlinked directories, so only
            # the link itself is added here.
            if os.path.islink(dp):
                apparent_total_bytes += os.lstat(dp).st_size
    return (apparent_total_bytes, total_bytes)

Example usage:

>>> du('/lib')
(236425839, 244363264)

$ du -sb /lib
236425839	/lib
$ du -sB1 /lib
244363264	/lib

Human readable file size

Properties of the solution:

The code:

def humanized_size(num, suffix='B', si=False):
    """Format num with a unit prefix and one decimal, e.g. '225.5MiB'.

    With si=False (the default) the step between units is 1024 and the
    prefixes are Ki, Mi, Gi, ...; with si=True the step is 1000 and the
    prefixes are K, M, G, ... . suffix is appended after the prefix.
    """
    if si:
        units = ['','K','M','G','T','P','E','Z']
        last_unit = 'Y'
        div = 1000.0
    else:
        units = ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']
        last_unit = 'Yi'
        div = 1024.0
    for unit in units:
        if abs(num) < div:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= div
    # Larger than every listed unit: report in the final one.
    return "%.1f%s%s" % (num, last_unit, suffix)

Example usage:

>>> humanized_size(236425839)
>>> humanized_size(236425839, si=True)
>>> humanized_size(236425839, si=True, suffix='')

for getting the size of one file, there is os.path.getsize()

>>> import os
>>> os.path.getsize("/path/file")

It's reported in bytes.

For what it's worth... the tree command does all of this for free:

tree -h --du /path/to/dir  # files and dirs
tree -h -d --du /path/to/dir  # dirs only

I love Python, but by far the simplest solution to the problem requires no new code.

It is handy:

import os
import stat

size = 0
path_ = ""
def calculate(path=None):
    """Sum the sizes of all files under path into the module-level ``size``.

    The measured path is stored in the module-level ``path_``. When path is
    omitted, the directory named by the SYSTEMROOT environment variable is
    used. The total is also returned.

    Raises KeyError if path is omitted and SYSTEMROOT is not set.
    """
    global size, path_
    if path is None:
        # Looked up at call time: reading os.environ["SYSTEMROOT"] in the
        # signature raised KeyError at definition time where it is unset.
        path = os.environ["SYSTEMROOT"]
    size = 0
    path_ = path

    for x, y, z in os.walk(path):
        for i in z:
            size += os.path.getsize(os.path.join(x, i))
    return size

def cevir(x):
    """Print byte count x in bytes, kilobytes, megabytes and gigabytes.

    Every line is prefixed with the module-level ``path_``.
    """
    global path_
    print(path_, x, "Byte")
    conversions = (
        (1024, "Kilobyte"),
        (1048576, "Megabyte"),
        (1073741824, "Gigabyte"),
    )
    for divisor, label in conversions:
        print(path_, x / divisor, label)


C:\Users\Jundullah\Desktop 87874712211 Byte
C:\Users\Jundullah\Desktop 85815148.64355469 Kilobyte
C:\Users\Jundullah\Desktop 83803.85609722137 Megabyte
C:\Users\Jundullah\Desktop 81.83970321994275 Gigabyte

Here is a one liner that does it recursively (recursive option available as of Python 3.5):

import os
import glob
print(sum(os.path.getsize(f) for f in glob.glob('**', recursive=True) if os.path.isfile(f))/(1024*1024))

def recursive_dir_size(path):
    """Return the total size in bytes of every file below path.

    Non-directory entries add their os.stat size; directories are descended
    into recursively.
    """
    size = 0
    for x in os.listdir(path):
        child = os.path.join(path, x)
        if not os.path.isdir(child):
            size += os.stat(child).st_size
        else:
            size += recursive_dir_size(child)
    return size

I wrote this function, which gives me an accurate overall size of a directory. I tried other for-loop solutions with os.walk, but the end result was always less than the actual size (on an Ubuntu 18 environment), and I don't know why. I must have done something wrong, but this one works perfectly fine for me.

I'm using python 2.7.13 with scandir and here's my one-liner recursive function to get the total size of a folder:

from scandir import scandir
def getTotFldrSize(path):
    """Return the total size in bytes of the files under path, recursively.

    Symbolic links are not followed: a link is neither counted as a file nor
    descended into as a directory. The directory is scanned once.
    """
    total = 0
    for s in scandir(path):
        if s.is_file(follow_symlinks=False):
            total += s.stat(follow_symlinks=False).st_size
        elif s.is_dir(follow_symlinks=False):
            total += getTotFldrSize(s.path)
    return total

>>> print getTotFldrSize('.')

When size of the sub-directories is computed, it should update its parent's folder size and this will go on till it reaches the root parent.

The following function computes the size of the folder and all its sub-folders.

import os
def folder_size(path):
    """Return the size of path and all its sub-folders, divided by 1000000.0.

    While walking the tree, each directory's own file total is recorded and
    then added to every ancestor up to the starting folder, so the starting
    folder ends up holding the grand total in bytes.
    """
    top = os.path.realpath(path)
    parent_of = {}  # directory -> its parent directory (-1 marks the top)
    sizes = {}  # directory -> bytes accumulated so far

    for current, _, names in os.walk(top):
        if current == top:
            parent_of[current] = -1
        elif current not in parent_of:
            parent_of[current] = os.path.dirname(current)

        # Bytes held by the files directly inside this directory.
        own_bytes = sum(
            os.stat(os.path.join(current, name)).st_size for name in names
        )
        sizes[current] = own_bytes

        # Propagate this directory's share to each ancestor in turn.
        ancestor = parent_of[current]
        while ancestor != -1:
            sizes[ancestor] += own_bytes
            ancestor = parent_of[ancestor]

    return sizes[top] / 1000000.0

A solution that works on Python 3.6 using pathlib.

from pathlib import Path

sum([f.stat().st_size for f in Path("path").glob("**/*")])

du does not follow symlinks by default. No answer here makes use of follow_symlinks=False.

Here is an implementation which follows default behavior of du:

def du(path) -> int:
    """Return the total size in bytes of the files under path.

    Symbolic links are not followed: an entry counts as a file or directory
    only if it is one itself. Other entries add nothing.
    """
    def _usage(entry):
        if entry.is_file(follow_symlinks=False):
            return entry.stat().st_size
        if entry.is_dir(follow_symlinks=False):
            return du(entry.path)
        return 0

    return sum(_usage(entry) for entry in os.scandir(path))


class Test(unittest.TestCase):
    def test_du(self):
        """du() should report 4 << 20 bytes for four '1M' files plus a symlink."""
        root = '/tmp/du_test'
        # NOTE(review): the call wrapping each argument list was lost from the
        # snippet; subprocess.call is assumed here - confirm.
        subprocess.call(['rm', '-rf', root])
        test_utils.create_file(root, 'A', '1M')
        test_utils.create_file(root, 'B', '1M')
        sub = '/'.join([root, 'sub'])
        test_utils.create_file(sub, 'C', '1M')
        test_utils.create_file(sub, 'D', '1M')
        # A symlink to /tmp inside the tree; the expected total below shows
        # it must not be followed.
        subprocess.call(['ln', '-s', '/tmp', '/'.join([root, 'link']), ])
        self.assertEqual(4 << 20, util.du(root))

import os
def get_size(path=None):
    """Print and return the size in bytes of path (a directory or a file).

    For a directory, the sizes of all files below it are summed and symbolic
    links are skipped. For a regular file, its own size is used. Anything
    else is reported as a special file with size 0. When path is omitted,
    the current working directory at call time is measured.
    """
    if path is None:
        # Resolved per call; a default of os.getcwd() in the signature would
        # be frozen at definition time.
        path = os.getcwd()
    print("Calculating Size: ",path)
    total_size = 0
    #if path is directory--
    if os.path.isdir(path):
        print("Path type : Directory/Folder")
        for dirpath, dirnames, filenames in os.walk(path):
            for f in filenames:
                fp = os.path.join(dirpath, f)
                # skip if it is symbolic link
                if not os.path.islink(fp):
                    total_size += os.path.getsize(fp)
    #if path is a file---
    elif os.path.isfile(path):
        print("Path type : File")
        total_size = os.path.getsize(path)
    else:
        print("Path Type : Special File (Socket, FIFO, Device File)" )
    bytesize = total_size
    print(bytesize, 'bytes')
    print(bytesize/(1024), 'kilobytes')
    print(bytesize/(1024*1024), 'megabytes')
    print(bytesize/(1024*1024*1024), 'gegabytes')
    return total_size


I'm sure this helps! For folders and files as well!

This script tells you which file is the biggest in the CWD and also tells you in which folder the file is. This script works for me on win8 and python 3.3.3 shell

import os



# Scan the current working directory for its largest file.
folder = os.getcwd()
number = 0   # size in bytes of the biggest file seen so far
string = ""  # full path of that file

for root, dirs, files in os.walk(folder):
    for file in files:
        pathname = os.path.join(root, file)
        file_size = os.path.getsize(pathname)
        if number < file_size:
            number = file_size
            string = pathname

print (string)
print ()
print (number)
print ("Number in bytes")

Admittedly, this is kind of hackish and only works on Unix/Linux.

It matches du -sb . because in effect this is a Python bash wrapper that runs the du -sb . command.

import subprocess

def system_command(cmd):
    """Execute cmd through the system shell and return (stdout, stderr) as bytes.

    cmd is handed to the shell as-is, so it must never be built from
    untrusted input.
    """
    p = subprocess.Popen(cmd,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         shell=True)
    # communicate() waits for the command to finish and collects both streams.
    stdout, stderr = p.communicate()
    return stdout, stderr
size = int(system_command('du -sb . ')[0].split()[0])

I'm a little late (and new) here but I chose to use the subprocess module and the 'du' command line with Linux to retrieve an accurate value for folder size in MB. I had to use if and elif for root folder because otherwise subprocess raises error due to non-zero value returned.

import subprocess
import os

# get folder size
def get_size(self, path):
    """Return the number reported by ``sudo du -s`` for path, divided by 1000000.

    Raises ValueError if path does not exist.

    NOTE(review): du -s usually reports blocks rather than bytes, so the
    unit of the returned value should be confirmed.
    """
    if os.path.exists(path) and path != '/':
        # str() of the bytes output looks like "b'<number>\t<path>\n'"; the
        # replace() calls strip the b'...' wrapper and the text before the
        # escaped tab is the number.
        cmd = str(subprocess.check_output(['sudo', 'du', '-s', path])).\
            replace('b\'', '').replace('\'', '').split('\\t')[0]
        return float(cmd) / 1000000
    elif os.path.exists(path) and path == '/':
        # getoutput() is used for the root folder, so a non-zero exit status
        # does not raise; only the last line of the output is parsed.
        cmd = str(subprocess.getoutput(['sudo du -s /'])). \
            replace('b\'', '').replace('\'', '').split('\n')
        # Remove the trailing '/' path and spaces, leaving just the number.
        val = cmd[len(cmd) - 1].replace('/', '').replace(' ', '')
        return float(val) / 1000000
    else: raise ValueError

If you are in Windows OS you can do:

install the module pywin32 by launching:

pip install pywin32

and then coding the following:

import win32com.client as com

def get_folder_size(path):
	   fso = com.Dispatch("Scripting.FileSystemObject")
	   folder = fso.GetFolder(path)
	   size = str(round(folder.Size / 1048576))
	   print("Size: " + size + " MB")
   except Exception as e:
       print("Error --> " + str(e))

Python 3.6+ recursive folder/file size using os.scandir. As powerful as in the answer by @blakev, but shorter and in EAFP python style.

import os

def size(path, *, follow_symlinks=False):
    """Return the size in bytes of a file, or of a whole folder recursively.

    path may be a string, a path-like object or an os.DirEntry. A directory
    is the sum of its entries; anything else is measured with os.stat.

    With follow_symlinks=False (the default) a symbolic link is never
    descended into and adds only the size of the link itself.
    """
    # os.scandir follows a symlinked directory, so links are kept out of the
    # directory branch unless following was requested.
    if follow_symlinks or not os.path.islink(path):
        try:
            with os.scandir(path) as it:
                return sum(size(entry, follow_symlinks=follow_symlinks) for entry in it)
        except NotADirectoryError:
            pass
    return os.stat(path, follow_symlinks=follow_symlinks).st_size

import os

def get_size(path):
    """Return the total size in bytes of the files under path.

    Names whose target does not exist (such as broken symlinks) are skipped.
    """
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(path):
        for f in filenames:
            # Build the full path before testing it.
            fp = os.path.join(dirpath, f)
            if os.path.exists(fp):
                total_size += os.path.getsize(fp)

    return total_size   # in bytes

Thanks monkut & troex!


