The command du -sh ./* | sort -h
is a convenient way to see the largest items in a directory. Its output looks something like this:
0 ./empty
2.0K ./B2SUMS
2.0K ./backup.sh
2.0K ./list
6.0K ./glob.tcl
74M ./caddy
350M ./go
536M ./cargo
1001M ./private.cgd
2.1G ./junk.iso
In NetBSD 9, sort(1) does not implement the -h
flag for sorting human-readable file sizes with unit prefixes. (du -h
is implemented.) This wiki page presents two alternatives. Both are awk scripts that humanize the output of du -k
after it has been sorted numerically with sort -n.
hum.awk
Script by rvp.
Code
#!/usr/bin/awk -f
#
# Humanize the 1st field (KB assumed) in the output of commands like
# du(1) or sort(1)--if they don't provide a `-h' (humanize) flag.
#
# Eg: du -k ~ | sort -n -k1,1 | hum.awk
# Set up the constants shared by H(): the scaling step K, the suffix
# table SUF (SUF[1] is "K") and its length N.
BEGIN {
	K = 1024
	# An explicit "," separator is used because the result of splitting
	# on an empty string is not defined by POSIX awk.
	N = split("K,M,G,T,P,E,Z,Y", SUF, ",")
}
# Humanize num, a count of KB: divide it by K until it is smaller than K
# or the last suffix in SUF has been reached, then return it with one
# decimal place followed by the matching suffix. The second parameter is
# only a local variable; callers pass num alone.
function H(num, unit) {
	unit = 1
	while (unit < N && num >= K) {
		num = num / K
		unit++
	}
	return sprintf("%.1f%c", num, SUF[unit])
}
# Records whose first field is an unsigned integer: print the humanized
# size right-aligned in 6 columns, followed by the rest of the record.
# Records that do not match are not printed by this rule.
$1 ~ /^[[:digit:]]+$/ {
	# Locate $1 inside $0 instead of assuming it starts in column 1, so
	# that input with leading blanks (e.g. right-aligned sizes) does not
	# get part of the number repeated in the output.
	printf "%6s%s\n", H($1), substr($0, index($0, $1) + length($1))
	next
}
Dedicated to the public domain by the author.
Sample output
$ du -sk ./* | sort -n | hum.awk
0.0K ./empty
2.0K ./B2SUMS
2.0K ./backup.sh
2.0K ./list
6.0K ./glob.tcl
73.7M ./caddy
353.5M ./go
537.1M ./cargo
1000.5M ./private.cgd
2.1G ./junk.iso
humsize
Script by dbohdan.
Code
#! /usr/bin/awk -f
# humsize -- humanize du(1) output and other columns of file, disk, etc.
# sizes in bytes. This is useful on systems without `du -h` (commercial
# UNIX?) or `sort -h` (NetBSD 9, for which I wrote this script). Pipe
# the output of `du -k` to `humsize`. For example,
#
# $ du -sk ./* | sort -n | humsize
#
# https://opensource.org/licenses/Fair
#
# Copyright 2023 D. Bohdan.
#
# Usage of the works is permitted provided that this instrument is
# retained with the works, so that any entity that uses the works is
# notified of this instrument.
#
# DISCLAIMER: THE WORKS ARE WITHOUT WARRANTY.
# Build the option table (defaults, help texts and help order), handle
# the "help" and "version" arguments, then parse "name=value" arguments
# into config[]. Every argument is consumed here, so the records are
# read from standard input.
BEGIN {
	version = "0.2.0"

	# Options. Set with `humsize name=value`.
	defaults["field"] = 1
	help["field"] = "What field to humanize."
	help_order[++help_order_length] = "field"

	defaults["format"] = " %4.0f%1s"
	help["format"] = "The field format string: file size, unit prefix."
	help_order[++help_order_length] = "format"

	defaults["large"] = "%6.1f%1s"
	help["large"] = "The field format for sizes above the threshold."
	help_order[++help_order_length] = "large"

	defaults["threshold"] = 1024 * 1024 * 1024
	help["threshold"] = "The size in bytes above which to use the " \
		"\"large\" format."
	help_order[++help_order_length] = "threshold"

	defaults["units"] = ",K,M,G,T"
	help["units"] = "The file size units in 1024 increments. Format: " \
		"\"bytes,KiB,MiB,...\"."
	help_order[++help_order_length] = "units"

	defaults["zero"] = ""
	help["zero"] = "A special string to use instead of the format " \
		"string for size zero. The default value is empty, " \
		"which tells the program to use the same formatting " \
		"it uses for non-zero size."
	help_order[++help_order_length] = "zero"

	for (key in defaults) {
		config[key] = defaults[key]
	}
	# Do not set "large" to the default yet. Wait for the logic that must
	# run after the options have been parsed.
	config["large"] = ""

	# "help" and "version" win over everything else on the command line.
	for (i = 1; i < ARGC; i++) {
		if (ARGV[i] == "help") {
			showhelp()
			exit 0
		}
		if (ARGV[i] == "version") {
			printf "%s\n", version
			exit 0
		}
	}

	for (i = 1; i < ARGC; i++) {
		sp = index(ARGV[i], "=")
		if (sp == 0) {
			errorexit("Invalid argument", ARGV[i])
		}
		key = substr(ARGV[i], 1, sp - 1)
		value = substr(ARGV[i], sp + 1)
		if (!(key in config)) {
			errorexit("Unknown option", key)
		}
		config[key] = value
		# Blank the argument so awk does not treat it as an input file.
		ARGV[i] = ""
	}

	# Values parsed above come out of substr() and are therefore strings.
	# Comparing such a value with NF or with a size would be a string
	# comparison ("2" > "10"), so force the numeric options to numbers.
	config["field"] = config["field"] + 0
	config["threshold"] = config["threshold"] + 0

	# Without an explicit "large": use the default large format only when
	# "format" is also the default; a custom "format" applies to all sizes.
	if (config["large"] == "") {
		if (config["format"] == defaults["format"]) {
			config["large"] = defaults["large"]
		} else {
			config["large"] = config["format"]
		}
	}
}
# Print the program name and version, a usage line, and every option in
# help_order[] with its default value and help text.
function showhelp(
	# Local variables. Kept local so that the caller's loop counter and
	# its key/value variables are not overwritten.
	i, key, value) {
	printf "humsize %s\nHumanize du(1) output and other columns of " \
		"file, disk, etc. sizes in bytes.\n\n", version
	printf "Usage: humsize [option=value ...]\n"
	printf "\nOptions:\n\nThe values assigned to the options below are " \
		"the default values.\n\n"
	for (i = 1; i <= help_order_length; i++) {
		key = help_order[i]
		value = defaults[key]
		# Quote values that are empty or contain a space so the help
		# line shows them unambiguously.
		if (value == "" || index(value, " ") > 0) {
			value = "'" value "'"
		}
		printf " %-21s %s\n", key "=" value, help[key]
		# Blank line between entries, but not after the last one.
		if (i != help_order_length) {
			printf "\n"
		}
	}
}
# Report message together with the offending value and exit with
# status 1. The report goes to standard error so that it does not mix
# with the filtered data on standard output.
function errorexit(message, bad_value) {
	printf "%s: \"%s\"\n", message, bad_value > "/dev/stderr"
	exit 1
}
# The following functions are self-contained and not tied to the rest
# of the script. You can use them.
# Return the index of the start of field $f in s (usually $0), or 0 if
# there is no field f in the current record. The fields and NF of the
# current record are used, so s should be $0 or a copy of it. This
# function is intended for use with FS == " ". It will return an
# incorrect index if a field's value is contained in a separator.
#
# Based on public domain code by rvp.
function fieldindex(s, f,
                    # Local variables.
                    i, f_start, cutoff) {
	# f may arrive as a string (e.g. taken from the command line). Make
	# it a number so the comparisons below are numeric, not "2" > "10".
	f = int(f + 0)
	if (f < 1 || f > NF) {
		return 0
	}
	f_start = 0
	for (i = 1; i < f; i++) {
		# cutoff is the position just past field i in the remaining
		# string. Skip it plus one separator character and remember
		# how much of the original string has been consumed.
		cutoff = index(s, $i) + length($i)
		s = substr(s, cutoff + 1)
		f_start += cutoff
	}
	f_start += index(s, $f)
	return f_start
}
# Make a size in bytes more human-readable through unit prefixes.
#
#   size             the size in bytes
#   format           sprintf format taking the scaled size and the unit
#                    (default "%.0f%s")
#   zero             if not empty, returned as is when size is 0
#   units            comma-separated units in 1024 increments, starting
#                    with the unit for bytes (default ",K,M,G,T")
#   large_format     format used when size is not below large_threshold
#                    (default: same as format)
#   large_threshold  size in bytes from which large_format applies
#
# Returns the formatted string. The size is divided by 1024 until it is
# below 1024 or the last unit has been reached.
function humanize(size, format, zero, units,
                  large_format, large_threshold,
                  # Local variables.
                  actual_format, u, u_length, i) {
	# Arguments may arrive as strings. Force them to numbers so that the
	# comparisons below are numeric rather than string comparisons.
	size += 0
	large_threshold += 0
	if (size == 0 && zero != "") {
		return zero
	}
	if (format == "") {
		format = "%.0f%s"
	}
	if (large_format == "") {
		large_format = format
	}
	if (units == "") {
		units = ",K,M,G,T"
	}
	u_length = split(units, u, ",")
	# Choose the format from the unscaled size, before it is divided.
	actual_format = size < large_threshold ? format : large_format
	i = 1
	while (size >= 1024 && i < u_length) {
		size /= 1024
		i++
	}
	return sprintf(actual_format, size, u[i])
}
# Return a copy of s in which the characters at positions start through
# end (both inclusive) have been swapped for replacement.
function replace(s, start, end, replacement) {
	return sprintf("%s%s%s",
	               substr(s, 1, start - 1),
	               replacement,
	               substr(s, end + 1))
}
# Records whose selected field does not compare equal to its own numeric
# value (headers, blank lines, etc.) are printed unchanged, and no
# further rule is applied to them.
!($config["field"] + 0 == $config["field"]) {
	print $0
	next
}
# Reached only when $config["field"] is a number: overwrite that field,
# together with the separator that follows it, with its humanized form.
{
	# Span to overwrite: from the start of the field up to the character
	# before the next field. When there is no next field, the span is
	# the field itself.
	start = fieldindex($0, config["field"])
	end = fieldindex($0, config["field"] + 1) - 1
	if (!(end >= 1)) {
		end = start + length($config["field"]) - 1
	}

	# The field is multiplied by 1024 before humanize(), which expects a
	# size in bytes. A single space stands in for the replaced separator.
	human = humanize(1024 * $config["field"],
	                 config["format"],
	                 config["zero"],
	                 config["units"],
	                 config["large"],
	                 config["threshold"]) " "

	$0 = replace($0, start, end, human)

	# Trim trailing whitespace if we are responsible for it.
	if (NF == config["field"]) {
		sub(/[[:space:]]+$/, "", $0)
	}
	print $0
}
# vim: tabstop=2 shiftwidth=2 textwidth=72 expandtab
A short permissive license is embedded in the code. Git repository.
Sample output
$ du -sk ./* | sort -n | humsize
0 ./empty
2K ./B2SUMS
2K ./backup.sh
2K ./list
6K ./glob.tcl
74M ./caddy
353M ./go
537M ./cargo
1001M ./private.cgd
2.1G ./junk.iso