ubuntu-buildroot/output/build/host-gawk-5.2.0/awklib/eg/prog/uniq.awk

125 lines
2.9 KiB
Awk

# uniq.awk --- do uniq in awk
#
# Requires getopt() and join() library functions
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
# Updated August 2020 to current POSIX
function usage()
{
print("Usage: uniq [-udc [-f fields] [-s chars]] " \
"[ in [ out ]]") > "/dev/stderr"
exit 1
}
# -c count lines. overrides -d and -u
# -d only repeated lines
# -u only nonrepeated lines
# -f n skip n fields
# -s n skip n characters, skip fields first
# As of 2020, '+' can be used as the option character in addition to '-'
# Previously allowed use of -N to skip fields and +N to skip
# characters is no longer allowed, and not supported by this version.
BEGIN {
# Convert + to - so getopt can handle things
for (i = 1; i < ARGC; i++) {
first = substr(ARGV[i], 1, 1)
if (ARGV[i] == "--" || (first != "-" && first != "+"))
break
else if (first == "+")
# Replace "+" with "-"
ARGV[i] = "-" substr(ARGV[i], 2)
}
}
BEGIN {
count = 1
outputfile = "/dev/stdout"
opts = "udcf:s:"
while ((c = getopt(ARGC, ARGV, opts)) != -1) {
if (c == "u")
non_repeated_only++
else if (c == "d")
repeated_only++
else if (c == "c")
do_count++
else if (c == "f")
fcount = Optarg + 0
else if (c == "s")
charcount = Optarg + 0
else
usage()
}
for (i = 1; i < Optind; i++)
ARGV[i] = ""
if (repeated_only == 0 && non_repeated_only == 0)
repeated_only = non_repeated_only = 1
if (ARGC - Optind == 2) {
outputfile = ARGV[ARGC - 1]
ARGV[ARGC - 1] = ""
}
}
function are_equal( n, m, clast, cline, alast, aline)
{
if (fcount == 0 && charcount == 0)
return (last == $0)
if (fcount > 0) {
n = split(last, alast)
m = split($0, aline)
clast = join(alast, fcount+1, n)
cline = join(aline, fcount+1, m)
} else {
clast = last
cline = $0
}
if (charcount) {
clast = substr(clast, charcount + 1)
cline = substr(cline, charcount + 1)
}
return (clast == cline)
}
NR == 1 {
last = $0
next
}
{
equal = are_equal()
if (do_count) { # overrides -d and -u
if (equal)
count++
else {
printf("%4d %s\n", count, last) > outputfile
last = $0
count = 1 # reset
}
next
}
if (equal)
count++
else {
if ((repeated_only && count > 1) ||
(non_repeated_only && count == 1))
print last > outputfile
last = $0
count = 1
}
}
END {
if (do_count)
printf("%4d %s\n", count, last) > outputfile
else if ((repeated_only && count > 1) ||
(non_repeated_only && count == 1))
print last > outputfile
close(outputfile)
}