125 lines
2.9 KiB
Awk
125 lines
2.9 KiB
Awk
# uniq.awk --- do uniq in awk
|
|
#
|
|
# Requires getopt() and join() library functions
|
|
#
|
|
# Arnold Robbins, arnold@skeeve.com, Public Domain
|
|
# May 1993
|
|
# Updated August 2020 to current POSIX
|
|
|
|
function usage()
|
|
{
|
|
print("Usage: uniq [-udc [-f fields] [-s chars]] " \
|
|
"[ in [ out ]]") > "/dev/stderr"
|
|
exit 1
|
|
}
|
|
|
|
# -c count lines. overrides -d and -u
|
|
# -d only repeated lines
|
|
# -u only nonrepeated lines
|
|
# -f n skip n fields
|
|
# -s n skip n characters, skip fields first
|
|
# As of 2020, '+' can be used as the option character in addition to '-'
|
|
# Previously allowed use of -N to skip fields and +N to skip
|
|
# characters is no longer allowed, and not supported by this version.
|
|
|
|
BEGIN {
|
|
# Convert + to - so getopt can handle things
|
|
for (i = 1; i < ARGC; i++) {
|
|
first = substr(ARGV[i], 1, 1)
|
|
if (ARGV[i] == "--" || (first != "-" && first != "+"))
|
|
break
|
|
else if (first == "+")
|
|
# Replace "+" with "-"
|
|
ARGV[i] = "-" substr(ARGV[i], 2)
|
|
}
|
|
}
|
|
BEGIN {
|
|
count = 1
|
|
outputfile = "/dev/stdout"
|
|
opts = "udcf:s:"
|
|
while ((c = getopt(ARGC, ARGV, opts)) != -1) {
|
|
if (c == "u")
|
|
non_repeated_only++
|
|
else if (c == "d")
|
|
repeated_only++
|
|
else if (c == "c")
|
|
do_count++
|
|
else if (c == "f")
|
|
fcount = Optarg + 0
|
|
else if (c == "s")
|
|
charcount = Optarg + 0
|
|
else
|
|
usage()
|
|
}
|
|
|
|
for (i = 1; i < Optind; i++)
|
|
ARGV[i] = ""
|
|
|
|
if (repeated_only == 0 && non_repeated_only == 0)
|
|
repeated_only = non_repeated_only = 1
|
|
|
|
if (ARGC - Optind == 2) {
|
|
outputfile = ARGV[ARGC - 1]
|
|
ARGV[ARGC - 1] = ""
|
|
}
|
|
}
|
|
function are_equal( n, m, clast, cline, alast, aline)
|
|
{
|
|
if (fcount == 0 && charcount == 0)
|
|
return (last == $0)
|
|
|
|
if (fcount > 0) {
|
|
n = split(last, alast)
|
|
m = split($0, aline)
|
|
clast = join(alast, fcount+1, n)
|
|
cline = join(aline, fcount+1, m)
|
|
} else {
|
|
clast = last
|
|
cline = $0
|
|
}
|
|
if (charcount) {
|
|
clast = substr(clast, charcount + 1)
|
|
cline = substr(cline, charcount + 1)
|
|
}
|
|
|
|
return (clast == cline)
|
|
}
|
|
NR == 1 {
|
|
last = $0
|
|
next
|
|
}
|
|
|
|
{
|
|
equal = are_equal()
|
|
|
|
if (do_count) { # overrides -d and -u
|
|
if (equal)
|
|
count++
|
|
else {
|
|
printf("%4d %s\n", count, last) > outputfile
|
|
last = $0
|
|
count = 1 # reset
|
|
}
|
|
next
|
|
}
|
|
|
|
if (equal)
|
|
count++
|
|
else {
|
|
if ((repeated_only && count > 1) ||
|
|
(non_repeated_only && count == 1))
|
|
print last > outputfile
|
|
last = $0
|
|
count = 1
|
|
}
|
|
}
|
|
|
|
END {
|
|
if (do_count)
|
|
printf("%4d %s\n", count, last) > outputfile
|
|
else if ((repeated_only && count > 1) ||
|
|
(non_repeated_only && count == 1))
|
|
print last > outputfile
|
|
close(outputfile)
|
|
}
|