ubuntu-buildroot/output/build/host-gawk-5.2.0/awklib/eg/prog/split.awk

145 lines
3.2 KiB
Awk

# split.awk --- do split in awk
#
# Requires getopt() library function.
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
# Revised slightly, May 2014
# Rewritten September 2020
# usage --- print the two accepted command-line forms on standard error
#           and terminate with exit status 1
function usage(    operands, errout)
{
    errout = "/dev/stderr"

    # The trailing operands are the same for both invocation forms
    operands = "[-a suffix-len] [file [outname]]"

    printf("usage: split [-l count] %s\n", operands) > errout
    printf(" split [-b N[k|m]] %s\n", operands) > errout

    exit 1
}
BEGIN {
    # Defaults; parse_arguments() may override any of them
    Outfile = "x"           # prefix of every output file name
    Suffix_length = 2       # number of letters appended to the prefix
    Byte_count = 0          # > 0 selects the byte-splitting rule
    Line_count = 1000       # > 0 selects the line-splitting rule

    parse_arguments()

    # Suffix state depends on the (possibly overridden) Suffix_length
    init_suffix_data()

    # Name of the first output file
    Output = Outfile compute_suffix()
}
# parse_arguments --- process options and operands from ARGV
#
# Options are read with the getopt() library function, which this
# program requires and which is expected to set Optarg and Optind:
#   -a N        set Suffix_length
#   -b N[k|m]   split by bytes (k multiplies by 1024, m by 1024 * 1024);
#               turns line splitting off
#   -l N        split by lines; turns byte splitting off
#
# Every numeric option must be at least 1; anything else, an unknown
# option, or extra operands ends the program through usage().
#
# After the options, an optional input file name is left in ARGV for awk
# to read, and an optional output prefix is stored in Outfile and removed
# from ARGV so that it is not treated as an input file.
function parse_arguments(   i, c, l, modifier)
{
    while ((c = getopt(ARGC, ARGV, "a:b:l:")) != -1) {
        if (c == "a") {
            Suffix_length = Optarg + 0
            # With no suffix letters only one output file could be named
            if (Suffix_length < 1)
                usage()
        } else if (c == "b") {
            Byte_count = Optarg + 0
            Line_count = 0

            # The multiplier, if any, is the last character of the argument
            l = length(Optarg)
            modifier = substr(Optarg, l, 1)
            if (modifier == "k")
                Byte_count *= 1024
            else if (modifier == "m")
                Byte_count *= 1024 * 1024

            # A count below 1 would disable both splitting rules
            if (Byte_count < 1)
                usage()
        } else if (c == "l") {
            Line_count = Optarg + 0
            Byte_count = 0

            # A count below 1 would disable both splitting rules
            if (Line_count < 1)
                usage()
        } else
            usage()
    }

    # Clear out options
    for (i = 1; i < Optind; i++)
        ARGV[i] = ""

    # Check for filename.  Compare against "" explicitly: a plain
    # truth test would treat an operand such as "0" as missing.
    if (Optind < ARGC && ARGV[Optind] != "") {
        Optind++

        # Check for different prefix
        if (Optind < ARGC && ARGV[Optind] != "") {
            Outfile = ARGV[Optind]
            ARGV[Optind] = ""

            # Anything after the prefix is an error
            if (++Optind < ARGC)
                usage()
        }
    }
}
# compute_suffix --- return the suffix for the next output file and
#                    advance the per-position letter indices
#
# Reads Suffix_length and Suffix_ind[]; updates Suffix_ind[] and
# Reached_last.  Exits with status 1 if called again after the suffix
# made entirely of "z" has already been handed out.
function compute_suffix(    pos, suffix, alphabet)
{
    # A previous call already returned the final suffix: nothing left
    if (Reached_last) {
        printf("split: too many files!\n") > "/dev/stderr"
        exit 1
    }

    # The suffix about to be returned is the last one; remember that so
    # the next call fails instead of wrapping around
    if (on_last_file())
        Reached_last = 1

    # Build the suffix from the current indices, one letter per position
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    suffix = ""
    for (pos = 1; pos <= Suffix_length; pos++)
        suffix = suffix substr(alphabet, Suffix_ind[pos], 1)

    # Advance like an odometer: bump the rightmost position and carry
    # leftwards for as long as a position rolls over past 26
    pos = Suffix_length
    while (pos >= 1) {
        Suffix_ind[pos]++
        if (Suffix_ind[pos] <= 26)
            break
        Suffix_ind[pos] = 1
        pos--
    }

    return suffix
}
# init_suffix_data --- reset suffix generation state
#
# Clears Reached_last and sets Suffix_ind[1 .. Suffix_length] to 1, the
# index compute_suffix() maps to the letter "a".
function init_suffix_data(    slot)
{
    Reached_last = 0

    slot = 0
    while (++slot <= Suffix_length)
        Suffix_ind[slot] = 1
}
# on_last_file --- return 1 if every one of the first Suffix_length
#                  entries of Suffix_ind[] equals 26, otherwise 0
function on_last_file(    k)
{
    # Stop at the first position that has not reached 26
    for (k = 1; k <= Suffix_length; k++)
        if (Suffix_ind[k] != 26)
            return 0

    return 1
}
# Line mode: copy each record to the current output file, switching to
# a new file once Line_count records have been written to the old one.
Line_count > 0 {
    tcount++        # records in the current file, counting this one

    if (tcount > Line_count) {
        # Current file is full: this record starts the next file
        close(Output)
        Output = (Outfile compute_suffix())
        tcount = 1
    }

    print $0 > Output
}
# Byte mode: copy each record to the output, starting a new file
# whenever the current one has received Byte_count units.  tcount is the
# amount already written to the current file; each record costs
# length($0) plus one for its newline.
#
# NOTE(review): length() counts characters, so "bytes" here presumably
# assumes a single-byte locale --- confirm if multibyte input matters.
Byte_count > 0 {
    # `+ 1' is for the final newline
    #
    # Loop rather than test once: a record longer than the space left
    # plus a full Byte_count must be spread over several files, or the
    # next file would end up larger than Byte_count.
    while (tcount + length($0) + 1 > Byte_count) {  # would overflow
        # compute leading bytes
        leading_bytes = Byte_count - tcount

        # An empty file that cannot accept even one byte (fractional
        # Byte_count below 1) can never make progress; stop splitting
        # instead of looping.
        if (tcount == 0 && leading_bytes < 1)
            break

        # write leading bytes
        printf("%s", substr($0, 1, leading_bytes)) > Output

        # close old file, open new file
        close(Output)
        Output = (Outfile compute_suffix())

        # set up first bytes for new file
        $0 = substr($0, leading_bytes + 1)  # trailing bytes
        tcount = 0
    }

    # write full record or trailing bytes
    tcount += length($0) + 1
    print > Output
}
# Cleanup: close the output file currently named by Output.
END {
# Output holds the name of the file the main rules last wrote to
close(Output)
}