145 lines
3.2 KiB
Awk
145 lines
3.2 KiB
Awk
# split.awk --- do split in awk
#
# Requires getopt() library function.
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
# Revised slightly, May 2014
# Rewritten September 2020
|
|
|
|
|
|
# usage --- print a synopsis of both invocation forms on /dev/stderr,
# then terminate the program with exit status 1.
#
# `operands' is a local variable holding the part of the synopsis that
# is shared by the line-count form and the byte-count form.
function usage(    operands)
{
    operands = "[-a suffix-len] [file [outname]]"

    printf("usage: split [-l count] %s\n", operands) > "/dev/stderr"
    printf(" split [-b N[k|m]] %s\n", operands) > "/dev/stderr"

    exit 1
}
|
|
# Start-up: establish defaults, let the command line override them,
# then choose the name of the first output file.
BEGIN {
    # Defaults, in effect unless parse_arguments() changes them:
    Outfile = "x"           # prefix of every output file name
    Suffix_length = 2       # number of letters appended to the prefix
    Line_count = 1000       # records per output file
    Byte_count = 0          # the byte-count rule only fires when this is > 0

    parse_arguments()

    # Must run after parse_arguments(): it sizes its data by Suffix_length.
    init_suffix_data()

    Output = Outfile compute_suffix()
}
|
|
# parse_arguments --- process command-line options and operands
#
# Options are read with getopt() (an external library function that
# reports the option letter and sets Optarg and Optind):
#   -a N        set Suffix_length
#   -b N[k|m]   split by size: set Byte_count (k = *1024, m = *1024*1024)
#               and switch off line mode (Line_count = 0)
#   -l N        split by lines: set Line_count and switch off byte mode
# Any other option, or a count that is not at least 1 after truncation
# to an integer, ends the program through usage().
#
# After the options, an optional input file name is left in ARGV for awk
# to read, and an optional output prefix is stored in Outfile and removed
# from ARGV.  Any further operand is an error.
#
# i, c, l and modifier are local variables.
function parse_arguments(    i, c, l, modifier)
{
    while ((c = getopt(ARGC, ARGV, "a:b:l:")) != -1) {
        if (c == "a") {
            Suffix_length = int(Optarg + 0)
            if (Suffix_length < 1)      # zero, negative, or not a number
                usage()
        } else if (c == "b") {
            Byte_count = Optarg + 0
            Line_count = 0

            # The last character of the argument may be a size multiplier
            l = length(Optarg)
            modifier = substr(Optarg, l, 1)
            if (modifier == "k")
                Byte_count *= 1024
            else if (modifier == "m")
                Byte_count *= 1024 * 1024

            # A limit below one byte cannot be honored
            Byte_count = int(Byte_count)
            if (Byte_count < 1)
                usage()
        } else if (c == "l") {
            Line_count = int(Optarg + 0)
            Byte_count = 0

            # With both counts at zero neither output rule would fire
            # and the input would be silently discarded.
            if (Line_count < 1)
                usage()
        } else
            usage()
    }

    # Clear out options so awk does not treat them as input files
    for (i = 1; i < Optind; i++)
        ARGV[i] = ""

    # Check for filename.  Compare against "" as a string instead of
    # testing truthiness: an operand that looks like numeric zero (for
    # example a file named "0") would otherwise be taken as absent.
    if (Optind < ARGC && ARGV[Optind] != "") {
        Optind++

        # Check for different prefix
        if (Optind < ARGC && ARGV[Optind] != "") {
            Outfile = ARGV[Optind]
            ARGV[Optind] = ""       # the prefix is not an input file

            if (++Optind < ARGC)    # extra operands
                usage()
        }
    }
}
|
|
# compute_suffix --- return the suffix for the next output file
#
# The suffix is Suffix_length lowercase letters, taken from the counter
# kept in Suffix_ind[1..Suffix_length] (each entry is 1..26, entry 1 is
# the leftmost letter).  After the suffix has been built the counter is
# advanced by one, so consecutive calls yield consecutive suffixes.
#
# If the previous call already handed out the final suffix (every
# letter 'z'), an error is printed on /dev/stderr and the program exits
# with status 1.
#
# pos, suffix and alphabet are local variables.
function compute_suffix(    pos, suffix, alphabet)
{
    # Refuse to wrap around once the final suffix has been used ...
    if (Reached_last) {
        printf("split: too many files!\n") > "/dev/stderr"
        exit 1
    }
    # ... and remember when the suffix about to be returned is the final one.
    if (on_last_file())
        Reached_last = 1

    # Spell out the current counter value as letters
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    suffix = ""
    for (pos = 1; pos <= Suffix_length; pos++)
        suffix = suffix substr(alphabet, Suffix_ind[pos], 1)

    # Advance the counter: bump the rightmost position and carry
    # leftward for as long as a position rolls over past 26.
    pos = Suffix_length
    while (pos >= 1) {
        Suffix_ind[pos]++
        if (Suffix_ind[pos] <= 26)
            break
        Suffix_ind[pos] = 1
        pos--
    }

    return suffix
}
|
|
# init_suffix_data --- reset the suffix counter to its first value
#
# Sets Suffix_ind[1..Suffix_length] to 1 and clears the Reached_last
# flag.  `slot' is a local variable.
function init_suffix_data(    slot)
{
    Reached_last = 0

    slot = 1
    while (slot <= Suffix_length) {
        Suffix_ind[slot] = 1
        slot++
    }
}
|
|
# on_last_file --- report whether the suffix counter is at its last value
#
# Returns 1 when every entry of Suffix_ind[1..Suffix_length] equals 26,
# otherwise 0.  `slot' is a local variable.
function on_last_file(    slot)
{
    for (slot = 1; slot <= Suffix_length; slot++)
        if (Suffix_ind[slot] != 26)
            return 0    # some position can still be advanced

    return 1
}
|
|
# Line mode: copy each record to the current output file, moving on to
# a fresh file once Line_count records have been written to this one.
# tcount is the number of records in the current file, counting the
# record being handled.
Line_count > 0 {
    tcount++
    if (tcount > Line_count) {
        # current file is full: start the next one with this record
        close(Output)
        Output = Outfile compute_suffix()
        tcount = 1
    }

    print $0 > Output
}
|
|
# Byte mode: copy each record, plus its newline, to the current output
# file, cutting the record wherever the current file reaches Byte_count.
# tcount is the amount already written to the current file.
#
# The overflow test is a loop, not a single `if': a record longer than
# Byte_count can span several output files, and every piece but the
# last must be cut at exactly Byte_count.  With a single test, the
# whole remainder of a long record would land in one oversized file.
#
# NOTE(review): sizes are measured with length(), which may count
# characters rather than bytes for multibyte input, depending on the
# awk implementation and locale --- confirm if exact byte sizes matter.
Byte_count > 0 {
    # `+ 1' is for the final newline
    while (tcount + length($0) + 1 > Byte_count) {  # would overflow
        # compute leading bytes: the room left in the current file
        leading_bytes = Byte_count - tcount

        # write leading bytes
        printf("%s", substr($0, 1, leading_bytes)) > Output

        # close old file, open new file
        close(Output)
        Output = (Outfile compute_suffix())

        # set up first bytes for new file
        $0 = substr($0, leading_bytes + 1)  # trailing bytes
        tcount = 0
    }

    # write full record or trailing bytes
    tcount += length($0) + 1
    print > Output
}
|
|
# At end of input, close the current output file.
END {
    close(Output)
}
|