awk,jq,sed refs

This commit is contained in:
mlot
2026-05-13 12:52:30 -04:00
parent 4cc63974f1
commit cf662e7708
4 changed files with 1477 additions and 0 deletions

0
exit_return_codes/exercise1.sh Normal file → Executable file
View File

354
learnawk.awk Normal file
View File

@@ -0,0 +1,354 @@
#!/usr/bin/awk -f
# Comments are like this
# AWK programs consist of a collection of patterns and actions.
pattern1 { action; } # just like lex
pattern2 { action; }
# There is an implied loop and AWK automatically reads and parses each
# record of each file supplied. Each record is split by the FS delimiter,
# which defaults to white-space (multiple spaces,tabs count as one)
# You can assign FS either on the command line (-F C) or in your BEGIN
# pattern
# One of the special patterns is BEGIN. The BEGIN pattern is true
# BEFORE any of the files are read. The END pattern is true after
# an End-of-file from the last file (or standard-in if no files specified)
# There is also an output field separator (OFS) that you can assign, which
# defaults to a single space
BEGIN {
# BEGIN will run at the beginning of the program. It's where you put all
# the preliminary set-up code, before you process any text files. If you
# have no text files, then think of BEGIN as the main entry point.
# Variables are global. Just set them or use them, no need to declare.
count = 0;
# Operators just like in C and friends
a = count + 1;
b = count - 1;
c = count * 1;
d = count / 1; # integer division
e = count % 1; # modulus
f = count ^ 1; # exponentiation
a += 1;
b -= 1;
c *= 1;
d /= 1;
e %= 1;
f ^= 1;
# Incrementing and decrementing by one
a++;
b--;
# As a prefix operator, it returns the incremented value
++a;
--b;
# Notice, also, no punctuation such as semicolons to terminate statements
# Control statements
if (count == 0)
print "Starting with count of 0";
else
print "Huh?";
# Or you could use the ternary operator
print (count == 0) ? "Starting with count of 0" : "Huh?";
# Blocks consisting of multiple lines use braces
while (a < 10) {
print "String concatenation is done" " with a series" " of"
" space-separated strings";
print a;
a++;
}
for (i = 0; i < 10; i++)
print "Good ol' for loop";
# As for comparisons, they're the standards:
# a < b # Less than
# a <= b # Less than or equal
# a != b # Not equal
# a == b # Equal
# a > b # Greater than
# a >= b # Greater than or equal
# Logical operators as well
# a && b # AND
# a || b # OR
# In addition, there's the super useful regular expression match
if ("foo" ~ "^fo+$")
print "Fooey!";
if ("boo" !~ "^fo+$")
print "Boo!";
# Arrays
arr[0] = "foo";
arr[1] = "bar";
# You can also initialize an array with the built-in function split()
n = split("foo:bar:baz", arr, ":");
# You also have associative arrays (indeed, they're all associative arrays)
assoc["foo"] = "bar";
assoc["bar"] = "baz";
# And multi-dimensional arrays, with some limitations I won't mention here
multidim[0,0] = "foo";
multidim[0,1] = "bar";
multidim[1,0] = "baz";
multidim[1,1] = "boo";
# You can test for array membership
if ("foo" in assoc)
print "Fooey!";
# You can also use the 'in' operator to traverse the keys of an array
for (key in assoc)
print assoc[key];
# The command line is in a special array called ARGV
for (argnum in ARGV)
print ARGV[argnum];
# You can remove elements of an array
# This is particularly useful to prevent AWK from assuming the arguments
# are files for it to process
delete ARGV[1];
# The number of command line arguments is in a variable called ARGC
print ARGC;
# AWK has several built-in functions. They fall into three categories. I'll
# demonstrate each of them in their own functions, defined later.
return_value = arithmetic_functions(a, b, c);
string_functions();
io_functions();
}
# Here's how you define a function
function arithmetic_functions(a, b, c, d) {
# Probably the most annoying part of AWK is that there are no local
# variables. Everything is global. For short scripts, this is fine, even
# useful, but for longer scripts, this can be a problem.
# There is a work-around (ahem, hack). Function arguments are local to the
# function, and AWK allows you to define more function arguments than it
# needs. So just stick local variable in the function declaration, like I
# did above. As a convention, stick in some extra whitespace to distinguish
# between actual function parameters and local variables. In this example,
# a, b, and c are actual parameters, while d is merely a local variable.
# Now, to demonstrate the arithmetic functions
# Most AWK implementations have some standard trig functions
d = sin(a);
d = cos(a);
d = atan2(b, a); # arc tangent of b / a
# And logarithmic stuff
d = exp(a);
d = log(a);
# Square root
d = sqrt(a);
# Truncate floating point to integer
d = int(5.34); # d => 5
# Random numbers
srand(); # Supply a seed as an argument. By default, it uses the time of day
d = rand(); # Random number between 0 and 1.
# Here's how to return a value
return d;
}
function string_functions( localvar, arr) {
# AWK, being a string-processing language, has several string-related
# functions, many of which rely heavily on regular expressions.
# Search and replace, first instance (sub) or all instances (gsub)
# Both return number of matches replaced
localvar = "fooooobar";
sub("fo+", "Meet me at the ", localvar); # localvar => "Meet me at the bar"
gsub("e", ".", localvar); # localvar => "M..t m. at th. bar"
# Search for a string that matches a regular expression
# index() does the same thing, but doesn't allow a regular expression
match(localvar, "t"); # => 4, since the 't' is the fourth character
# Split on a delimiter
n = split("foo-bar-baz", arr, "-");
# result: arr[1] = "foo"; arr[2] = "bar"; arr[3] = "baz"; n = 3
# Other useful stuff
sprintf("%s %d %d %d", "Testing", 1, 2, 3); # => "Testing 1 2 3"
substr("foobar", 2, 3); # => "oob"
substr("foobar", 4); # => "bar"
length("foo"); # => 3
tolower("FOO"); # => "foo"
toupper("foo"); # => "FOO"
}
function io_functions( localvar) {
# You've already seen print
print "Hello world";
# There's also printf
printf("%s %d %d %d\n", "Testing", 1, 2, 3);
# AWK doesn't have file handles, per se. It will automatically open a file
# handle for you when you use something that needs one. The string you used
# for this can be treated as a file handle, for purposes of I/O. This makes
# it feel sort of like shell scripting, but to get the same output, the
# string must match exactly, so use a variable:
outfile = "/tmp/foobar.txt";
print "foobar" > outfile;
# Now the string outfile is a file handle. You can close it:
close(outfile);
# Here's how you run something in the shell
system("echo foobar"); # => prints foobar
# Reads a line from standard input and stores in localvar
getline localvar;
# Reads a line from a pipe (again, use a string so you close it properly)
cmd = "echo foobar";
cmd | getline localvar; # localvar => "foobar"
close(cmd);
# Reads a line from a file and stores in localvar
infile = "/tmp/foobar.txt";
getline localvar < infile;
close(infile);
}
# As I said at the beginning, AWK programs consist of a collection of patterns
# and actions. You've already seen the BEGIN pattern. Other
# patterns are used only if you're processing lines from files or standard
# input.
#
# When you pass arguments to AWK, they are treated as file names to process.
# It will process them all, in order. Think of it like an implicit for loop,
# iterating over the lines in these files. these patterns and actions are like
# switch statements inside the loop.
/^fo+bar$/ {
# This action will execute for every line that matches the regular
# expression, /^fo+bar$/, and will be skipped for any line that fails to
# match it. Let's just print the line:
print;
# Whoa, no argument! That's because print has a default argument: $0.
# $0 is the name of the current line being processed. It is created
# automatically for you.
# You can probably guess there are other $ variables. Every line is
# implicitly split before every action is called, much like the shell
# does. And, like the shell, each field can be access with a dollar sign
# This will print the second and fourth fields in the line
print $2, $4;
# AWK automatically defines many other variables to help you inspect and
# process each line. The most important one is NF
# Prints the number of fields on this line
print NF;
# Print the last field on this line
print $NF;
}
# Every pattern is actually a true/false test. The regular expression in the
# last pattern is also a true/false test, but part of it was hidden. If you
# don't give it a string to test, it will assume $0, the line that it's
# currently processing. Thus, the complete version of it is this:
$0 ~ /^fo+bar$/ {
print "Equivalent to the last pattern";
}
a > 0 {
# This will execute once for each line, as long as a is positive
}
# You get the idea. Processing text files, reading in a line at a time, and
# doing something with it, particularly splitting on a delimiter, is so common
# in UNIX that AWK is a scripting language that does all of it for you, without
# you needing to ask. All you have to do is write the patterns and actions
# based on what you expect of the input, and what you want to do with it.
# Here's a quick example of a simple script, the sort of thing AWK is perfect
# for. It will read a name from standard input and then will print the average
# age of everyone with that first name. Let's say you supply as an argument the
# name of a this data file:
#
# Bob Jones 32
# Jane Doe 22
# Steve Stevens 83
# Bob Smith 29
# Bob Barker 72
#
# Here's the script:
BEGIN {
# First, ask the user for the name
print "What name would you like the average age for?";
# Get a line from standard input, not from files on the command line
getline name < "/dev/stdin";
}
# Now, match every line whose first field is the given name
$1 == name {
# Inside here, we have access to a number of useful variables, already
# pre-loaded for us:
# $0 is the entire line
# $3 is the third field, the age, which is what we're interested in here
# NF is the number of fields, which should be 3
# NR is the number of records (lines) seen so far
# FILENAME is the name of the file being processed
# FS is the field separator being used, which is " " here
# ...etc. There are plenty more, documented in the man page.
# Keep track of a running total and how many lines matched
sum += $3;
nlines++;
}
# Another special pattern is called END. It will run after processing all the
# text files. Unlike BEGIN, it will only run if you've given it input to
# process. It will run after all the files have been read and processed
# according to the rules and actions you've provided. The purpose of it is
# usually to output some kind of final report, or do something with the
# aggregate of the data you've accumulated over the course of the script.
END {
if (nlines)
print "The average age for " name " is " sum / nlines;
}

883
learnjq.sh Normal file
View File

@@ -0,0 +1,883 @@
# When running jq from the command line, jq program code can be specified as the
# first argument after any options to `jq`. We often quote such jq program with
# single quotes (`'`) to prevent any special interpretation from the command line
# shell.
#
jq -n '# Comments start with # until the end of line.
# The -n option sets the input to the value, `null`, and prevents `jq`
# from reading inputs from external sources.
'
# Output:
# null
# By default jq reads from *STDIN* a stream of JSON inputs (values). It
# processes each input with the jq program (filters) specified at the command
# line, and prints the outputs of processing each input with the program to
# *STDOUT*.
#
echo '
"hello" 123 [
"one",
"two",
"three"
]
{ "name": "jq" }
' |
jq '. # <-- the jq program here is the single dot (.), called the identity
# operator, which stands for the current input.
'
# Output:
# "hello"
# 123
# [
# "one",
# "two",
# "three"
# ]
# {
# "name": "jq"
# }
# Notice that jq pretty-prints the outputs by default, therefore, piping
# to `jq` is a simple way to format a response from some REST API endpoint
# that returns JSON. E.g., `curl -s https://freegeoip.app/json/ | jq`
# Instead of processing each JSON input with a jq program, you can also
# ask jq to slurp them up as an array.
#
echo '1 "two" 3' | jq -s .
# Output:
# [
# 1,
# "two",
# 3
# ]
# Or, treat each line as a string.
#
(echo line 1; echo line 2) | jq -R .
# Output:
# "line 1"
# "line 2"
# Or, combine -s and -R to slurp the input lines into a single string.
#
(echo line 1; echo line 2) | jq -sR .
# Output:
# "line 1\nline2\n"
# Inputs can also come from a JSON file specified at the command line:
#
echo '"hello"' > hello.json
jq . hello.json
# Output:
# "hello"
# Passing a value into a jq program can be done with the `--arg` option.
# Below, `val` is the variable name to bind the value, `123`, to.
# The variable is then referenced as `$val`.
#
jq -n --arg val 123 '$val' # $val is the string "123" here
# Output:
# "123"
# If you need to pass a JSON value, use `--argjson`
#
jq -n --argjson val 123 '$val' # $val is a number
# Output:
# 123
# Using `--arg` or `--argjson` is an useful way of building JSON output from
# existing input.
#
jq --arg text "$(date; echo "Have a nice day!")" -n '{ "today": $text }'
# Output:
# {
# "today": "Sun Apr 10 09:53:07 PM EDT 2022\nHave a nice day!"
# }
# Instead of outputting values as JSON, you can use the `-r` option to print
# string values unquoted / unescaped. Non-string values are still printed as
# JSON.
#
echo '"hello" 2 [1, "two", null] {}' | jq -r .
# Output:
# hello
# 2
# [
# 1,
# "two",
# null
# ]
# {}
# Inside a string in jq, `\(expr)` can be used to substitute the output of
# `expr` into the surrounding string context.
#
jq -rn '"1 + 2 = \(1+2)"'
# Output:
# 1 + 2 = 3
# The `-r` option is most useful for generating text outputs to be processed
# down in a shell pipeline, especially when combined with an interpolated
# string that is prefixed the `@sh` prefix operator.
#
# The `@sh` operator escapes the outputs of `\(...)` inside a string with
# single quotes so that each resulting string of `\(...)` can be evaluated
# by the shell as a single word / token / argument without special
# interpretations.
#
env_vars=$(
echo '{"var1": "value one", "var2": "value\ntwo"}' \
|
jq -r '
"export " + @sh "var1=\(.var1) var2=\(.var2)"
# ^^^^^^^^ ^^^^^^^^
# "'value one'" "'value\ntwo'"
#
# NOTE: The + (plus) operator here concatenates strings.
'
)
echo "$env_vars"
eval "$env_vars"
declare -p var1 var2
# Output:
# export var1='value one' var2='value
# two'
# declare -- var1="value one"
# declare -- var2="value
# two"
# There are other string `@prefix` operators (e.g., @base64, @uri, @csv, ...)
# that might be useful to you. See `man jq` for details.
# The comma (`,`) operator in jq evaluates each operand and generates multiple
# outputs:
#
jq -n '"one", 2, ["three"], {"four": 4}'
# Output:
# "one"
# 2
# [
# "three"
# ]
# {
# "four": 4
# }
# Any JSON value is a valid jq expression that evaluates to the JSON value
# itself.
#
jq -n '1, "one", [1, 2], {"one": 1}, null, true, false'
# Output:
# 1
# "one"
# [
# 1,
# 2
# ]
# {
# "one": 1
# }
# null
# true
# false
# Any jq expression can be used where a JSON value is expected, even as object
# keys. (though parenthesis might be required for object keys or values)
#
jq -n '[2*3, 8-1, 16/2], {("tw" + "o"): (1 + 1)}'
# Output:
# [
# 6,
# 7,
# 8
# ]
# {
# "two": 2
# }
# As a shortcut, if a JSON object key looks like a valid identifier (matching
# the regex `^[a-zA-Z_][a-zA-Z_0-9]*$`), quotes can be omitted.
#
jq -n '{ key_1: "value1" }'
# If a JSON object's key's value is omitted, it is looked up in the current
# input using the key: (see next example for the meaning of `... | ...`)
#
jq -n '{c: 3} | {a: 1, "b", c}'
# Output:
# {
# "a": 1,
# "b": null,
# "c": 3
# }
# jq programs are more commonly written as a series of expressions (filters)
# connected by the pipe (`|`) operator, which makes the output of its left
# filter the input to its right filter.
#
jq -n '1 | . + 2 | . + 3' # first dot is 1; second dot is 3
# Output:
# 6
# If an expression evaluates to multiple outputs, then jq will iterate through
# them and propagate each output down the pipeline, and generate multiple
# outputs in the end.
#
jq -n '1, 2, 3 | ., 4 | .'
# Output:
# 1
# 4
# 2
# 4
# 3
# 4
# The flows of the data in the last example can be visualized like this:
# (number prefixed with `*` indicates the current output)
#
# *1, 2, 3 | *1, 4 | *1
# 1, 2, 3 | 1, *4 | *4
# 1, *2, 3 | *2, 4 | *2
# 1, 2, 3 | 2, *4 | *4
# 1, 2, *3 | *3, 4 | *3
# 1, 2, 3 | 3, *4 | *4
#
#
# To put it another way, the evaluation of the above example is very similar
# to the following pieces of code in other programming languages:
#
# In Python:
#
# for first_dot in 1, 2, 3:
# for second_dot in first_dot, 4:
# print(second_dot)
#
# In Ruby:
#
# [1, 2, 3].each do |dot|
# [dot, 4].each { |dot| puts dot }
# end
#
# In JavaScript:
#
# [1, 2, 3].forEach(dot => {
# [dot, 4].forEach(dot => console.log(dot))
# })
#
# Below are some examples of array index and object attribute lookups using
# the `[expr]` operator after an expression. If `expr` is a number then it's
# an array index lookup; otherwise, it should be a string, in which case it's
# an object attribute lookup:
# Array index lookup
#
jq -n '[2, {"four": 4}, 6][1 - 1]' # => 2
jq -n '[2, {"four": 4}, 6][0]' # => 2
jq -n '[2, {"four": 4}, 6] | .[0]' # => 2
# You can chain the lookups since they are just expressions.
#
jq -n '[2, {"four": 4}, 6][1]["fo" + "ur"]' # => 4
# For object attributes, you can also use the `.key` shortcut.
#
jq -n '[2, {"four": 4}, 6][1].four' # => 4
# Use `."key"` if the key is not a valid identifier.
#
jq -n '[2, {"f o u r": 4}, 6][1]."f o u r"' # => 4
# Array index lookup returns null if the index is not found.
#
jq -n '[2, {"four": 4}, 6][99]' # => null
# Object attribute lookup returns null if the key is not found.
#
jq -n '[2, {"four": 4}, 6][1].whatever' # => null
# The alternative operator `//` can be used to provide a default
# value when the result of the left operand is either `null` or `false`.
#
jq -n '.unknown_key // 7' # => 7
# If the thing before the lookup operator (`[expr]`) is neither an array
# or an object, then you will get an error:
#
jq -n '123 | .[0]' # => jq: error (at <unknown>): Cannot index number with number
jq -n '"abc" | .name' # => jq: error (at <unknown>): Cannot index string with string "name"
jq -n '{"a": 97} | .[0]' # => jq: error (at <unknown>): Cannot index object with number
jq -n '[89, 64] | .["key"]' # => jq: error (at <unknown>): Cannot index array with string "key"
# You can, however, append a `?` to a lookup to make jq return `empty`
# instead when such error happens.
#
jq -n '123 | .[0]?' # no output since it's empty.
jq -n '"abc" | .name?' # no output since it's empty.
# The alternative operator (`//`) also works with `empty`:
#
jq -n '123 | .[0]? // 99' # => 99
jq -n '"abc" | .name? // "unknown"' # => "unknown"
# NOTE: `empty` is actually a built-in function in jq.
# With the nested loop explanation we illustrated earlier before,
# `empty` is like the `continue` or the `next` keyword that skips
# the current iteration of the loop in some programming languages.
# Strings and arrays can be sliced with the same syntax (`[i:j]`, but no
# stepping) and semantic as found in the Python programming language:
#
# 0 1 2 3 4 5 ... infinite
# array = ["a", "b", "c", "d"]
# -infinite ... -4 -3 -2 -1
#
jq -n '["Peter", "Jerry"][1]' # => "Jerry"
jq -n '["Peter", "Jerry"][-1]' # => "Jerry"
jq -n '["Peter", "Jerry", "Tom"][1:]' # => ["Jerry", "Tom"]
jq -n '["Peter", "Jerry", "Tom"][:1+1]' # => ["Peter", "Jerry"]
jq -n '["Peter", "Jerry", "Tom"][1:99]' # => ["Jerry", "Tom"]
# If the lookup index or key is omitted then jq iterates through
# the collection, generating one output value from each iteration.
#
# These examples produce the same outputs.
#
echo 1 2 3 | jq .
jq -n '1, 2, 3'
jq -n '[1, 2, 3][]'
jq -n '{a: 1, b: 2, c: 3}[]'
# Output:
# 1
# 2
# 3
# You can build an array out of multiple outputs.
#
jq -n '{values: [{a: 1, b: 2, c: 3}[] | . * 2]}'
# Output:
# {
# "values": [
# 2,
# 4,
# 6
# ]
# }
# If multiple outputs are not contained, then we'd get multiple outputs
# in the end.
#
jq -n '{values: ({a: 1, b: 2, c: 3}[] | . * 2)}'
# Output:
# {
# "values": 2
# }
# {
# "values": 4
# }
# {
# "values": 6
# }
# Conditional `if ... then ... else ... end` in jq is an expression, so
# both the `then` part and the `else` part are required. In jq, only
# two values, `null` and `false`, are false; all other values are true.
#
jq -n 'if 1 > 2 | not and 1 <= 2 then "Makes sense" else "WAT?!" end'
# Output
# "Makes sense"
# Notice that `not` is a built-in function that takes zero arguments,
# that's why it's used as a filter to negate its input value.
# We'll talk about functions soon.
# Another example using a conditional:
#
jq -n '1, 2, 3, 4, 5 | if . % 2 != 0 then . else empty end'
# Output
# 1
# 3
# 5
# The `empty` above is a built-in function that takes 0 arguments and
# generates no outputs. Let's see more examples of built-in functions.
# The above conditional example can be written using the `select/1` built-in
# function (`/1` indicates the number of arguments expected by the function).
#
jq -n '1, 2, 3, 4, 5 | select(. % 2 != 0)' # NOTE: % gives the remainder.
# Output
# 1
# 3
# 5
# Function arguments in jq are passed with call-by-name semantic, which
# means, an argument is not evaluated at call site, but instead, is
# treated as a lambda expression with the calling context of the call
# site as its scope for variable and function references used in the
# expression.
#
# In the above example, the expression `. % 2 != 0` is what's passed to
# `select/1` as the argument, not `true` or `false`, which is what would
# have been the case had the (boolean) expression was evaluated before it's
# passed to the function.
# The `range/1`, `range/2`, and `range/3` built-in functions generate
# integers within a given range.
#
jq -n '[range(3)]' # => [0, 1, 2]
jq -n '[range(0; 4)]' # => [0, 1, 2, 3]
jq -n '[range(2; 10; 2)]' # => [2, 4, 6, 8]
# Notice that `;` (semicolon) is used to separate function arguments.
# The `map/1` function applies a given expression to each element of
# the current input (array) and outputs a new array.
#
jq -n '[range(1; 6) | select(. % 2 != 0)] | map(. * 2)'
# Output:
# [
# 2,
# 6,
# 10
# ]
# Without using `select/1` and `map/1`, we could have also written the
# above example like this:
#
jq -n '[range(1; 6) | if . % 2 != 0 then . else empty end | . * 2]'
# `keys/0` returns an array of keys of the current input. For an object,
# these are the object's attribute names; for an array, these are the
# array indices.
#
jq -n '[range(2; 10; 2)] | keys' # => [0, 1, 2, 3]
jq -n '{a: 1, b: 2, c: 3} | keys' # => ["a", "b", "c"]
# `values/0` returns an array of values of the current input. For an object,
# these are the object's attribute values; for an array, these are the
# elements of the array.
#
jq -n '[range(2; 10; 2)] | values' # => [2, 4, 6, 8]
jq -n '{a: 1, b: 2, c: 3} | values' # => [1, 2, 3]
# `to_entries/0` returns an array of key-value objects of the current input
# object.
#
jq -n '{a: 1, b: 2, c: 3} | to_entries'
# Output:
# [
# {
# "key": "a",
# "value": 1
# },
# {
# "key": "b",
# "value": 2
# },
# {
# "key": "c",
# "value": 3
# }
# ]
# Here's how you can turn an object's attribute into environment variables
# using what we have learned so far.
#
env_vars=$(
jq -rn '{var1: "1 2 3 4", var2: "line1\nline2\n"}
| to_entries[]
| "export " + @sh "\(.key)=\(.value)"
'
)
eval "$env_vars"
declare -p var1 var2
# Output:
# declare -x var1="1 2 3 4"
# declare -x var2="line1
# line2
# "
# `from_entries/0` is the opposite of `to_entries/0` in that it takes an
# an array of key-value objects and turn that into an object with keys
# and values from the `key` and `value` attributes of the objects.
#
# It's useful together with `to_entries/0` when you need to iterate and
# do something to each attribute of an object.
#
jq -n '{a: 1, b: 2, c: 3} | to_entries | map(.value *= 2) | from_entries'
# Output:
# {
# "a": 2,
# "b": 4,
# "c": 6
# }
# The example above can be further shortened with the `with_entries/1` built-in:
#
jq -n '{a: 1, b: 2, c: 3} | with_entries(.value *= 2)'
# The `group_by/1` generates an array of groups (arrays) from the current
# input (array). The classification is done by applying the expression argument
# to each member of the input array.
#
# Let's look at a contrived example (Note that `tostring`, `tonumber`,
# `length` and `max` are all built-in jq functions. Feel free to look
# them up in the jq manual):
#
# Generate some random numbers.
numbers=$(echo $RANDOM{,,,,,,,,,,,,,,,,,,,,})
#
# Feed the numbers to jq, classifying them into groups and calculating their
# averages, and finally generate a report.
#
echo $numbers | jq -rs ' # Slurp the numbers into an array.
[
[ map(tostring) # Turn it into an array of strings.
| group_by(.[0:1]) # Group the numbers by their first digits.
| .[] # Iterate through the array of arrays (groups).
| map(tonumber) # Turn each group back to an array of numbers.
] # Finally, contain all groups in an array.
| sort_by([length, max]) # Sort the groups by their sizes.
# If two groups have the same size then the one with the largest
# number wins (is bigger).
| to_entries[] # Enumerate the array, generating key-value objects.
| # For each object, generate two lines:
"Group \(.key): \(.value | sort | join(" "))" + "\n" +
"Average: \( .value | (add / length) )"
] # Contain the group+average lines in an array.
# Join the array elements by separator lines (dashes) to produce the report.
| join("\n" + "-"*78 + "\n")
'
# Output:
#
# Group 0: 3267
# Average: 3267
# ------------------------------------------------------------------------------
# Group 1: 7854
# Average: 7854
# ------------------------------------------------------------------------------
# Group 2: 4415 4447
# Average: 4431
# ------------------------------------------------------------------------------
# Group 3: 681 6426
# Average: 3553.5
# ------------------------------------------------------------------------------
# Group 4: 21263 21361 21801 21832 22947 23523 29174
# Average: 23128.714285714286
# ------------------------------------------------------------------------------
# Group 5: 10373 12698 13132 13924 17444 17963 18934 18979
# Average: 15430.875
# The `add/1` built-in "reduces" an array of values to a single value.
# You can think of it as sticking the `+` operator in between each value of
# the collection. Here are some examples:
#
jq -n '[1, 2, 3, 4, 5] | add' # => 15
jq -n '["a", "b", "c"] | add' # => "abc"
# `+` concatenates arrays
jq -n '[["a"], ["b"], ["c"]] | add'
# Output:
# [
# "a",
# "b",
# "c"
# ]
# `+` merges objects non-recursively.
jq -n '[{a: 1, b: {c: 3}}, {b: 2, c: 4}] | add'
# Output:
# {
# "a": 1,
# "b": 2,
# "c": 4
# }
# jq provides a special syntax for writing an expression that reduces
# the outputs generated by a given expression to a single value.
# It has this form:
#
# reduce outputs_expr as $var (initial_value; reduction_expr)
#
# Examples:
#
jq -n 'reduce range(1; 6) as $i (0; . + $i)' # => 15
jq -n 'reduce (1, 2, 3, 4, 5) as $i (0; . + $i)' # => 15
jq -n '[1, 2, 3, 4, 5] | reduce .[] as $i (0; . + $i)' # => 15
jq -n '["a", "b", "c"] | reduce .[] as $i (""; . + $i)' # => "abc"
# Notice the `.` in the `reduction_expr` is the `initial_value` at first,
# and then it becomes the result of applying the `reduction_expr` as
# we iterate through the values of `outputs_expr`. The expression:
#
# reduce (1, 2, 3, 4, 5) as $i (0; . + $i)
#
# can be thought of as doing:
#
# 0 + 1 | . + 2 | . + 3 | . + 4 | . + 5
#
# The `*` operator when used on two objects, merges both recursively.
# Therefore, to merge JSON objects recursively, you can use `reduce`
# with the `*` operator. For example:
#
echo '
{"a": 1, "b": {"c": 3}}
{ "b": {"d": 4}}
{"a": 99, "e": 5 }
' | jq -s 'reduce .[] as $m ({}; . * $m)'
# Output:
# {
# "a": 99,
# "b": {
# "c": 3,
# "d": 4
# },
# "e": 5
# }
# jq has variable assignment in the form of `expr as $var`, which binds
# the value of `expr` to `$var`, and `$var` is immutable. Further more,
# `... as ...` doesn't change the input of the next filter; its introduction
# in a filter pipeline is only for establishing the binding of a value to a
# variable, and its scope extends to the filters following its definition.
# (i.e., to look up a variable's definition, scan to the left of the filter
# chain from the expression using it until you find the definition)
#
jq -rn '[1, 2, 3, 4, 5]
| (.[0] + .[-1]) as $sum # Always put ( ) around the binding `expr` to avoid surprises.
| ($sum * length / 2) as $result # The current input at this step is still the initial array.
| "The result is: \($result)" # Same.
'
# Output:
# The result is: 15
# With the `expr as $var` form, if multiple values are generated by `expr`
# then jq will iterate through them and bind each value to `$var` in turn
# for the rest of the pipeline.
#
jq -rn 'range(2; 4) as $i
| range(1; 6) as $j
| "\($i) * \($j) = \($i * $j)"
'
# Output:
# 2 * 1 = 2
# 2 * 2 = 4
# 2 * 3 = 6
# 2 * 4 = 8
# 2 * 5 = 10
# 3 * 1 = 3
# 3 * 2 = 6
# 3 * 3 = 9
# 3 * 4 = 12
# 3 * 5 = 15
# It's sometimes useful to bind the initial input to a variable at the
# start of a program, so that you can refer to it later down the pipeline.
#
jq -rn "$(cat <<'EOF'
{lookup: {a: 1, b: 2, c: 3},
bonuses: {a: 5, b: 2, c: 9}
}
| . as $doc
| .bonuses
| to_entries[]
| "\(.key)'s total is \($doc.lookup[.key] + .value)"
EOF
)"
# Output:
# a's total is 6
# b's total is 4
# c's total is 12
# jq supports destructing during variable binding. This lets you extract values
# from an array or an object and bind them to variables.
#
jq -n '[range(5)] | . as [$first, $second] | $second'
# Output:
# 1
jq -n '{ name: "Tom", numbers: [1, 2, 3], age: 32}
| . as {
name: $who, # bind .name to $who
$name, # shorthand for `name: $name`
numbers: [$first, $second],
}
| $name, $second, $first, $who
'
# Output:
# "Tom"
# 2
# 1
# "Tom"
# In jq, values can be assigned to an array index or object key via the
# assignment operator, `=`. The same current input is given to both sides
# of the assignment operator, and the assignment itself evaluates to the
# current input. In other words, the assignment expression is evaluated
# for its side effect, and doesn't generate a new output.
#
jq -n '.a = 1 | .b = .a + 1' # => {"a": 1, "b": 2}
# Note that input is `null` due to `jq -n`, so `.` is `null` in the first
# filter, and assigning to a key under `null` turns it into an object with
# the key. The same input (now an object) then gets piped to the next filter,
# which then sets the `b` key to the value of the `a` key plus `1`, which is `2`.
#
# Another example:
#
jq -n '.a=1, .a.b=2' # => {"a": 1} {"a": {"b": 2}}
# In the above example, two objects are generated because both assignments
# received `null` as their inputs, and each operand of the comma operator
# is evaluated independently. Notice also how you can easily generate
# nested objects.
# In addition to the assignment operator, jq also has operators like:
# `+=`, `-=`, `*=`, and '/=', ... etc. Basically, `a op= b` is a shorthand
# for `a = a op b`, and they are handy for updating an object attribute or
# an item in an array based on its current value. Examples:
#
jq -n '.a.b.c = 3 | .a.b.c = .a.b.c + 1' # => {"a": {"b": {"c": 4}}}
jq -n '.a.b.c = 3 | .a.b.c += 1' # => {"a": {"b": {"c": 4}}}
# To delete a value, use `del/1`, which takes a path expression that specifies
# the locations of the things to be deleted. Example:
#
jq -n '{a: 1, b: {c: 2}, d: [3, 4, 5]} | del(.b.c, .d[1]) | .b.x = 6'
# Output:
# {
# "a": 1,
# "b": {
# "x": 6
# },
# "d": [
# 3,
# 5
# ]
# }
# Other than using jq's built-in functions, you can define your own.
# In fact, many built-in functions are defined using jq (see the link
# to jq's built-in functions at the end of the doc).
#
jq -n '
def my_select(expr): if expr then . else empty end;
def my_map(expr): [.[] | expr];
def sum: reduce .[] as $x (0; . + $x);
def my_range($from; $to):
if $from >= $to then
empty
else
$from, my_range($from + 1; $to)
end
;
[my_range(1; 6)] | my_map(my_select(. % 2 != 0)) | sum
'
# Output:
# 9
# Some notes about function definitions:
#
# - Functions are usually defined at the beginning, so that they are available
# to the rest of the jq program.
#
# - Each function definition should end with a `;` (semicolon).
#
# - It's also possible to define a function within another, though it's not shown here.
#
# - Function parameters are separated by `;` (semicolon). This is consistent with
# passing multiple arguments when calling a function.
#
# - A function can call itself; in fact, jq has TCO (Tail Call Optimization).
#
# - `def f($a; $b): ...;` is a shorthand for: `def f(a; b): a as $a | b as $b | ...`

240
learnsed.sed Normal file
View File

@@ -0,0 +1,240 @@
#!/usr/bin/sed -f
# Files that begin with the above line and are given execute permission
# can be run as regular scripts.
# Comments are like this.
# Commands consist of a single letter and many can be preceded
# by a specification of the lines to which they apply.
# Delete the input's third line.
3d
# The same command specified the command line as an argument to sed:
# sed 3d
# For many commands the specification can consist of two addresses,
# which select an inclusive range.
# Addresses can be specified numerically ($ is the last line) or through
# regular expressions delimited by /.
# Delete lines 1-10
1,10d
# Lines can also be specified as regular expressions, delimited by /.
# Delete empty lines.
/^$/d
# Delete blocks starting with SPOILER-BEGIN and ending with SPOILER-END.
/SPOILER-BEGIN/,/SPOILER-END/d
# A command without an address is applied to all lines.
# List lines in in a visually unambiguous form (e.g. tab appears as \t).
l
# A command prefixed by ! will apply to non-matching lines.
# Keep only lines starting with a #.
/^#/!d
# Below are examples of the most often-used commands.
# Substitute the first occurrence in a line of John with Mary.
s/John/Mary/
# Remove all underscore characters (global substitution).
s/_//g
# Remove all HTML tags.
s/<[^>]*>//g
# In the replacement string & is the regular expression matched.
# Put each line inside double quotes.
s/.*/"&"/
# In the matched regular expression \(pattern\) is used to store
# a pattern into a buffer.
# In the replacement string \1 refers to the first pattern, \2 to the second
# and so on. \u converts the following character to uppercase \l to lowercase.
# Convert snake_case_identifiers into camelCaseIdentifiers.
s/_\(.\)/\u\1/g
# The p (print) command is typically used together with the -n
# command-line option, which disables the print by default functionality.
# Output all lines between ``` and ```.
/```/,/```/p
# The y command maps characters from one set to another.
# Swap decimal and thousand separators (1,234,343.55 becomes 1.234.343,55).
y/.,/,./
# Quit after printing the line starting with END.
/^END/q
# You can stop reading here, and still get 80% of sed's benefits.
# Below are examples of how you can specify multiple sed commands.
# You can apply multiple commands by separating them with a newline or
# a semicolon.
# Delete the first and the last line.
1d
$d
# Delete the first and the last line.
1d;$d
# You can group commands in { } blocks.
# Convert first line to uppercase and print it.
1 {
s/./\u&/g
p
}
# Convert first line to uppercase and print it (less readable one-liner).
1{s/./\u&/g;p;}
# You can also stop reading here, if you're not interested in creating
# sed script files.
# Below are more advanced commands. You typically put these in a file
# rather than specify them on a command line. If you have to use
# many of these commands in a script, consider using a general purpose
# scripting language, such as Python or Perl.
# Append a line containing "profile();" after each line ending with ";".
/;$/a\
profile();
# Insert a line containing "profile();" before each line ending with ";".
/;$/i\
profile();
# Change each line text inside REDACTED blocks into [REDACTED].
/REDACTED-BEGIN/,/REDACTED-END/c\
[REDACTED]
# Replace the tag "<ourstyle>" by reading and outputting the file style.css.
/<ourstyle>/ {
r style.css
d
}
# Change each line inside REDACTED blocks into [REDACTED].
# Also write (append) a copy of the redacted text in the file redacted.txt.
/REDACTED-BEGIN/,/REDACTED-END/ {
w redacted.txt
c\
[REDACTED]
}
# All operations described so far operate on a buffer called "pattern space".
# In addition, sed offers another buffer called "hold space".
# The following commands operate on the two, and can be used to keep
# state or combine multiple lines.
# Replace the contents of the pattern space with the contents of
# the hold space.
g
# Append a newline character followed by the contents of the hold
# space to the pattern space.
G
# Replace the contents of the hold space with the contents of the
# pattern space.
h
# Append a newline character followed by the contents of the
# pattern space to the hold space.
H
# Delete the initial segment of the pattern space through the first
# newline character and start the next cycle.
D
# Replace the contents of the pattern space with the contents of
# the hold space.
g
# Append a newline character followed by the contents of the hold
# space to the pattern space.
G
# Replace the contents of the hold space with the contents of the
# pattern space.
h
# Append a newline character followed by the contents of the
# pattern space to the hold space.
H
# Write the pattern space to the standard output if the default
# output has not been suppressed, and replace the pattern space
# with the next line of input.
n
# Append the next line of input to the pattern space, using an
# embedded newline character to separate the appended material from
# the original contents. Note that the current line number
# changes.
N
# Write the pattern space, up to the first newline character to the
# standard output.
P
# Swap the contents of the pattern and hold spaces.
x
# Here is a complete example of some of the buffer commands.
# Move the file's first line to its end.
1 {
h
d
}
$ {
p
x
}
# Three sed commands influence a script's control flow
# Name this script position "my_label", to which the "b" and
# "t" commands may branch.
:my_label
# Continue executing commands from the position of my_label.
b my_label
# Branch to the end of the script.
b
# Branch to my_label if any substitutions have been made since the most
# recent reading of an input line or execution of a "t" (test) function.
t my_label
# Here is a complete example of branching:
# Join lines that end with a backslash into a single space-separated one.
# Name this position "loop"
: loop
# On lines ending with a backslash
/\\$/ {
# Read the next line and append it to the pattern space
N
# Substitute backslash newline with a space
s/\\\n/ /
# Branch to the top for testing this line's ending
b loop
}