Associative Arrays
By Robert Laing
Bash does not support nested arrays, but that’s not a problem for importing and exporting JSON since jq’s path syntax helps make arbitrarily nested JSON objects map to individual strings to be used as keys, remembering to escape the double quotes inside the string.
Converting from JSON to bash and back again
jq has getpath(PATHS) and
setpath(PATHS; VALUE) which I’ve used in my json2array
and array2json
functions to make this fairly easy.
The idea is to gradually build up a library I’m storing as /usr/local/lib/json-utils.sh
of bash functions to do data munging on the various JSON files used for my events sites joeblog.co.za and seatavern.co.za.
#!/usr/bin/bash
shopt -s extglob
# json2array arrname json_text
#
# Flatten the JSON document json_text into the associative array named
# arrname. The caller must already have declared it with `declare -A`;
# any previous contents are discarded.
#
# Each scalar leaf becomes one element. Its key is the leaf's jq path written
# as JSON without the enclosing brackets, e.g. "performer",0,"name" (the
# double quotes are part of the key). Values are stored as text, so the JSON
# values 40, true and null become the strings 40, true and null. Empty
# objects and empty arrays have no leaves and therefore produce no elements.
#
# Returns jq's exit status, so malformed JSON (or a top-level scalar, which
# jq cannot iterate over) gives a non-zero status.
function json2array {
  # Prefixed local names so that a caller's array called "arr", "key" or
  # "val" is not shadowed by this function's own variables.
  local -n __j2a_out=$1
  local __j2a_key __j2a_val

  __j2a_out=()
  # jq emits key NUL value NUL for every leaf. Reading NUL-delimited fields
  # avoids eval entirely, so keys containing spaces, quotes or $(...) are
  # stored verbatim instead of being re-parsed by the shell.
  while IFS= read -r -d '' __j2a_key && IFS= read -r -d '' __j2a_val; do
    __j2a_out["$__j2a_key"]=$__j2a_val
  done < <(jq -j '
    def leaves(key; val): val |
      if (type == "object") or (type == "array") then
        path(.[]) as $p | getpath($p) as $v | leaves(key + $p; $v)
      else
        (key | tojson | .[1:-1]), "\u0000", tostring, "\u0000"
      end;
    path(.[]) as $p | getpath($p) as $v | leaves($p; $v)
  ' <<< "$2")
  # $! is the process substitution above; wait reports jq's exit status.
  wait "$!"
}
# array2json arrname
#
# Rebuild a JSON document from the associative array named arrname and print
# it pretty-printed with sorted keys (jq -S). Each key must be a jq path
# written as JSON without the enclosing brackets, e.g. "performer",0,"name",
# which is the format json2array produces. An empty array prints {}.
#
# Every value is written back as a JSON string, so numbers and booleans that
# were flattened to text come back quoted.
#
# Returns jq's exit status (non-zero if a key is not a valid path).
function array2json {
  # Prefixed local names so that a caller's array called "json", "path" or
  # "val" is not shadowed by this function's own variables.
  local -n __a2j_in=$1
  local __a2j_path
  local -a __a2j_args=()
  local -i __a2j_i=0

  # Hand every path/value pair to jq as data (--arg) rather than splicing it
  # into the program text: multi-line values and values containing quotes or
  # leading dashes are then passed through untouched, and a single jq process
  # does all the work.
  for __a2j_path in "${!__a2j_in[@]}"; do
    __a2j_args+=(
      --arg "p${__a2j_i}" "[${__a2j_path}]"
      --arg "v${__a2j_i}" "${__a2j_in["$__a2j_path"]}"
    )
    __a2j_i+=1
  done

  # pN holds the JSON text of path N, vN the matching value.
  jq -nS "${__a2j_args[@]}" '
    reduce ($ARGS.named | keys_unsorted[] | select(startswith("p"))) as $k
      ({}; setpath($ARGS.named[$k] | fromjson; $ARGS.named["v" + $k[1:]]))
  '
}
# find_schema 'artists' 'Arno Carstens'
#
# Print the path of the first schema.json found under
#   $HOME/webapps2/*/content/<$1>/<dir>/schema.json
# where <dir> loosely matches the name in $2: the name is lower-cased and
# every non-alphanumeric character becomes a wildcard, so 'Arno Carstens'
# matches a directory such as arno-carstens.
#
# Returns just the first match. Returns 0 when a file was found, 1 otherwise.
function find_schema {
  local pattern file
  local any='*'
  # An empty IFS stops the unquoted expansion below from being split on
  # whitespace (e.g. a space in $HOME) while still allowing it to glob.
  local IFS=

  pattern=${2,,}
  pattern="*${pattern//[^[:alnum:]]/$any}*"
  pattern="${HOME}/webapps2/*/content/${1}/${pattern}/schema.json"

  # Deliberately unquoted: the shell expands the wildcards into file names.
  for file in $pattern; do
    # With no match the literal pattern is iterated; -f filters it out.
    if [[ -f $file ]]; then
      printf '%s\n' "$file"
      return 0
    fi
  done
  return 1
}
# json_dict_words dictionary_file text
#
# Search text for the entries listed one per line in dictionary_file, matching
# them as fixed strings, on word boundaries and ignoring case (grep -Fwi).
# Prints the distinct matches, as they appear in text, as a compact JSON array
# of strings. Prints nothing when there is no match.
function json_dict_words {
  local -a words=()

  # mapfile keeps every variable local; the previous read loop left a global
  # "line" behind in the caller's scope.
  mapfile -t words < <(grep -Fwio -f "$1" <<< "$2" | sort -u)
  if [[ -n ${words[*]} ]]; then
    jq -cn '$ARGS.positional' --args "${words[@]}"
  fi
}
# getkeys myarr '"performer"'
#
# List the keys of the associative array named by $1 that match the regular
# expression $2, one per line, sorted with duplicates removed.
# When nothing matches, an error is written to stderr and 1 is returned.
function getkeys {
  local -n arr
  arr=$1
  local keys
  # The loop runs inside the command substitution, so "key" never reaches
  # the caller's scope.
  keys=$(
    for key in "${!arr[@]}"; do
      [[ $key =~ $2 ]] && echo "$key"
    done | sort -u
  )
  if [[ -n $keys ]]; then
    echo "$keys"
    return
  fi
  echo "Error: $2 is not a valid key" >&2
  return 1;
}
# arraylength myarr '"performer"'
#
# Uses getkeys to find how many elements are in the JSON array whose keys
# match the regular expression $2 in the associative array named by $1.
# Prints the highest array index found plus one.
#
# Warning: assumes the array index is the second comma-separated field of the
# key, i.e. it can't be used for "\"performer\",0,\"sameAs\",0".
#
# Returns 0 on success, 1 when no key matches (getkeys reports that on
# stderr), 2 when no matching key has a numeric second field.
function arraylength {
  # Prefixed local names so they cannot shadow the caller's array when
  # getkeys resolves the name passed in $1.
  local _al_keys _al_row
  local -a _al_cols
  local -i _al_max=-1

  _al_keys=$(getkeys "$1" "$2") || return 1

  # getkeys sorts as text, so "...",10 sorts before "...",9: the last row is
  # not necessarily the highest index. Scan every row for the maximum.
  while IFS= read -r _al_row; do
    # read strips the trailing newline, so an index in the last field
    # ("tags",0) is still recognised as a number.
    IFS=',' read -r -a _al_cols <<< "$_al_row"
    if [[ ${_al_cols[1]-} =~ ^[0-9]+$ ]] && (( 10#${_al_cols[1]} > _al_max )); then
      _al_max=$(( 10#${_al_cols[1]} ))
    fi
  done <<< "$_al_keys"

  if (( _al_max < 0 )); then
    echo "Error: $2 is not an array" >&2
    return 2
  fi
  echo $(( _al_max + 1 ))
  return 0
}
# object2array myarr '"offers"'
#
# Turn a JSON object stored in the associative array named by $1 into a
# one-element JSON array: every key matched by the regular expression $2 has
# ",0" inserted after its first comma-separated field, so
# "offers","price" becomes "offers",0,"price".
#
# Returns 1 without modifying the array when no key matches (getkeys reports
# that on stderr) or when any matching key already has a numeric second field.
function object2array {
  # Prefixed local names so they cannot shadow the caller's array.
  local -n _o2a_ref=$1
  local _o2a_keys _o2a_key _o2a_head _o2a_new
  local -a _o2a_rows

  _o2a_keys=$(getkeys "$1" "$2") || return 1
  readarray -t _o2a_rows <<< "$_o2a_keys"

  # Validate every key before touching anything, so a failure cannot leave
  # the array half converted.
  for _o2a_key in "${_o2a_rows[@]}"; do
    if [[ $_o2a_key =~ ^[^,]*,[0-9]+(,|$) ]]; then
      echo "Error: $2 is already an array" >&2
      return 1
    fi
  done

  for _o2a_key in "${_o2a_rows[@]}"; do
    # Splice ",0" in after the first field and keep the rest of the key
    # exactly as it was, however many fields follow.
    _o2a_head=${_o2a_key%%,*}
    _o2a_new="${_o2a_head},0${_o2a_key#"$_o2a_head"}"
    _o2a_ref["$_o2a_new"]=${_o2a_ref["$_o2a_key"]}
    # Single quotes are intentional: unset expands the subscript itself, so
    # the key is neither globbed nor stripped of its embedded double quotes.
    # shellcheck disable=SC2016
    unset '_o2a_ref[$_o2a_key]'
  done
}
# array_append myarray "key" "json text"
#
# Append the JSON value in $3 as a new last element of the JSON array stored
# under the top-level key $2 in the associative array named by $1. $2 is the
# bare key name (e.g. performer); the surrounding double quotes are added
# here. $3 is flattened with json2array and each of its leaves is stored
# under "key",<next index>,<leaf path>.
#
# Returns non-zero without modifying the array when arraylength fails, i.e.
# when the key does not exist or is not an array.
function array_append {
  # Prefixed local names so they cannot shadow the caller's array.
  local -n _aa_ref=$1
  local -A _aa_new=()
  local _aa_idx _aa_key

  # Anchor the pattern on the first field so that appending to "name" does
  # not also count keys such as "performer",0,"name".
  _aa_idx=$(arraylength "$1" "^\"$2\"(,|\$)") || return
  json2array _aa_new "$3" || return

  for _aa_key in "${!_aa_new[@]}"; do
    _aa_ref["\"$2\",${_aa_idx},${_aa_key}"]=${_aa_new["$_aa_key"]}
  done
}
# del_key myarray "key"
#
# Remove from the associative array named by $1 every element whose key
# contains the literal text $2 (substring match, not a regular expression).
function del_key {
  # Prefixed local names so they cannot shadow the caller's array or leave a
  # global "key" behind.
  local -n _dk_ref=$1
  local _dk_key

  for _dk_key in "${!_dk_ref[@]}"; do
    if [[ $_dk_key == *"$2"* ]]; then
      # Single quotes are intentional: unset expands the subscript itself, so
      # the key is neither globbed nor stripped of its embedded double quotes.
      # shellcheck disable=SC2016
      unset '_dk_ref[$_dk_key]'
    fi
  done
}
#get_venues "$json_in"
# TODO no longer used remove
#
# Print a compact JSON array of venue names for the event JSON in $1.
# .location may be a single object or an array of objects; each name has
# leading and trailing whitespace removed. If any of those names contain
# entries from /usr/local/share/dict/venues, the dictionary matches are
# printed instead of the names as given.
#
# For backward compatibility the global json_in is used when no argument is
# passed, which is what this function always read before.
function get_venues {
  local input venues_given venues

  input=${1-${json_in-}}
  # The explicit "else ." keeps the filter valid on jq releases that require
  # an else branch; jq 1.7 treats a missing else as "else ." anyway.
  venues_given="$(jq -c '.location |
    if (type == "object")
    then [.name | gsub("^\\s+|\\s+$"; "")]
    elif (type == "array")
    then [.[].name | gsub("^\\s+|\\s+$"; "")]
    else .
    end' <<< "$input")"
  venues="$(json_dict_words '/usr/local/share/dict/venues' "$venues_given")"
  if [[ -n $venues ]]; then
    echo "$venues"
  else
    echo "$venues_given"
  fi
}
#get_artists "$json_in"
#
# Print a compact JSON array of artist names for the event JSON in $1.
# When .performer has entries, their names are used with leading and trailing
# whitespace removed. Otherwise the artists dictionary is searched in the
# event .name, then .organizer.name, then the first 1500 characters of
# .description, stopping at the first field that yields a match.
# Prints an empty line when nothing is found.
function get_artists {
  # Everything is local: these used to be assigned as globals and could
  # overwrite the caller's variables of the same name.
  local json_in len artists text
  local dict='/usr/local/share/dict/artists'

  json_in="$1"
  len="$(jq '.performer | length' <<< "$json_in")"
  if (( len > 0 )); then
    artists="$(jq -c '[.performer[].name | gsub("^\\s+|\\s+$"; "")]' <<< "$json_in")"
  else
    text="$(jq -r '.name' <<< "$json_in")"
    artists="$(json_dict_words "$dict" "$text")"
    if [[ -z $artists ]]; then
      text="$(jq -r '.organizer.name' <<< "$json_in")"
      artists="$(json_dict_words "$dict" "$text")"
    fi
    if [[ -z $artists ]]; then
      text="$(jq -r '.description' <<< "$json_in")"
      # Only the first 1500 characters are searched. A stray "}" used to be
      # appended here, corrupting the text handed to the dictionary lookup.
      artists="$(json_dict_words "$dict" "${text:0:1500}")"
    fi
  fi
  echo "$artists"
}
I attempted without much success to get a discussion going at codereview on my technique for using Bash’s associative arrays to handle JSON.
What’s handy about keeping the code here is that it has evolved from standalone scripts to functions as my knowledge of bash has grown. Using declare -n arr=$1
within a function lets a globally created associative array, such as declare -Ag schema
, be mutated by that function, whereas the standalone scripts had to echo [key]=value\n...
lines for the caller to evaluate, which was very convoluted and messy.
#!/bin/bash
# Spec for json2array and array2json, written in the ExampleGroup / Example /
# When / The ... should DSL (presumably run with ShellSpec - confirm).
# Loads the function library under test.
source /usr/local/lib/json-utils.sh
# Fixture: the event document fed to every example below. The last example
# compares array2json's output against this text verbatim.
json='{
"@context": "https://schema.org",
"@type": "MusicEvent",
"location": {
"@type": "MusicVenue",
"address": "220 S. Michigan Ave, Chicago, Illinois, USA",
"name": "Chicago Symphony Center"
},
"name": "Shostakovich Leningrad",
"offers": {
"@type": "Offer",
"availability": "https://schema.org/InStock",
"price": "40",
"priceCurrency": "USD",
"url": "/examples/ticket/12341234"
},
"performer": [
{
"@type": "MusicGroup",
"name": "Chicago Symphony Orchestra",
"sameAs": [
"http://cso.org/",
"http://en.wikipedia.org/wiki/Chicago_Symphony_Orchestra"
]
},
{
"@type": "Person",
"image": "/examples/jvanzweden_s.jpg",
"name": "Jaap van Zweden",
"sameAs": "http://www.jaapvanzweden.com/"
}
],
"startDate": "2014-05-23T20:00",
"workPerformed": [
{
"@type": "CreativeWork",
"name": "Britten Four Sea Interludes and Passacaglia from Peter Grimes",
"sameAs": "http://en.wikipedia.org/wiki/Peter_Grimes"
},
{
"@type": "CreativeWork",
"name": "Shostakovich Symphony No. 7 (Leningrad)",
"sameAs": "http://en.wikipedia.org/wiki/Symphony_No._7_(Shostakovich)"
}
]
}'
ExampleGroup 'Using the json2array function'
# A top-level key is looked up with its double quotes included in the key.
Example 'create an array and read the "name" key'
declare -A myarr
json2array myarr "$json"
When call echo "${myarr["\"name\""]}"
The output should equal 'Shostakovich Leningrad'
End
# A nested path is a single comma-separated key mixing quoted object keys
# and bare array indexes.
Example 'note all the escaped quotes needed for complex paths'
declare -A myarr
json2array myarr "$json"
When call echo "${myarr["\"performer\",0,\"sameAs\",1"]}"
The output should equal 'http://en.wikipedia.org/wiki/Chicago_Symphony_Orchestra'
End
# Round trip: flattening and rebuilding must reproduce the fixture text.
Example 'Create bash array and then reverse back into JSON'
declare -A myarr
json2array myarr "$json"
When call array2json myarr
The output should equal "$json"
End
End
JSON path syntax
Note in the examples above when retrieving JSON values with path syntax, the quotes around keys have to be preserved by escaping them as in "${myarr["\"performer\",0,\"sameAs\",1"]}"
Using this example taken from schema.org/MusicEvent as input:
{
"@context": "https://schema.org",
"@type": "MusicEvent",
"location": {
"@type": "MusicVenue",
"name": "Chicago Symphony Center",
"address": "220 S. Michigan Ave, Chicago, Illinois, USA"
},
"name": "Shostakovich Leningrad",
"offers": {
"@type": "Offer",
"url": "/examples/ticket/12341234",
"price": "40",
"priceCurrency": "USD",
"availability": "https://schema.org/InStock"
},
"performer": [
{
"@type": "MusicGroup",
"name": "Chicago Symphony Orchestra",
"sameAs": [
"http://cso.org/",
"http://en.wikipedia.org/wiki/Chicago_Symphony_Orchestra"
]
},
{
"@type": "Person",
"image": "/examples/jvanzweden_s.jpg",
"name": "Jaap van Zweden",
"sameAs": "http://www.jaapvanzweden.com/"
}
],
"startDate": "2014-05-23T20:00",
"workPerformed": [
{
"@type": "CreativeWork",
"name": "Britten Four Sea Interludes and Passacaglia from Peter Grimes",
"sameAs": "http://en.wikipedia.org/wiki/Peter_Grimes"
},
{
"@type": "CreativeWork",
"name": "Shostakovich Symphony No. 7 (Leningrad)",
"sameAs": "http://en.wikipedia.org/wiki/Symphony_No._7_(Shostakovich)"
}
]
}
#!/bin/bash
# Spec listing every key json2array creates for musicevent.json, written in
# the ExampleGroup / Example DSL (presumably run with ShellSpec - confirm).
# Loads the function library under test.
source /usr/local/lib/json-utils.sh
ExampleGroup 'iterate through paths'
# The expected text below is compared verbatim, so it also pins the order in
# which bash happens to enumerate the associative array's keys.
Example 'show all keys in array'
declare -A myarr
# musicevent.json is read from the current directory.
json2array myarr "$(< musicevent.json)"
# One key per line, in bash's own enumeration order.
keys="$(for key in "${!myarr[@]}"; do echo "$key"; done)"
When call echo "$keys"
The output should equal '"performer",1,"sameAs"
"performer",1,"@type"
"location","name"
"workPerformed",1,"name"
"@type"
"name"
"startDate"
"@context"
"offers","priceCurrency"
"offers","availability"
"offers","@type"
"performer",1,"name"
"workPerformed",0,"name"
"workPerformed",0,"@type"
"workPerformed",0,"sameAs"
"performer",1,"image"
"location","@type"
"performer",0,"sameAs",1
"performer",0,"sameAs",0
"offers","price"
"workPerformed",1,"@type"
"location","address"
"performer",0,"@type"
"workPerformed",1,"sameAs"
"offers","url"
"performer",0,"name"'
End
End
The important thing to note is that each of these is a single string from bash’s perspective, making them legal “one dimensional array” keys. But jq splits them by comma and expects object keys to be quoted strings and array keys to be unquoted integers.