Frontier Software

Associative Arrays

By Robert Laing

Bash does not support nested arrays, but that’s not a problem for importing and exporting JSON since jq’s path syntax helps make arbitrarily nested JSON objects map to individual strings to be used as keys, remembering to escape the double quotes inside the string.

Converting from JSON to bash and back again

jq has getpath(PATHS) and setpath(PATHS; VALUE) which I’ve used in my json2array and array2json functions to make this fairly easy.

The idea is to gradually build up a library of bash functions, stored as /usr/local/lib/json-utils.sh, to do data munging on the various JSON files used for my events sites joeblog.co.za and seatavern.co.za.

#!/usr/bin/bash
# Library of bash helpers for moving data between JSON text and bash
# associative arrays with jq. It only defines functions; scripts load it with
# `source`.

# NOTE(review): none of the functions visible in this file appear to use an
# extended glob pattern, so the reason for enabling extglob is unclear - confirm
# before removing. Because the file is sourced, the option stays enabled in the
# calling shell.
shopt -s extglob

# json2array arrname json_text
#
# Flattens the JSON document in json_text into the associative array named by
# arrname. Each leaf value is stored under its jq path written without the
# enclosing brackets, so .performer[0].name ends up under the key
#   "performer",0,"name"        (the double quotes are part of the key)
#
# Arguments:
#   $1 - name of an associative array (declare -A) owned by the caller; it is
#        assigned as a whole, so entries it held before the call are replaced
#   $2 - JSON text
#
# Values pass through jq's @sh, so numbers, booleans and null arrive as their
# literal text. Empty objects and empty arrays have no leaf below them and
# therefore produce no entry.
#
# NOTE(review): the jq output is run through eval. Values are @sh-quoted, but
# keys only have their double quotes escaped, so a JSON key containing $(...)
# or backticks would presumably be expanded by the shell - confirm, and do not
# feed this function untrusted JSON until that is settled.
function json2array {
  local keyvals
  # Name reference: from here on "arr" is an alias for the caller's array.
  declare -n arr
  arr=$1
  # jq prints one [escaped-path]=quoted-value word per leaf. The [ ] that @json
  # puts around the path double as the [subscript] syntax of the compound
  # assignment evaluated below.
  keyvals="$(jq -r '
def recursive_function(key; val): val |
  if (type == "object") or (type == "array") then
    path(.[]) as $p | getpath($p) as $v | recursive_function(key+$p; $v)
  else 
    "\(key | @json | sub("\"";"\\\"";"g"))=\(val | @sh)"
  end;

path(.[]) as $p | getpath($p) as $v | recursive_function($p; $v)
' <<< "$2")"
  # The subscripts only exist as text inside keyvals, hence the eval.
  eval "arr=( $keyvals )"
}

# array2json arrname
#
# Rebuilds a JSON document from an associative array whose keys are jq paths
# written without the enclosing brackets (for example "performer",0,"name")
# and prints it on stdout with object keys sorted.
#
# Arguments:
#   $1 - name of the associative array to serialise
# Outputs:
#   formatted JSON on stdout; {} when the array has no entries
# Returns:
#   0 on success, 1 when jq rejects a key or fails
#
# Every value is written as a JSON string, because bash keeps no type
# information for array elements.
function array2json {
  # Name reference: "arr" is an alias for the caller's array.
  local -n arr
  arr=$1
  local json
  local path
  json='{}'
  for path in "${!arr[@]}"; do
    # --argjson turns the stored key back into a jq path array; --arg hands the
    # value over as a single string whatever it contains (newlines, quotes,
    # backslashes), so nothing has to be spliced into the jq program text.
    json="$(jq --argjson p "[${path}]" --arg v "${arr[$path]}" \
      'setpath($p; $v)' <<< "$json")" || return 1
  done
  jq -rS '.' <<< "$json"
}


# find_schema 'artists' 'Arno Carstens'
#
# Prints the path of the first schema.json found under
#   $HOME/webapps2/*/content/<section>/*<name>*/schema.json
# where <name> is the second argument lower-cased, with every
# non-alphanumeric character turned into a wildcard.
#
# Arguments:
#   $1 - content section (directory name), e.g. artists
#   $2 - display name to look for
# Outputs:
#   the first matching file on stdout
# Returns:
#   0 when a file was found, 1 otherwise
function find_schema {
  local wildcard='*'
  local name
  local pattern
  local file
  # An empty IFS switches off word splitting for the unquoted expansion below,
  # so a $HOME or section containing whitespace stays one path; pathname
  # expansion still happens.
  local IFS=''
  name="${2,,}"
  # Taking the replacement from a variable guarantees a bare * in the result,
  # independent of how a backslash-escaped * inside double quotes is treated.
  name="${name//[^[:alnum:]]/${wildcard}}"
  pattern="${HOME}/webapps2/*/content/${1}/*${name}*/schema.json"
  # Deliberately unquoted: the pattern has to be glob-expanded here.
  for file in $pattern; do
    # Without a match the pattern is left as-is, which -f rejects.
    if [[ -f $file ]]; then
      printf '%s\n' "$file"
      return 0
    fi
  done
  return 1
}

# json_dict_words dictionary text
#
# Looks for the entries of a dictionary file inside text and prints the
# distinct matches as a compact JSON array of strings.
#
# Arguments:
#   $1 - file with one fixed-string pattern per line (handed to grep -f)
#   $2 - text to search
# Outputs:
#   a JSON array on stdout; nothing at all when there is no match
#
# Matching is case-insensitive on whole words (grep -Fwi). Because of -o the
# array holds the text as it was written in $2, not the dictionary spelling.
function json_dict_words {
  local -a arr=()
  # Reading through process substitution keeps every variable local and leaves
  # the array empty, rather than holding one empty element, when grep finds
  # nothing.
  readarray -t arr < <(grep -Fwio -f "$1" <<< "$2" | sort -u)
  if (( ${#arr[@]} > 0 )); then
    jq -cn '$ARGS.positional' --args "${arr[@]}"
  fi
}

# getkeys myarr '"performer"'
#
# Prints, sorted and without duplicates, every key of the named associative
# array that matches the regular expression given as second argument.
#
# Arguments:
#   $1 - name of the associative array
#   $2 - regular expression tested against each key with =~
# Outputs:
#   one key per line on stdout; an error message on stderr when none matches
# Returns:
#   0 when at least one key matched, 1 otherwise
function getkeys {
  local -n arr
  arr=$1
  local matches
  local candidate
  matches="$(
    for candidate in "${!arr[@]}"; do
      [[ $candidate =~ $2 ]] || continue
      echo "$candidate"
    done | sort -u
  )"
  # Guard clause: nothing matched, so report it and let the caller react.
  if [[ -z $matches ]]; then
    echo "Error: $2 is not a valid key" >&2
    return 1
  fi
  echo "$matches"
}

# arraylength myarr '"performer"'
#
# Uses getkeys to find how many elements are in the JSON array
# Warning: assumes array index in col 2, ie can't be used for "\"performer\",0,\"sameAs\",0"
#
# Arguments:
#   $1 - name of the associative array
#   $2 - key pattern, passed to getkeys unchanged
# Outputs:
#   highest index found in column 2 plus one, on stdout
# Returns:
#   0 on success, 1 when no key matches, 2 when no matching key has a numeric
#   second column (the value is not an array)
function arraylength {
  local rows
  local cols
  local row
  local max=-1
  readarray -t rows <<< "$(getkeys "$1" "$2")"
  if [[ -z ${rows[*]} ]]; then
    return 1
  fi
  # Every row is inspected and compared numerically: getkeys sorts as text, so
  # with more than ten elements index 9 sorts after 10 and the last row is not
  # the highest index.
  for row in "${rows[@]}"; do
    # read stops at the newline of the here-string, so the last field stays
    # clean even when the key ends with its index ("tags",0).
    IFS=',' read -r -a cols <<< "$row"
    # 10# forces base ten, whatever the digits look like.
    if [[ ${cols[1]-} =~ ^[0-9]+$ ]] && (( 10#${cols[1]} > max )); then
      max=$(( 10#${cols[1]} ))
    fi
  done
  if (( max < 0 )); then
    echo "Error: $2 is not an array" >&2
    return 2
  fi
  echo $(( max + 1 ))
  return 0
}

# object2array myarr '"offers"'
#
# Wraps the value stored under a key in a one-element JSON array by inserting
# index 0 after the first path segment of every matching key:
#   "offers","price"  becomes  "offers",0,"price"
# A key without further segments (a scalar) becomes  "key",0
#
# Arguments:
#   $1 - name of the associative array, modified in place
#   $2 - key pattern, passed to getkeys unchanged
# Returns:
#   0 on success, 1 when no key matches or the value is already an array
#   (nothing is modified in either case)
function object2array {
  declare -n arr
  arr=$1
  local rows
  local key
  local head
  local tail
  local newkey
  readarray -t rows <<< "$(getkeys "$1" "$2")"
  # getkeys has already reported the missing key on stderr.
  if [[ -z ${rows[*]} ]]; then
    return 1
  fi
  # Check every key before touching anything, so an error cannot leave the
  # array half converted.
  for key in "${rows[@]}"; do
    tail="${key#*,}"
    if [[ $key == *,* && ${tail%%,*} =~ ^[0-9]+$ ]]; then
      echo "Error: $2 is already an array" >&2
      return 1
    fi
  done
  for key in "${rows[@]}"; do
    head="${key%%,*}"
    # Split on the first comma only: whatever follows is kept verbatim, so
    # deeper paths keep their commas.
    if [[ $key == *,* ]]; then
      newkey="${head},0,${key#*,}"
    else
      newkey="${head},0"
    fi
    arr["$newkey"]="${arr[$key]}"
    # Single quotes let unset expand the subscript itself, exactly once.
    unset 'arr[$key]'
  done
}

# array_append myarray "key" "json text"
#
# Appends the JSON object in "json text" as a new last element of the JSON
# array stored under key.
#
# Arguments:
#   $1 - name of the associative array, modified in place
#   $2 - top-level key of the JSON array; accepted with or without the
#        surrounding double quotes (workPerformed or "workPerformed")
#   $3 - JSON text of the element to append
# Returns:
#   0 on success; the status of arraylength (2) when the key exists but does
#   not hold an array
#
# When the key does not exist yet the element is stored at index 0, which
# creates the array.
function array_append {
  declare -n arr
  arr=$1
  declare -A tmparr
  local key
  local name
  local idx
  local rc
  # Normalise the key so both calling conventions build the same path.
  name="${2#\"}"
  name="${name%\"}"
  json2array tmparr "$3"
  # stderr is dropped on purpose: "not a valid key" is an expected answer here
  # and the "not an array" case is reported below.
  idx="$(arraylength "$1" "\"${name}\"" 2>/dev/null)"
  rc=$?
  case $rc in
    0) ;;
    1) idx=0 ;;
    *)
      echo "Error: $2 is not an array" >&2
      return "$rc"
      ;;
  esac
  for key in "${!tmparr[@]}"; do
    arr["\"${name}\",${idx},${key}"]="${tmparr[$key]}"
  done
}

# del_key myarray "key"
#
# Removes every entry whose key contains the given text, which deletes a JSON
# object or array together with everything below it.
#
# Arguments:
#   $1 - name of the associative array, modified in place
#   $2 - text to look for in the keys, e.g. '"location"'
function del_key {
  declare -n arr
  arr=$1
  local key
  for key in "${!arr[@]}"; do
    # The quoted right-hand side makes =~ compare literally, so $2 is a plain
    # substring here and not a regular expression.
    if [[ $key =~ "$2" ]]; then
      # Single quotes let unset expand the subscript itself, exactly once, so
      # the double quotes and commas inside the key are taken literally.
      unset 'arr[$key]'
    fi
  done
}

#get_venues "$json_in"
# TODO no longer used remove
#
# Prints the venue names of an event as a compact JSON array.
#
# Arguments:
#   $1 - JSON text of the event; when it is omitted or empty the variable
#        json_in of the calling scope is used instead, which is what this
#        function always read before it honoured its argument
# Outputs:
#   the names found in /usr/local/share/dict/venues when json_dict_words
#   recognises any, otherwise the trimmed .location name(s) as given
function get_venues {
  # The right-hand side is expanded before the local is created, so the inner
  # json_in still refers to the caller's variable.
  local json_in="${1:-${json_in-}}"
  local venues_given
  local venues
  # The explicit "else ." spells out what jq 1.7 does when the else branch is
  # omitted and keeps the program valid for older jq releases.
  venues_given="$(jq -c '.location |
if (type == "object")
then [.name | gsub("^\\s+|\\s+$"; "")]
elif (type == "array")
then [.[].name | gsub("^\\s+|\\s+$"; "")]
else .
end' <<< "$json_in")"
  venues="$(json_dict_words '/usr/local/share/dict/venues' "$venues_given")"
  if [[ -n $venues ]]; then
    echo "$venues"
  else
    echo "$venues_given"
  fi
}

#get_artists "$json_in"
#
# Prints the artists of an event as a compact JSON array.
#
# Arguments:
#   $1 - JSON text of the event
# Outputs:
#   the trimmed names listed under .performer (one object or an array of
#   objects) when there are any; otherwise the entries of
#   /usr/local/share/dict/artists that json_dict_words finds in .name, then in
#   .organizer.name, then in the first 1500 characters of .description.
#   An empty line when nothing is found.
function get_artists {
  local json_in="$1"
  local artists
  local text
  # "objects" and "strings" skip performers that are not objects and names that
  # are not strings; select() turns an empty list into no output, so the
  # dictionary fallback below runs.
  artists="$(jq -c '[.performer
  | if type == "array" then .[] else . end
  | objects | .name | strings
  | gsub("^\\s+|\\s+$"; "")]
| select(length > 0)' <<< "$json_in")"
  if [[ -z $artists ]]; then
    # "// empty" keeps a missing field from reaching the dictionary lookup as
    # the word null.
    text="$(jq -r '.name // empty' <<< "$json_in")"
    artists="$(json_dict_words "/usr/local/share/dict/artists" "$text")"
    if [[ -z $artists ]]; then
      text="$(jq -r '.organizer.name // empty' <<< "$json_in")"
      artists="$(json_dict_words "/usr/local/share/dict/artists" "$text")"
    fi
    if [[ -z $artists ]]; then
      text="$(jq -r '.description // empty' <<< "$json_in")"
      # Only the beginning of a long description is searched.
      artists="$(json_dict_words "/usr/local/share/dict/artists" "${text:0:1500}")"
    fi
  fi
  echo "$artists"
}

I attempted without much success to get a discussion going at codereview on my technique for using Bash’s associative arrays to handle JSON.

What’s handy about keeping the code here is it has evolved from standalone scripts to functions as my knowledge of bash has grown. declare -n arr=$1 within a function lets a globally created associative array, declare -Ag schema, be mutated by the function, whereas the standalone script had to echo [key]=value\n... lines for the caller to use, which was very convoluted and messy.

#!/bin/bash
# Spec file preamble: loads the library under test and defines the JSON fixture
# shared by the examples that follow.

source /usr/local/lib/json-utils.sh

# Fixture: a MusicEvent document. Object keys are already in sorted order and
# every leaf value is a string.
json='{
  "@context": "https://schema.org",
  "@type": "MusicEvent",
  "location": {
    "@type": "MusicVenue",
    "address": "220 S. Michigan Ave, Chicago, Illinois, USA",
    "name": "Chicago Symphony Center"
  },
  "name": "Shostakovich Leningrad",
  "offers": {
    "@type": "Offer",
    "availability": "https://schema.org/InStock",
    "price": "40",
    "priceCurrency": "USD",
    "url": "/examples/ticket/12341234"
  },
  "performer": [
    {
      "@type": "MusicGroup",
      "name": "Chicago Symphony Orchestra",
      "sameAs": [
        "http://cso.org/",
        "http://en.wikipedia.org/wiki/Chicago_Symphony_Orchestra"
      ]
    },
    {
      "@type": "Person",
      "image": "/examples/jvanzweden_s.jpg",
      "name": "Jaap van Zweden",
      "sameAs": "http://www.jaapvanzweden.com/"
    }
  ],
  "startDate": "2014-05-23T20:00",
  "workPerformed": [
    {
      "@type": "CreativeWork",
      "name": "Britten Four Sea Interludes and Passacaglia from Peter Grimes",
      "sameAs": "http://en.wikipedia.org/wiki/Peter_Grimes"
    },
    {
      "@type": "CreativeWork",
      "name": "Shostakovich Symphony No. 7 (Leningrad)",
      "sameAs": "http://en.wikipedia.org/wiki/Symphony_No._7_(Shostakovich)"
    }
  ]
}'

# Examples for json2array and array2json, all working on the $json fixture.
ExampleGroup 'Using the json2array function'

  # A top-level key is the quoted name on its own.
  Example 'create an array and read the "name" key'
    declare -A myarr
    json2array myarr "$json"
    When call echo "${myarr["\"name\""]}"
    The output should equal 'Shostakovich Leningrad'
  End

  # Nested values are addressed by the comma-separated jq path: quoted object
  # keys, bare array indexes.
  Example 'note all the escaped quotes needed for complex paths'
    declare -A myarr
    json2array myarr "$json"
    When call echo "${myarr["\"performer\",0,\"sameAs\",1"]}"
    The output should equal 'http://en.wikipedia.org/wiki/Chicago_Symphony_Orchestra'
  End

  # Round trip. The comparison is textual, so it relies on the fixture already
  # having sorted keys and string-only values.
  # NOTE(review): it presumably also relies on the fixture's indentation
  # matching jq's default output - confirm.
  Example 'Create bash array and then reverse back into JSON'
    declare -A myarr
    json2array myarr "$json"
    When call array2json myarr
    The output should equal "$json"
  End

End

JSON path syntax

Note in the examples above when retrieving JSON values with path syntax, the quotes around keys have to be preserved by escaping them as in "${myarr["\"performer\",0,\"sameAs\",1"]}"

Using this example taken from schema.org/MusicEvent as input:

{
  "@context": "https://schema.org",
  "@type": "MusicEvent",
  "location": {
    "@type": "MusicVenue",
    "name": "Chicago Symphony Center",
    "address": "220 S. Michigan Ave, Chicago, Illinois, USA"
  },
  "name": "Shostakovich Leningrad",
  "offers": {
    "@type": "Offer",
    "url": "/examples/ticket/12341234",
    "price": "40",
    "priceCurrency": "USD",
    "availability": "https://schema.org/InStock"
  },
  "performer": [
    {
      "@type": "MusicGroup",
      "name": "Chicago Symphony Orchestra",
      "sameAs": [
        "http://cso.org/",
        "http://en.wikipedia.org/wiki/Chicago_Symphony_Orchestra"
      ]
    },
    {
      "@type": "Person",
      "image": "/examples/jvanzweden_s.jpg",
      "name": "Jaap van Zweden",
      "sameAs": "http://www.jaapvanzweden.com/"
    }
  ],
  "startDate": "2014-05-23T20:00",
  "workPerformed": [
    {
      "@type": "CreativeWork",
      "name": "Britten Four Sea Interludes and Passacaglia from Peter Grimes",
      "sameAs": "http://en.wikipedia.org/wiki/Peter_Grimes"
    },
    {
      "@type": "CreativeWork",
      "name": "Shostakovich Symphony No. 7 (Leningrad)",
      "sameAs": "http://en.wikipedia.org/wiki/Symphony_No._7_(Shostakovich)"
    }
  ]
}
#!/bin/bash
# Spec: lists every key json2array creates for the MusicEvent example.

source /usr/local/lib/json-utils.sh

ExampleGroup 'iterate through paths'

  # musicevent.json is read relative to the current directory.
  # NOTE(review): the expected text below hard-codes the order in which bash
  # enumerated the keys; associative arrays have no defined order, so this may
  # differ between bash versions - confirm, or sort both sides.
  Example 'show all keys in array'
    declare -A myarr
    json2array myarr "$(< musicevent.json)"
    keys="$(for key in "${!myarr[@]}"; do echo "$key"; done)"
    When call echo "$keys"
    The output should equal '"performer",1,"sameAs"
"performer",1,"@type"
"location","name"
"workPerformed",1,"name"
"@type"
"name"
"startDate"
"@context"
"offers","priceCurrency"
"offers","availability"
"offers","@type"
"performer",1,"name"
"workPerformed",0,"name"
"workPerformed",0,"@type"
"workPerformed",0,"sameAs"
"performer",1,"image"
"location","@type"
"performer",0,"sameAs",1
"performer",0,"sameAs",0
"offers","price"
"workPerformed",1,"@type"
"location","address"
"performer",0,"@type"
"workPerformed",1,"sameAs"
"offers","url"
"performer",0,"name"'
  End

End

The important thing to note is that each of these is a single string from bash’s perspective, making them legal “one dimensional array” keys. But jq splits them by comma and expects object keys to be quoted strings and array indexes to be unquoted integers.

appending

Following from iteration, the next step is to add JSON objects to the version converted to bash. #!/bin/bash source /usr/local/lib/json-utils.sh ExampleGroup 'append new JSON object to an array' Example 'add another "workPerformed" object' declare -A myarr json2array myarr "$(< ../musicevent.json)" json1='{ "@type": "CreativeWork", "name": "Sinfonia da Requiem", "sameAs": "https://en.wikipedia.org/wiki/Sinfonia_da_Requiem" }' array_append myarr '"workPerformed"' "$json1" When call array2json myarr The output should equal '{ "@context": "https://schema.org", "@type": "MusicEvent", "location": { "@type": "MusicVenue", "address": "220 S.

deletion

#!/bin/bash source /usr/local/lib/json-utils.sh ExampleGroup 'delete objects or array elements' Example 'delete "location"' declare -A myarr json2array myarr "$(< ../musicevent.json)" del_key myarr '"location"' When call array2json myarr The output should equal '{ "@context": "https://schema.org", "@type": "MusicEvent", "name": "Shostakovich Leningrad", "offers": { "@type": "Offer", "availability": "https://schema.org/InStock", "price": "40", "priceCurrency": "USD", "url": "/examples/ticket/12341234" }, "performer": [ { "@type": "MusicGroup", "name": "Chicago Symphony Orchestra", "sameAs": [ "http://cso.org/", "http://en.wikipedia.org/wiki/Chicago_Symphony_Orchestra" ] }, { "@type": "Person", "image": "/examples/jvanzweden_s.

iteration

Iterating over JSON arrays via Bash associative arrays requires two more functions in json-utils, in addition to the json2array and array2json functions in the top section. A basic form of iteration is getkeys, which returns the keys from any given level of the path. It will work for objects and arrays. function getkeys { local -n arr arr=$1 local keys keys=$(for key in "${!arr[@]}"; do if [[ $key =~ $2 ]]; then echo "$key" fi done | sort -u) if [[ -z $keys ]]; then echo "Error: $2 is not a valid key" >&2 return 1; fi echo "$keys" } The return 1 part is something I had to add after my initial attempt as I got more familiar with the “logic programming” style of bash, to move to an alternative block of code if the function doesn’t receive a valid key, which could create the key or whatever.