How to produce two glossaries from one dictionary or source file?

I am looking for some advice. I would like to create a file that serves as a dictionary, where each entry has these three fields

(a) English term
(b) transliterated foreign term
(c) comment

Then, I wish to produce two separate glossaries:

  • one organised by (a) the English term, and in which the “definition” of the entry is (b) the transliterated foreign term and (c) the comment
  • one organised by (b) the transliterated foreign term, and in which the “definition” of the entry is (a) the English term and (c) the comment

I do not need the terms to be indexed against their occurrence in an article or book.

I am aware of both the glossy and glossarium packages, but this might exceed their abilities. I would be grateful for any advice on whether one of the existing packages can handle this, and/or any best practices on executing this. TIA.

Hello @fungai2000! Are you looking to simply print the glossary? Do you also want to preserve the ability to “reference” a term?

In either case, I don’t see why you wouldn’t be able to use either glossy or glossarium.

A quick example, without any further formatting, looks like this:

#import "@preview/glossarium:0.5.6": *

// Source data: one dictionary per term. `key` is the headword, `long` is its
// counterpart in the other language, and `description` is the comment.
#let list-a = (
  (
    key: "Foo",
    long: "Lorem",
    description: [Comment A]
  ),
  (
    key: "Bar",
    long: "Ipsum",
    description: [Comment B]
  ),
)
// Mirror of list-a: `key` and `long` are exchanged and the description is
// carried over unchanged, giving the entries for the second glossary.
#let list-b = list-a.map(
  x => (
    key: x.long,
    long: x.key,
    description: x.description
  )
)
// glossarium setup: install the show rule, then register both entry lists.
#show: make-glossary
#register-glossary(list-a)
#register-glossary(list-b)

= English to Latin
// show-all: true is passed because the entries are never referenced in the
// text (see the glossarium documentation for the exact semantics).
#print-glossary(list-a, show-all: true)

= Latin to English
#print-glossary(list-b, show-all: true)

Many thanks for this. That indeed solves for the question of drawing on one list for two printed glossaries.

However, I have a follow-up question, if I may.

In my “dictionary”, I have a need to map a single Latin term to multiple English equivalents.

#let list-a = (
    (   key: "Foo",
        long: "Bar", "Lorem", "Ipsum", 
        description: [Some description].
    ),
)

Is there a way, using the solutions you pointed out, to have the printed glossaries automatically render a one-to-many relationship for the Latin term to the multiple English terms, but then for each English term a new entry in the glossary? Such that the printed result would be something like:

Latin to English

Foo – Bar, Lorem, Ipsum

English to Latin

Bar – Foo
Lorem – Foo
Ipsum – Foo

Sure! That’s not too complicated. You can simply have a list of terms in long and iterate over them to create the second list. Then make sure you actually pass a content in long to glossarium.

#import "@preview/glossarium:0.5.6": *

// Source data: `long` is now an array, so one `key` can map to several terms.
// A single term still needs the trailing comma, ("Dolor",), to be an array.
#let list-a = (
  (
    key: "Foo",
    long: ("Lorem", "Ipsum"),
    description: [Comment A]
  ),
  (
    key: "Bar",
    long: ("Dolor",),
    description: [Comment B]
  ),
)
// Reverse direction: every term in `long` becomes its own entry whose `long`
// is the original key. The for loop yields a one-element array per term (these
// are joined into one array per source entry); flatten() merges them all.
#let list-b = list-a.map(
  x => for y in x.long {((
    key: y,
    long: x.key,
    description: x.description
  ),)}
).flatten()
// NOTE: the next line only dumps the raw array into the document. It is
// leftover debug output and should be removed in real use.
#list-b
// Shadow list-a with a printable version: the array in `long` is collapsed
// into a single comma-separated string.
#let list-a = list-a.map(
  x => (
    key: x.key,
    long: x.long.join(", "),
    description: x.description
  )
)
// glossarium setup: install the show rule, then register both entry lists.
#show: make-glossary
#register-glossary(list-a)
#register-glossary(list-b)

= English to Latin
#print-glossary(list-a, show-all: true)

= Latin to English
#print-glossary(list-b, show-all: true)
1 Like

This is really great, thank you! (I think there might be a mistake in the line #list-b; the above only worked when I removed that).

One further step: is there a way in which to have one key term take multiple long values, where each of those long values is mapped to its own description? Such that the printed glossary would output something like:

Latin to English

Foo – Lorem: Comment A. Ipsum: Comment B.

English to Latin

Lorem – Foo: Comment A.
Ipsum – Foo: Comment B.

1 Like

I printed list-b for debug and forgot to remove it. For the descriptions, without testing I think it’s possible as long as you have a list of description (same length), and then iterate over all items in the lists. Something like this essentially.

// Collapses each source entry (with parallel `long` and `description` arrays)
// into a single printable entry. Assumes both arrays are non-empty and of the
// same length; zip() would silently drop unmatched trailing items otherwise.
#let list-a = list-a.map(
  x => (
    key: x.key,
    // The first term is used as the entry's `long` value.
    long: x.long.first(),
    // Start from the first description, then append ". <term>: <description>"
    // for every remaining pair. Each zipped pair `d` is (description, term),
    // so d.at(1) is the term and d.at(0) is its description.
    description: x.description.slice(1).zip(x.long.slice(1)).fold(x.description.at(0), (desc, d) => {
      desc + ". " + d.at(1) + ": " + d.at(0)
    }) 
  )
)

Make sure list-b is updated to match the new structure as well, for example:

// Source data: `long` and `description` are parallel arrays, so the i-th
// description belongs to the i-th term.
#let list-a = (
  (
    key: "Foo",
    long: ("Lorem", "Ipsum"),
    description: ([Comment A1], [Comment A2])
  ),
  (
    key: "Bar",
    long: ("Dolor",),
    description: ([Comment B],)
  ),
)
// Reverse direction: one entry per term, paired with the description at the
// same index. enumerate() supplies the index `i`; flatten() merges the
// per-entry arrays into one flat list.
#let list-b = list-a.map(
  x => for (i, y) in x.long.enumerate() {((
    key: y,
    long: x.key,
    description: x.description.at(i)
  ),)}
).flatten()
1 Like

Thank you ever so much! The code you provided was the key I needed to get to my final resolution. I used VS Code with the integrated Claude assistant API to help me make a few small changes and formatting/styling modifications, until it produced the following desired result:

#import "@preview/glossarium:0.5.6": *

// Prints every entry of `entry-list` as its own paragraph: the key in bold,
// followed by the `long` value (and the description, where applicable).
//
// The named parameters mirror the options of glossarium's print-glossary so
// that existing call sites keep working, but none of them is read in the body.
#let my-custom-print-glossary(
  entry-list,
  show-all: true,
  disable-back-references: false,
  enable-group-pagebreak: false,
  user-group-break: linebreak()
) = {
  // One paragraph is emitted per entry.
  for entry in entry-list {
    // Paragraph settings: no first-line indent, no hanging indent (0em), 12pt spacing.
    par(
      first-line-indent: 0pt,
      hanging-indent: 0em,
      spacing: 12pt
    )[
      // Key in bold, followed by a small horizontal gap.
      #text(weight: "bold")[#entry.key#h(.3em)] 
      // Long form; entries without a `long` field print the key only.
      #if "long" in entry {
        // `long` is either pre-formatted content or a plain (non-content) value.
        if type(entry.long) == content {
          // Already formatted content: print it unchanged.
          entry.long
        } else {
          // Plain value: italicise it, then append ": <description>" or a bare period.
          if "description" in entry and entry.description != [] {
            emph(entry.long) + [: ] + entry.description
          } else {
            emph(entry.long) + [.]
          }
        }
      }
    ]
  }
}

// Single source of truth: `long` and `description` are parallel arrays, so
// the i-th description belongs to the i-th term.
#let list-original = (
  (
    key: "Foo",
    long: ("Lorem", "Ipsum"),
    description: ([Comment A1], [Comment A2])
  ),
  (
    key: "Bar",
    long: ("Dolor",),
    description: ([Comment B],)
  ),
)

// One entry per term in `long`, keyed by that term; `long` holds the original
// key as a plain string. A non-empty description gets a trailing period.
// The result is sorted by the new key.
#let list-a = list-original.map(
  x => for (i, y) in x.long.enumerate() {((
    key: y,
    long: x.key,
    description: if x.description.at(i) != [] { x.description.at(i) + [.] } else { [] }
  ),)}
).flatten().sorted(key: entry => entry.key)

// One entry per original key; `long` is pre-formatted content listing every
// term with its description. `description` is left empty because the text is
// already folded into `long`. The result is sorted by key.
#let list-b = list-original.map(
  x => (
    key: {x.key},
    long: {
      // Build "term: description" (or just the italic term) for each pair.
      let pairs = x.long.zip(x.description).map(pair => {
        let term = pair.at(0)
        let desc = pair.at(1)
        if desc != [] {
          emph(term) + [: ] + desc
        } else {
          emph(term)
        }
      })
      // With a single pair the separator is never used, so both branches
      // give the same result; multiple pairs are separated by "; ".
      if x.long.len() == 1 {
        pairs.join([]) + [.]
      } else {
        pairs.join([; ]) + [.]
      }
    },
    description: []
  )
).sorted(key: entry => entry.key)

// glossarium setup. The printing below uses the custom function rather than
// glossarium's print-glossary.
#show: make-glossary
#register-glossary(list-a)
#register-glossary(list-b)

=== English to Latin
#my-custom-print-glossary(list-a, show-all: true)

=== Latin to English
#my-custom-print-glossary(list-b, show-all: true)

Many thanks again, @quachpas, both for the package and for your help in customising it above!

Good job on making it work! You probably don’t need to load glossarium, since you dropped the print-glossary for a custom one.

It generated some stuff that is not used, and obviously a lot of verbose stuff (cause LLM). Here is a smaller version:

// Prints each entry of `entry-list` as one paragraph: bold key, a small gap,
// then the `long` value. Entries without a `long` field are skipped.
// The commented-out parameters are the unused options of the longer version.
#let my-custom-print-glossary(
  entry-list,
  // show-all: true,
  // disable-back-references: false,
  // enable-group-pagebreak: false,
  // user-group-break: linebreak(),
) = for entry in entry-list {
  if "long" not in entry { continue }
  // Built in code mode, so no stray spaces from markup leak into the output.
  let body = {
    strong[#entry.key]
    h(.3em)
    // Content is printed as-is; any other value is italicised and followed
    // by ": <description>" when one exists, or by a period otherwise.
    if type(entry.long) == content { entry.long } else {
      if "description" in entry and entry.description != [] {
        [#emph(entry.long): #entry.description]
      } else [#emph(entry.long).]
    }
  }
  par(first-line-indent: 0pt, hanging-indent: 0em, spacing: 12pt, body)
}


// Single source of truth: `long` and `description` are parallel arrays, so
// the i-th description belongs to the i-th term.
#let list-original = (
  (
    key: "Foo",
    long: ("Lorem", "Ipsum"),
    description: ([Comment A1], [Comment A2]),
  ),
  (
    key: "Bar",
    long: ("Dolor",),
    description: ([Comment B],),
  ),
)

// One entry per term in `long`, keyed by that term and sorted by it.
#let list-a = {
  // mapper(x) returns a closure for x.long.enumerate(): it receives an
  // (index, term) pair and returns a one-element array holding the new entry.
  // A non-empty description gets a trailing period.
  let mapper(x) = ((i, y)) => (
    (
      key: y,
      long: x.key,
      description: if x.description.at(i) != [] [#x.description.at(i).] else [],
    ),
  )
  list-original
    .map(x => x.long.enumerate().map(mapper(x)))
    .flatten()
    .sorted(key: entry => entry.key)
}

// One entry per original key; `long` is pre-formatted content listing all
// terms with their descriptions, and `description` is left empty.
#let list-b = {
  // Formats all (term, description) pairs of one entry and appends a period.
  let long(x) = {
    let pairs = x
      .long
      .zip(x.description)
      .map(pair => {
        let term = pair.first()
        let desc = pair.last()
        if desc != [] [#emph(term): #desc] else { emph(term) }
      })
    // Multiple pairs are separated by "; "; a single pair needs no separator.
    if x.long.len() == 1 [#pairs.join().] else [#pairs.join("; ").]
  }
  list-original
    .map(x => (key: x.key, long: long(x), description: []))
    .sorted(key: entry => entry.key)
}

=== English to Latin
#my-custom-print-glossary(list-a)

=== Latin to English
#my-custom-print-glossary(list-b)

It’s basically the same, but it doesn’t create unaccounted spaces because of content block, comments, etc.

It still looks overengineered, without diving deep into the topic. Also list-a/b probably could be functions, so that you have dictionary(-ies) and then convert them with a single function call. The functions then can go into a separate file.

What I don’t understand is that for some reason key gets swapped with long, and then list sorted by key, i.e., long. But I didn’t read the thread.

Many thanks, @Andrew, for helping improve upon the LLM-generated aspects of the code.

As to your question: Key is the English term, and Long is the Latin term. From this we make two glossaries: one English-to-Latin, the other Latin-to-English. Within each list, I wanted the entries sorted.

I’d be very happy to learn how to restructure this based on a dictionary and functions as you suggest. This solution grew out of glossarium, though by the end it was self-contained enough to not require the package any further.

Now I feel more confident about what needs to be changed. The only thing I don’t like is “key” and “long”. I would use something like maybe “first”/“second” or “from”/“to”.

/// Prints each entry as its own block (1em spacing): bold `key`, a small gap,
/// italic `long`, then ": <description>" when the description is not `none`,
/// and finally a period. Every entry must have `key`, `long` and `description`.
#let print-glossary(entry-list) = for entry in entry-list {
  // Wraps everything that follows in this iteration in a block.
  show: block.with(spacing: 1em)
  strong(entry.key)
  h(.3em)
  emph(entry.long)
  if entry.description != none [: #entry.description]
  "."
}

/// Swaps key/long: every term in an entry's `long` array becomes the `key` of
/// its own output entry, whose `long` is the original key. An empty
/// description (`[]`) becomes `none`. The result is sorted by the new key,
/// i.e. by the terms that were originally in `long`.
/// NOTE(review): which language ends up as the key depends on whether the
/// source data uses `key` for the English or the Latin term; confirm that
/// this matches the function name.
#let glossary-english-to-latin(glossary-data) = {
  glossary-data
    .map(entry => array
      .zip(entry.long, entry.description)
      .map(((long, description)) => (
        (
          key: long,
          long: entry.key,
          description: if description != [] [#description],
        ),
      )))
    .flatten()
    .sorted(key: entry => entry.key)
}

/// Doesn't swap key/long: the original key is kept, but each term in the
/// `long` array still gets its own output entry (the key is repeated). An
/// empty description (`[]`) becomes `none`. The result is sorted by the
/// original key.
/// NOTE(review): which language the key is in depends on the convention used
/// in the source data; confirm that this matches the function name.
#let glossary-latin-to-english(glossary-data) = {
  glossary-data
    .map(entry => array
      .zip(entry.long, entry.description)
      .map(((long, description)) => (
        (
          key: entry.key,
          long: long,
          description: if description != [] [#description],
        ),
      )))
    .flatten()
    .sorted(key: entry => entry.key)
}


// Raw input: `long` and `description` are parallel arrays, so the i-th
// description belongs to the i-th term.
#let glossary-data = (
  (
    key: "Foo",
    long: ("Lorem", "Ipsum"),
    description: ([Comment A1], [Comment A2]),
  ),
  (
    key: "Bar",
    long: ("Dolor",),
    description: ([Comment B],),
  ),
)

// Ready-to-print entry lists, one per direction, derived from the same input.
#let english-to-latin-glossary = glossary-english-to-latin(glossary-data)
#let latin-to-english-glossary = glossary-latin-to-english(glossary-data)

=== English to Latin
#print-glossary(english-to-latin-glossary)

=== Latin to English
#print-glossary(latin-to-english-glossary)

So, basically, we have the same input data, but then it’s passed through a processing function that spits out ready-to-print data. If you don’t need to access it, then just do

// Same output as before, without keeping the converted lists in variables.
=== English to Latin
#print-glossary(glossary-english-to-latin(glossary-data))

=== Latin to English
#print-glossary(glossary-latin-to-english(glossary-data))

Inside, a lot of array methods were used in chains. For each entry, we go through a pair of long + description and make a new dictionary. Then sort by key and print.

Many thanks for this, @Andrew. I look forward to studying this to understand better the rationale behind the discrete changes.

One thing that has been lost, however, is a single term listing multiple meanings within the same entry rather than as separate entries.

I looked at the spec, and didn’t find anything about listings, so it works as was required in the OP, is the way I see it.

This is true, the original spec didn’t mention it, so you make a fair point. (I am the OP.) It developed over the course of the conversation.

For posterity, and anyone interested, here is a modified version of Andrew’s helpful code above (using VS Code’s integrated Claude LLM assistant), to achieve the goal of (a) two glossaries drawn from one dictionary, (b) where one key term may have multiple target terms, (c) where each target term may have its own description, and (d) each key term finally prints out all the associated target terms coupled with their respective descriptions:

// Prints each entry as its own block (1em spacing): bold `key`, a 0.75em gap,
// `long` exactly as given (no styling is applied here), then
// ": <description>" when the description is not `none`, and a final period.
#let print-glossary(entry-list) = for entry in entry-list {
  // Wraps everything that follows in this iteration in a block.
  show: block.with(spacing: 1em)
  strong(entry.key)
  h(.75em)
  entry.long
  if entry.description != none [: #entry.description]
  "."
}

// Converts the raw data (parallel `long` / `description` arrays per entry)
// into entries with `key`, `long` and `description` fields, sorted by key.
//
// - swap: true  -> one output entry per term in `long`, keyed by that term;
//                  `long` is the italicised original key, and an empty
//                  description (`[]`) becomes `none`.
// - swap: false -> one output entry per original key; `long` is content that
//                  lists every term (italic) with its description, separated
//                  by "; ", and `description` is `none`.
#let transform-glossary(data, swap: false) = {
  if swap {
    // Each source entry yields an array of entries; flatten() merges them.
    data
      .map(entry => array
        .zip(entry.long, entry.description)
        .map(((long, description)) => (
          key: long,
          long: emph(entry.key),
          description: if description != [] [#description]
        )))
      .flatten()
  } else {
    data
      .map(entry => {
        // "term: description" for each pair, or just the italic term when
        // that term's description is empty.
        let parts = array
          .zip(entry.long, entry.description)
          .map(((long, description)) => {
            if description != [] {
              emph(long) + [: ] + description
            } else {
              emph(long)
            }
          })
        (
          key: entry.key,
          long: parts.join([; ]),
          description: none
        )
      })
  }
    .sorted(key: entry => entry.key)
}

// Raw input: `long` and `description` are parallel arrays, so the i-th
// description belongs to the i-th term.
#let glossary-data = (
  (
    key: "Foo",
    long: ("Lorem", "Ipsum"),
    description: ([Comment A1], [Comment A2]),
  ),
  (
    key: "Bar",
    long: ("Dolor",),
    description: ([Comment B],),
  ),
)

// swap: true gives one entry per term in `long`; swap: false gives one entry
// per original key with all of its terms listed together.
#let english-to-latin = transform-glossary(glossary-data, swap: true)
#let latin-to-english = transform-glossary(glossary-data, swap: false)

=== English to Latin
#print-glossary(english-to-latin)

=== Latin to English
#print-glossary(latin-to-english)

Again, it might be a bit verbose (again, due to the LLM bits). In any case, many thanks to both @quachpas and @Andrew for their help!

You can also do something like this:

// Compact variant with the same two modes; the result is sorted by key.
// - swap: true  -> one output entry per term in `long`, keyed by that term.
// - swap: false -> one output entry per original key, with all terms and
//                  their descriptions combined into `long`.
#let transform-glossary(data, swap: false) = if swap {
  // Each source entry yields an array of entries; flatten() merges them.
  data
    .map(entry => array
      .zip(entry.long, entry.description)
      .map(((long, description)) => (
        key: long,
        long: emph(entry.key),
        description: if description != [] [#description],
      )))
    .flatten()
} else {
  data.map(entry => (
    key: entry.key,
    // An `if` without `else` yields `none` when the description is empty, so
    // only the italic term remains in that case. Parts are joined with "; ".
    long: array
      .zip(entry.long, entry.description)
      .map(((long, description)) => {
        emph(long) + if description != [] [: #description]
      })
      .join[; ],
    description: none,
  ))
}.sorted(key: entry => entry.key)
1 Like