Fast queries using cts:value-tuples

MarkLogic has an extremely fast search engine, but it can be adversely affected by inefficient queries and the absence of element/attribute indexing. Here is an example which makes use of a very efficient API function called cts:value-tuples that searches on indexed elements and attributes.

Explaining the code:
  • utility function to transform results to CSV format (lines 5-13)
  • fetch journal titles for the given years (lines 15-24)
  • iterate through the years and journal titles (lines 26-27)
    • get all records for a given journal for a given year (lines 29-34)
    • build references for fetching journal title, search term and publication year (lines 36-40)
    • return frequency of a given search term per journal per year using cts:value-tuples (lines 42-46)
Note how, for each year and journal, results are sorted by journal title and then by frequency in descending order, and transformed into CSV format.
 
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
xquery version "1.0-ml";

declare default collation "http://marklogic.com/collation/codepoint";

(:~
 : Rewrites one serialized tuple plus its frequency into a quoted CSV row.
 :
 : The caller passes a string of the form
 :   [<journal>, <term>, <year>] <frequency>
 : and gets back
 :   "<year>","<journal>","<term>","<frequency>"
 :
 : The bracket patterns are anchored to the start and end of the string so
 : that square brackets occurring inside a journal title or a term are left
 : untouched.
 :
 : @param $value the serialized tuple followed by a space and the frequency
 : @return the CSV row as a string
 :)
declare function local:tuples-to-csv($value) {
  (: drop only the opening bracket of the serialized array :)
  let $value := fn:replace($value,'^\[','')
  (: turn the closing bracket + space before the trailing frequency into ," :)
  let $value := fn:replace($value,'\] ([0-9]+)$',',"$1')
  (: close the quote opened in front of the frequency :)
  let $value := fn:replace($value,'(.$)','$1"')
  (: move the last four-digit value that is followed by a comma to the front, quoted :)
  let $value := fn:replace($value,'(.*)([0-9]{4}),(.*)','"$2",$1$3')
  (: remove the space that precedes each quoted field :)
  let $value := fn:replace($value,' "','"')
  return
    $value
};

(: Publication years to report on. :)
let $years := (2005, 2010, 2015, 2020)

(: JournalTitle values taken from documents in the "Main" collection whose
   PublicationYear/@Start equals any of $years. :)
let $journals :=
  cts:element-values(
    xs:QName("JournalTitle"),(),(),
    cts:and-query((
      cts:element-attribute-range-query(xs:QName("PublicationYear"),fn:QName("","Start"),'=',$years),
      cts:collection-query("Main")
    ))
  )

(: Index references for the tuple members, in output order: journal title,
   term, publication year. They do not depend on the loop variables, so they
   are built once here instead of once per (year, journal) pair.
   NOTE(review): the JournalTitle reference names the root collation while the
   module's default collation is codepoint - confirm that matching range
   indexes exist for both. :)
let $references := (
  cts:element-reference(xs:QName("JournalTitle"),"collation=http://marklogic.com/collation/"),
  cts:element-reference(xs:QName("Term")),
  cts:element-attribute-reference(xs:QName("PublicationYear"),fn:QName("","Start"))
)

for $year in $years
  for $journal in $journals

    (: Restrict to "Main" documents for this journal and this year. :)
    let $query :=
      cts:and-query((
        cts:element-value-query(xs:QName("JournalTitle"),$journal),
        cts:element-attribute-range-query(xs:QName("PublicationYear"),fn:QName("","Start"),'=',$year),
        cts:collection-query("Main")
      ))

    return (
      (: One CSV row per tuple, sorted by the first tuple member and then by
         descending frequency. :)
      for $value in cts:value-tuples($references,"item-frequency",$query)
      order by json:array-values($value)[1] ascending, cts:frequency($value) descending
      return local:tuples-to-csv($value || ' ' || cts:frequency($value))
    )

Comments