Fast queries using cts:value-tuples
MarkLogic
has an extremely fast search engine, but it can be adversely affected by inefficient queries and the absence of element/attribute indexing. Here is an example which makes use of a very efficient API function called cts:value-tuples
that searches on indexed elements and attributes.

Explaining the code:
- utility function to transform results to CSV format (lines 5-13)
- fetch journal titles for a given year (lines 15-24)
- iterate through the years and journal titles (lines 26-27)
- get all records for a given journal for a given year (lines 29-34)
- build references for fetching journal title, search term and publication year (lines 36-40)
- return frequency of a given search term per journal per year using cts:value-tuples (lines 42-46)
xquery version "1.0-ml";
(: Prints, per year and journal title, one CSV row for every (JournalTitle, Term, PublicationYear/@Start) tuple with its frequency. :)
declare default collation "http://marklogic.com/collation/codepoint";
(: Rewrites "<tuple text> <frequency>" into a CSV row: strips brackets, quotes the frequency, moves the 4-digit year to the front. :)
declare function local:tuples-to-csv($value)
{
  let $no-open     := fn:replace($value, '\[', '')                                    (: drop every "[" :)
  let $freq-opened := fn:replace($no-open, '\] ', ',"')                               (: "] " becomes a comma plus an opening quote :)
  let $freq-closed := fn:replace($freq-opened, '(.$)', '$1"')                         (: append the closing quote after the last character :)
  let $year-first  := fn:replace($freq-closed, '(.*)([0-9]{4}),(.*)', '"$2",$1$3')    (: move the last "NNNN," group to the front, quoted :)
  let $row         := fn:replace($year-first, ' "', '"')                              (: remove the space before each opening quote :)
  return $row
};
(: Journal titles taken from content in the "Main" collection whose PublicationYear/@Start equals any of $years. :)
let $years := (2005, 2010, 2015, 2020)
let $year-filter :=
  cts:element-attribute-range-query(
    xs:QName("PublicationYear"), fn:QName("","Start"), '=', $years)
let $journals := cts:element-values(
  xs:QName("JournalTitle"),
  (),
  (),
  cts:and-query(($year-filter, cts:collection-query("Main")))
)
(: Visit every year/journal pair, years outermost. :)
for $year in $years,
    $journal in $journals
(: Constrain the tuple lookup to "Main" content carrying this journal title and this start year. :)
let $scope := cts:and-query((
  cts:element-value-query(xs:QName("JournalTitle"), $journal),
  cts:element-attribute-range-query(
    xs:QName("PublicationYear"), fn:QName("","Start"), '=', $year),
  cts:collection-query("Main")
))
(: NOTE(review): JournalTitle is referenced with the root collation while the module default is codepoint; confirm matching range indexes exist. :)
let $lexicons := (
  cts:element-reference(xs:QName("JournalTitle"),"collation=http://marklogic.com/collation/"),
  cts:element-reference(xs:QName("Term")),
  cts:element-attribute-reference(xs:QName("PublicationYear"),xs:QName("Start"))
)
(: Emit one CSV row per tuple, sorted by the tuple's first value ascending, then by frequency descending. :)
return
  for $tuple in cts:value-tuples($lexicons, "item-frequency", $scope)
  let $hits := cts:frequency($tuple)
  order by json:array-values($tuple)[1] ascending, $hits descending
  return local:tuples-to-csv($tuple || ' ' || $hits)
Comments