Catmandu - Cheat Sheet

21 June 2013 by Patrick

Help

 $ catmandu help
 $ catmandu help convert

Convert

 # Use Catmandu Importers to read data into your environment
 $ catmandu convert MARC to JSON < records.mrc
 $ catmandu convert MARC to YAML < records.mrc
 $ catmandu convert MARC to JSON --pretty 1 < records.mrc
 $ catmandu convert MARC to JSON --fix 'marc_map("245","title");remove_field("record")' < records.mrc
 $ catmandu convert MARC to CSV --fix 'marc_map("245","title"); remove_field("record");' < records.mrc
 $ catmandu convert OAI --url http://biblio.ugent.be/oai --set allFtxt to JSON
 $ catmandu convert OAI --url http://biblio.ugent.be/oai --set allFtxt to JSON --fix 'retain_field("title")'
 $ catmandu convert SRU --base http://www.unicat.be/sru --query dna	
 $ catmandu convert ArXiv --query 'all:electron'
 $ catmandu convert PubMed --term 'hochstenbach'
 $ cat test.tt
[%- FOREACH f IN record %]
[% _id %] [% f.shift %][% f.shift %][% f.shift %][% f.join(":") %]
[%- END %]
 $ catmandu convert MARC to Template --template `pwd`/test.tt < records.mrc 

Import/Export

 # Use Catmandu Store to store/retrieve data from a database
 $ catmandu import JSON to MongoDB --database_name mydb --bag data < records.json
 $ catmandu import MARC to MongoDB --database_name mydb --bag data < records.mrc
 $ catmandu import MARC to ElasticSearch --index_name mydb --bag data < records.mrc
 $ catmandu import MARC to ElasticSearch --index_name mydb --bag data --fix 'marc_map("245a","title")' < records.mrc

 $ catmandu export MongoDB --database_name mydb --bag data to JSON
 $ catmandu export MongoDB --database_name mydb --bag data to JSON --fix 'retain_field("_id")'
 $ catmandu export Solr --url http://localhost:8983/solr to JSON
 $ catmandu export ElasticSearch --index_name mydb to JSON

Lazy

 $ cat catmandu.yml
---
store:
  test1:
   package: MongoDB
   options:
    database_name: mydb
  test2:
   package: ElasticSearch
   options:
    index_name: mydb
  test3:
   package: Solr
   options:
    url: http://localhost:8983/solr

  $ catmandu import JSON to test1 < records.json # Mongo
  $ catmandu import MARC to test2 < records.mrc  # ElasticSearch
  $ catmandu import YAML to test3 < records.yaml # Solr
  $ catmandu export test1 to JSON                # Mongo
  $ catmandu export test2 to JSON                # ElasticSearch
  $ catmandu export test3                        # Solr (very lazy: JSON is the default exporter and importer)
  $ cat fixes.txt
marc_map("245a","title");
marc_map("100","author.$append");
join_field("author",";");
marc_map("008_/10-13","language");
  $ catmandu import MARC to test2 --fix fixes.txt

Fixes

# Fixes clean your data. As input you get a Perl HASH. Each fix function is a command
# to transform the Perl HASH. Some fixes such as marc_map contain logic to transform
# complex data structures such as MARC.
set_field("my.name","patrick");            # { my => { name => 'patrick'} }
add_field("my.name2","nicolas"); 
move_field("my.name","your.name");
copy_field("your.name","your.name2");
remove_field("your.name");

upcase("title");                           # marc -> MARC
downcase("title");                         # MARC -> marc
capitalize("my.deeply.nested.field.0");    # marc -> Marc
trim("field_with_spaces");                 # "  marc  " -> marc
substring("title",0,1);                    # marc -> m
prepend("title","die ");                   # marc -> die marc
append("title"," must die");               # marc -> marc must die

lookup("title","dict.csv",-sep_char=>'|'); # lookup 'marc' in dict.csv and replace the value
lookup("title","dict.csv",-default=>'test'); # lookup 'marc' in dict.csv and replace the value or set it to 'test'
lookup("title","dict.csv",-delete=>'1');   # lookup 'marc' in dict.csv and replace the value, or delete the field if nothing is found

lookup_in_store('title', 'MongoDB', -database_name => 'lookups'); # lookup the (id)-value of title in 'lookups' and
                                           # replace it with the data found
lookup_in_store('title', 'MongoDB', -default => 'default value' , -delete => 1);

add_to_store('authors.*', 'MongoDB', '-bag', 'authors', '-database_name', 'catalog'); # add matching values to a store as a side effect

count("myarray");                          # count number of elements in an array or hash
sum("numbers");                            # replace an array with the sum of its values
sort_field("tags");                        # sort the values of an array
sort_field("tags",-unique => 1);           # sort the values plus keep unique values
sort_field("tags",-reverse => 1);          # reverse sort
sort_field("tags",-numeric => 1);          # sort numerical values
expand_date("2010-05-07");                 # Transforms a date to its parts {year => 2010, month => "5", day => "7", date => "2010-05-07"}
cmd("java MyClass");                       # Use an external program that can read JSON 
                                           # from stdin and write JSON to stdout

split_field("foo",":");                    # marc:must:die -> ['marc','must','die']
join_field("foo",":");                     # ['marc','must','die'] -> marc:must:die
retain_field("id");                        # delete any field except 'id'
replace_all("title","a","x");              # marc -> mxrc

# Most functions also work on arrays. E.g.
replace_all("author.*","a","x");           # [ 'marc','jan'] => ['mxrc','jxn']
# Use:
#   authors.$end (last entry)
#   authors.$start (first entry)
#   authors.$append (last + 1)
#   authors.$prepend (first - 1)
#   authors.* (all authors)
#   authors.2 (3rd author)

collapse();                                # collapse deep nested hash to a flat hash
expand();                                  # expand flat hash to deep nested hash
clone();                                   # clone the perl hash and work on the clone

to_json('my.field');                       # convert a value of a field to json
from_json('my.field');                     # replace the json field with the parsed value

nothing();                                 # do nothing (used in benchmarking)

# Copy all 245 subfields into the my.title hash
marc_map('245','my.title');
# Copy the 245-$a$b$c subfields into the my.title hash
marc_map('245abc','my.title');
# Copy the 100 subfields into the my.authors array
marc_map('100','my.authors.$append');
# Add the 710 subfields into the my.authors array
marc_map('710','my.authors.$append');
# Copy the 600-$x subfields into the my.subjects array while packing each into a genre.text hash
marc_map('600x','my.subjects.$append.genre.text');
# Copy the 008 characters 35-35 into the my.language hash
marc_map('008_/35-35','my.language');
# Copy all the 600 fields into a my.stringy hash joining them by '; '
marc_map('600','my.stringy', -join => '; ');
# When a 024 field exists, create the my.has024 hash with value 'found'
marc_map('024','my.has024', -value => 'found');
# Do the same examples now with the marc fields in 'record2'
marc_map('245','my.title', -record => 'record2');
# Remove the 900 fields
marc_remove('900');
# Add a marc field (in Catmandu::MARC 0.110)
marc_add('999', ind1 => ' ' , ind2 => '1' , a => 'test123');

# uppercase the value of field 'foo' if all members of 'oogly' have the value 'doogly'
if_all_match('oogly.*', 'doogly');
  upcase('foo'); # foo => 'BAR'
end();
# inverted
unless_all_match('oogly.*', 'doogly');
  upcase('foo'); # foo => 'BAR'
end();

# uppercase the value of field 'foo' if field 'oogly' has the value 'doogly'
if_any_match('oogly', 'doogly');
  upcase('foo'); # foo => 'BAR'
end();
# inverted
unless_any_match('oogly', 'doogly');
  upcase('foo'); # foo => 'BAR'
end();

# uppercase the value of field 'foo' if the field 'oogly' exists
if_exists('oogly');
  upcase('foo'); # foo => 'BAR'
end();
# inverted
unless_exists('oogly');
  upcase('foo'); # foo => 'BAR'
end();
comments powered by Disqus