nur-packages

My NUR packages
git clone git://git.sikmir.ru/nur-packages
Log | Files | Refs | README | LICENSE

default.nix (2030B)


      1 # Tatoeba sentences and translations, packaged as StarDict dictionaries
      2 # (one dictionary per language pair listed in `langs`).
      3 {
      4   lib,
      5   stdenvNoCC,
      6   fetchurl,
      7   dict,
      8   jq,
      9   moreutils,
     10   stardict-tools,
     11   tatoebatools,
     12 }:
     13 let
     14   # Language pairs to build. Each entry is spliced verbatim into the
     15   # `parallel_corpus` command line and, with the space replaced by "_",
     16   # into the file name of the generated dictionary.
     17   langs = [
     18     "bel eng"
     19     "bel rus"
     20     "deu eng"
     21     "deu rus"
     22     "eng bel"
     23     "eng deu"
     24     "eng epo"
     25     "eng fin"
     26     "eng hye"
     27     "eng rus"
     28     "eng swe"
     29     "eng ukr"
     30     "epo eng"
     31     "epo rus"
     32     "fin eng"
     33     "fin rus"
     34     "hye eng"
     35     "hye rus"
     36     "rus bel"
     37     "rus deu"
     38     "rus eng"
     39     "rus epo"
     40     "rus fin"
     41     "rus hye"
     42     "rus swe"
     43     "rus ukr"
     44     "swe eng"
     45     "swe rus"
     46     "ukr eng"
     47     "ukr rus"
     48   ];
     49   # Each value of this attribute set is handed to fetchurl as-is.
     50   tatoeba = builtins.fromJSON (builtins.readFile ./tatoeba.json);
     51 in
     52 stdenvNoCC.mkDerivation rec {
     53   pname = "tatoeba";
     54   version = "2024-11-16";
     55 
     56   srcs = lib.mapAttrsToList (_: fetchurl) tatoeba;
     57 
     58   # Custom unpacker: every source is decompressed with bzcat into the build
     59   # directory and registered in versions.json under its name without the
     60   # ".tsv.bz2" suffix, stamped with "<version> 00:00:00".
     61   unpackPhase =
     62     let
     63       unpackDump = src: ''
     64         bzcat ${src} > ${lib.removeSuffix ".bz2" src.name}
     65         jq '.+{"${lib.removeSuffix ".tsv.bz2" src.name}":"${version} 00:00:00"}' versions.json | \
     66           sponge versions.json
     67       '';
     68     in
     69     ''
     70       runHook preUnpack
     71 
     72       echo "{}" > versions.json
     73       ${lib.concatMapStringsSep "\n" unpackDump srcs}
     74 
     75       runHook postUnpack
     76     '';
     77 
     78   nativeBuildInputs = [
     79     dict
     80     jq
     81     moreutils
     82     stardict-tools
     83     tatoebatools
     84   ];
     85 
     86   # The dumps and versions.json are moved under $XDG_DATA_HOME/tatoebatools
     87   # before the dictionaries are generated; presumably that is where
     88   # tatoebatools looks for already-downloaded data (TODO confirm).
     89   buildPhase =
     90     let
     91       # Shell snippet for one language pair: the output of `parallel_corpus`
     92       # is written to a .tab file, which is then passed to `stardict-tabfile`.
     93       makeDict =
     94         lang:
     95         let
     96           tabFile = "tatoeba_${lib.replaceStrings [ " " ] [ "_" ] lang}.tab";
     97         in
     98         ''
     99           parallel_corpus ${lang} > ${tabFile}
    100           stardict-tabfile ${tabFile}
    101         '';
    102     in
    103     ''
    104       runHook preBuild
    105 
    106       export XDG_DATA_HOME=$PWD
    107       mkdir -p tatoebatools/{links,sentences_detailed}
    108       mv *_links.tsv tatoebatools/links
    109       mv *_sentences_detailed.tsv tatoebatools/sentences_detailed
    110       mv versions.json tatoebatools
    111       ${lib.concatMapStringsSep "\n" makeDict langs}
    112 
    113       runHook postBuild
    114     '';
    115 
    116   # Only the generated dictionary files are installed, flat into $out.
    117   installPhase = ''
    118     runHook preInstall
    119 
    120     install -Dm644 *.{dict*,idx,ifo} -t $out
    121 
    122     runHook postInstall
    123   '';
    124 
    125   meta = {
    126     description = "Tatoeba is a collection of sentences and translations";
    127     homepage = "https://tatoeba.org/";
    128     license = lib.licenses.free;
    129     maintainers = [ lib.maintainers.sikmir ];
    130     platforms = lib.platforms.all;
    131     skip.ci = true;
    132   };
    133 }