nur-packages

My NUR packages
git clone git://git.sikmir.ru/nur-packages
Log | Files | Refs | README | LICENSE

default.nix (1921B)


      # Tatoeba sentence dumps converted into StarDict dictionaries.
      #
      # Every dump pinned in ./tatoeba.json is fetched and decompressed, then one
      # dictionary is generated for each language pair listed in `langs`.
      { lib, stdenvNoCC, fetchurl, dict, jq, moreutils, stardict-tools, tatoebatools }:
      let
        # "<source> <target>" language-code pairs; each entry is passed verbatim as
        # the argument list of `parallel_corpus` and yields one dictionary.
        langs = [
          "deu eng"
          "deu rus"
          "eng deu"
          "eng epo"
          "eng fin"
          "eng hye"
          "eng rus"
          "eng swe"
          "eng ukr"
          "epo eng"
          "epo rus"
          "fin eng"
          "fin rus"
          "hye eng"
          "hye rus"
          "rus deu"
          "rus eng"
          "rus epo"
          "rus fin"
          "rus hye"
          "rus swe"
          "rus ukr"
          "swe eng"
          "swe rus"
          "ukr eng"
          "ukr rus"
        ];

        # Attribute set whose values are handed unchanged to `fetchurl`.
        tatoeba = builtins.fromJSON (builtins.readFile ./tatoeba.json);
      in
      stdenvNoCC.mkDerivation rec {
        pname = "tatoeba";
        version = "2023-05-27";

        srcs = lib.mapAttrsToList (_name: spec: fetchurl spec) tatoeba;

        nativeBuildInputs = [ dict jq moreutils stardict-tools tatoebatools ];

        # The sources are single bzip2-compressed TSV files rather than archives, so
        # the stock unpacker is replaced. Hooks are still run so that overrides and
        # setup hooks keep working.
        unpackPhase =
          let
            # Decompress one dump into the build directory and record it in
            # versions.json under its base name (file name without ".tsv.bz2"),
            # stamped with the package version.
            unpackSrc = src: ''
              bzcat ${src} > ${lib.removeSuffix ".bz2" src.name}
              jq '.+{"${lib.removeSuffix ".tsv.bz2" src.name}":"${version} 00:00:00"}' versions.json | \
                sponge versions.json
            '';
          in
          ''
            runHook preUnpack

            echo "{}" > versions.json
            ${lib.concatMapStringsSep "\n" unpackSrc srcs}

            runHook postUnpack
          '';

        buildPhase =
          let
            # "deu eng" -> "tatoeba_deu_eng"
            dictName = lang: "tatoeba_${lib.replaceStrings [ " " ] [ "_" ] lang}";

            # Emit the tab-separated corpus for one pair and convert it to StarDict.
            makeDict = lang: ''
              parallel_corpus ${lang} > ${dictName lang}.tab
              stardict-tabfile ${dictName lang}.tab
            '';
          in
          ''
            runHook preBuild

            # NOTE(review): presumably tatoebatools looks for its data under
            # $XDG_DATA_HOME/tatoebatools; the layout below is staged for that.
            export XDG_DATA_HOME=$PWD
            mkdir -p tatoebatools/{links,sentences_detailed}
            mv *_links.tsv tatoebatools/links
            mv *_sentences_detailed.tsv tatoebatools/sentences_detailed
            mv versions.json tatoebatools
            ${lib.concatMapStringsSep "\n" makeDict langs}

            runHook postBuild
          '';

        # Dictionary files are installed flat into $out.
        installPhase = ''
          runHook preInstall

          install -Dm644 *.{dict*,idx,ifo} -t $out

          runHook postInstall
        '';

        meta = {
          description = "Tatoeba is a collection of sentences and translations";
          homepage = "https://tatoeba.org/";
          license = lib.licenses.free;
          maintainers = [ lib.maintainers.sikmir ];
          platforms = lib.platforms.all;
          skip.ci = true;
        };
      }