default.nix (2030B)
# Builds StarDict dictionaries from Tatoeba sentence/link exports.
#
# The source tables are described in ./tatoeba.json (an attribute set of
# `fetchurl` argument sets), fetched as .bz2 files, decompressed, and then
# turned into one tab file + StarDict dictionary per entry of `langs`.
{
  lib,
  stdenvNoCC,
  fetchurl,
  dict,
  jq,
  moreutils,
  stardict-tools,
  tatoebatools,
}:
let
  # Each entry is a space-separated pair of language codes. It is
  # interpolated unquoted into the shell command line below, so it reaches
  # `parallel_corpus` as two separate arguments.
  langs = [
    "bel eng"
    "bel rus"
    "deu eng"
    "deu rus"
    "eng bel"
    "eng deu"
    "eng epo"
    "eng fin"
    "eng hye"
    "eng rus"
    "eng swe"
    "eng ukr"
    "epo eng"
    "epo rus"
    "fin eng"
    "fin rus"
    "hye eng"
    "hye rus"
    "rus bel"
    "rus deu"
    "rus eng"
    "rus epo"
    "rus fin"
    "rus hye"
    "rus swe"
    "rus ukr"
    "swe eng"
    "swe rus"
    "ukr eng"
    "ukr rus"
  ];

  # Attribute set whose values are passed verbatim to `fetchurl`.
  tatoeba = builtins.fromJSON (builtins.readFile ./tatoeba.json);
in
stdenvNoCC.mkDerivation rec {
  pname = "tatoeba";
  version = "2024-11-16";

  # One fetched file per attribute of ./tatoeba.json; the attribute name
  # itself is not used.
  srcs = lib.mapAttrsToList (_: fetchurl) tatoeba;

  # Decompress every source into the build directory and record it in
  # versions.json as `"<name without .tsv.bz2>": "<version> 00:00:00"`.
  unpackPhase =
    ''
      runHook preUnpack

      echo "{}" > versions.json
    ''
    + lib.concatMapStringsSep "\n" (src: ''
      bzcat "${src}" > "${lib.removeSuffix ".bz2" src.name}"
      jq '.+{"${lib.removeSuffix ".tsv.bz2" src.name}":"${version} 00:00:00"}' versions.json | \
        sponge versions.json
    '') srcs
    + ''

      runHook postUnpack
    '';

  nativeBuildInputs = [
    dict
    jq
    moreutils
    stardict-tools
    tatoebatools
  ];

  buildPhase =
    let
      # Shell snippet producing tatoeba_<src>_<dst>.tab for one language
      # pair and converting it with stardict-tabfile.
      makeDict =
        lang:
        let
          tabFile = "tatoeba_${lib.replaceStrings [ " " ] [ "_" ] lang}.tab";
        in
        ''
          parallel_corpus ${lang} > ${tabFile}
          stardict-tabfile ${tabFile}
        '';
    in
    ''
      runHook preBuild

      # NOTE(review): presumably tatoebatools reads its tables from
      # $XDG_DATA_HOME/tatoebatools, hence the layout created here; confirm.
      export XDG_DATA_HOME=$PWD
      mkdir -p tatoebatools/{links,sentences_detailed}
      mv *_links.tsv tatoebatools/links
      mv *_sentences_detailed.tsv tatoebatools/sentences_detailed
      mv versions.json tatoebatools
      ${lib.concatMapStringsSep "\n" makeDict langs}

      runHook postBuild
    '';

  # Install the generated StarDict files (.dict*, .idx, .ifo) flat into $out.
  installPhase = ''
    runHook preInstall

    install -Dm644 *.{dict*,idx,ifo} -t $out

    runHook postInstall
  '';

  meta = {
    description = "Tatoeba is a collection of sentences and translations";
    homepage = "https://tatoeba.org/";
    license = lib.licenses.free;
    maintainers = [ lib.maintainers.sikmir ];
    platforms = lib.platforms.all;
    skip.ci = true;
  };
}