package.nix (2073B)
# Package expression that turns Tatoeba sentence exports into StarDict
# dictionaries, one per language pair listed in `langs`.
{
  lib,
  stdenvNoCC,
  fetchurl,
  dict,
  jq,
  moreutils,
  stardict-tools,
  tatoebatools,
}:
let
  # Language pairs, written as two space-separated codes. Each entry is passed
  # verbatim as the arguments of `parallel_corpus` and yields one dictionary.
  langs = [
    "bel eng"
    "bel rus"
    "deu eng"
    "deu rus"
    "eng bel"
    "eng deu"
    "eng epo"
    "eng fin"
    "eng hye"
    "eng rus"
    "eng swe"
    "eng ukr"
    "epo eng"
    "epo rus"
    "fin eng"
    "fin rus"
    "hye eng"
    "hye rus"
    "rus bel"
    "rus deu"
    "rus eng"
    "rus epo"
    "rus fin"
    "rus hye"
    "rus swe"
    "rus ukr"
    "swe eng"
    "swe rus"
    "ukr eng"
    "ukr rus"
  ];

  # Attribute set read from ./tatoeba.json; every value is handed to `fetchurl`
  # unchanged, so each one must be a valid fetchurl argument set.
  tatoeba = builtins.fromJSON (builtins.readFile ./tatoeba.json);
in
stdenvNoCC.mkDerivation (finalAttrs: {
  pname = "tatoeba";
  version = "2026-04-25";

  __structuredAttrs = true;

  # One fetched archive per entry of tatoeba.json (the attribute names are unused).
  srcs = lib.mapAttrsToList (_: spec: fetchurl spec) tatoeba;

  # Custom unpack: decompress every archive into the build directory and record
  # it in versions.json under its name without the ".tsv.bz2" suffix, with the
  # package version as a midnight timestamp.
  # NOTE(review): presumably tatoebatools consults versions.json to treat the
  # local tables as current instead of downloading them; confirm upstream.
  unpackPhase =
    let
      unpackOne = src: ''
        bzcat ${src} > ${lib.removeSuffix ".bz2" src.name}
        jq '.+{"${lib.removeSuffix ".tsv.bz2" src.name}":"${finalAttrs.version} 00:00:00"}' versions.json | \
          sponge versions.json
      '';
    in
    ''
      echo "{}" > versions.json
    ''
    + lib.concatMapStringsSep "\n" unpackOne finalAttrs.srcs;

  nativeBuildInputs = [
    dict
    jq
    moreutils
    stardict-tools
    tatoebatools
  ];

  buildPhase =
    let
      # Shell snippet for one language pair: dump the parallel corpus into a
      # tab file named after the pair (space replaced by "_"), then convert
      # that tab file with stardict-tabfile.
      makeDict =
        lang:
        let
          tabFile = "tatoeba_${lib.replaceStrings [ " " ] [ "_" ] lang}.tab";
        in
        ''
          parallel_corpus ${lang} > ${tabFile}
          stardict-tabfile ${tabFile}
        '';
    in
    ''
      runHook preBuild

      # Lay the unpacked tables out under $XDG_DATA_HOME/tatoebatools.
      # NOTE(review): presumably this is where tatoebatools looks for its data.
      export XDG_DATA_HOME=$PWD
      mkdir -p tatoebatools/{links,sentences_detailed}
      mv *_links.tsv tatoebatools/links
      mv *_sentences_detailed.tsv tatoebatools/sentences_detailed
      mv versions.json tatoebatools
      ${lib.concatMapStringsSep "\n" makeDict langs}

      runHook postBuild
    '';

  # Install every generated dictionary file directly into $out.
  installPhase = ''
    runHook preInstall

    install -Dm644 *.{dict*,idx,ifo} -t "$out"

    runHook postInstall
  '';

  meta = {
    description = "Tatoeba is a collection of sentences and translations";
    homepage = "https://tatoeba.org/";
    license = lib.licenses.free;
    maintainers = [ lib.maintainers.sikmir ];
    platforms = lib.platforms.all;
    skip.ci = true;
  };
})
