package.nix (2044B)
# Derivation that turns Tatoeba sentence/link exports into StarDict
# dictionaries, one per language pair listed in `langs`.
{
  lib,
  stdenvNoCC,
  fetchurl,
  dict,
  jq,
  moreutils,
  stardict-tools,
  tatoebatools,
}:
let
  # Language pairs to build. Each entry is two space-separated three-letter
  # codes and is passed verbatim as the arguments of `parallel_corpus`.
  langs = [
    "bel eng"
    "bel rus"
    "deu eng"
    "deu rus"
    "eng bel"
    "eng deu"
    "eng epo"
    "eng fin"
    "eng hye"
    "eng rus"
    "eng swe"
    "eng ukr"
    "epo eng"
    "epo rus"
    "fin eng"
    "fin rus"
    "hye eng"
    "hye rus"
    "rus bel"
    "rus deu"
    "rus eng"
    "rus epo"
    "rus fin"
    "rus hye"
    "rus swe"
    "rus ukr"
    "swe eng"
    "swe rus"
    "ukr eng"
    "ukr rus"
  ];

  # Attribute set read from the sibling JSON file; every value is handed to
  # `fetchurl` unchanged (see `srcs` below).
  tatoeba = builtins.fromJSON (builtins.readFile ./tatoeba.json);

  # Name of the intermediate tab-separated file for one language pair,
  # e.g. "eng rus" -> "tatoeba_eng_rus.tab".
  tabFileFor = lang: "tatoeba_${lib.replaceStrings [ " " ] [ "_" ] lang}.tab";

  # Shell snippet that builds the dictionary for one language pair.
  makeDict = lang: ''
    parallel_corpus ${lang} > ${tabFileFor lang}
    stardict-tabfile ${tabFileFor lang}
  '';
in
stdenvNoCC.mkDerivation (finalAttrs: {
  pname = "tatoeba";
  version = "2025-11-08";

  # One fetched archive per entry of tatoeba.json; the attribute names are
  # not used.
  srcs = lib.mapAttrsToList (name: spec: fetchurl spec) tatoeba;

  # Custom unpack: decompress every archive into the build directory and
  # record it in versions.json, keyed by the archive name without its
  # ".tsv.bz2" suffix and stamped with "<version> 00:00:00".
  # NOTE(review): versions.json is presumably what tatoebatools consults to
  # decide its local data is current - confirm against tatoebatools.
  unpackPhase = ''
    runHook preUnpack
    echo "{}" > versions.json
  ''
  + lib.concatMapStringsSep "\n" (src: ''
    bzcat ${src} > ${lib.removeSuffix ".bz2" src.name}
    jq '.+{"${lib.removeSuffix ".tsv.bz2" src.name}":"${finalAttrs.version} 00:00:00"}' versions.json | \
      sponge versions.json
  '') finalAttrs.srcs
  + ''
    runHook postUnpack
  '';

  nativeBuildInputs = [
    dict
    jq
    moreutils
    stardict-tools
    tatoebatools
  ];

  # Arrange the unpacked files under $PWD/tatoebatools, point XDG_DATA_HOME
  # at $PWD, then build one dictionary per language pair.
  buildPhase = ''
    runHook preBuild
    export XDG_DATA_HOME=$PWD
    mkdir -p tatoebatools/{links,sentences_detailed}
    mv *_links.tsv tatoebatools/links
    mv *_sentences_detailed.tsv tatoebatools/sentences_detailed
    mv versions.json tatoebatools
    ${lib.concatMapStringsSep "\n" makeDict langs}
    runHook postBuild
  '';

  # Install the generated dictionary files (*.dict*, *.idx, *.ifo) directly
  # into $out.
  installPhase = ''
    runHook preInstall
    install -Dm644 *.{dict*,idx,ifo} -t $out
    runHook postInstall
  '';

  meta = {
    description = "Tatoeba is a collection of sentences and translations";
    homepage = "https://tatoeba.org/";
    license = lib.licenses.free;
    maintainers = [ lib.maintainers.sikmir ];
    platforms = lib.platforms.all;
    # NOTE(review): `skip.ci` is not a standard nixpkgs meta attribute;
    # presumably read by this repository's own CI tooling - confirm.
    skip.ci = true;
  };
})
