local-ai: 2.19.4 -> 2.20.1 (#336871)
This commit is contained in:
commit
14f8e6c62c
@ -1,30 +1,39 @@
|
||||
{ lib
|
||||
, writers
|
||||
, writeText
|
||||
, linkFarmFromDrvs
|
||||
}: {
|
||||
genModels = configs:
|
||||
{
|
||||
lib,
|
||||
writers,
|
||||
writeText,
|
||||
linkFarmFromDrvs,
|
||||
}:
|
||||
{
|
||||
genModels =
|
||||
configs:
|
||||
let
|
||||
name = lib.strings.sanitizeDerivationName
|
||||
(builtins.concatStringsSep "_" ([ "local-ai-models" ] ++ (builtins.attrNames configs)));
|
||||
name = lib.strings.sanitizeDerivationName (
|
||||
builtins.concatStringsSep "_" ([ "local-ai-models" ] ++ (builtins.attrNames configs))
|
||||
);
|
||||
|
||||
genModelFiles = name: config:
|
||||
genModelFiles =
|
||||
name: config:
|
||||
let
|
||||
templateName = type: name + "_" + type;
|
||||
|
||||
config' = lib.recursiveUpdate config ({
|
||||
inherit name;
|
||||
} // lib.optionalAttrs (lib.isDerivation config.parameters.model) {
|
||||
parameters.model = config.parameters.model.name;
|
||||
} // lib.optionalAttrs (config ? template) {
|
||||
template = builtins.mapAttrs (n: _: templateName n) config.template;
|
||||
});
|
||||
config' = lib.recursiveUpdate config (
|
||||
{
|
||||
inherit name;
|
||||
}
|
||||
// lib.optionalAttrs (lib.isDerivation config.parameters.model) {
|
||||
parameters.model = config.parameters.model.name;
|
||||
}
|
||||
// lib.optionalAttrs (config ? template) {
|
||||
template = builtins.mapAttrs (n: _: templateName n) config.template;
|
||||
}
|
||||
);
|
||||
in
|
||||
[ (writers.writeYAML "${name}.yaml" config') ]
|
||||
++ lib.optional (lib.isDerivation config.parameters.model)
|
||||
config.parameters.model
|
||||
++ lib.optionals (config ? template)
|
||||
(lib.mapAttrsToList (n: writeText "${templateName n}.tmpl") config.template);
|
||||
++ lib.optional (lib.isDerivation config.parameters.model) config.parameters.model
|
||||
++ lib.optionals (config ? template) (
|
||||
lib.mapAttrsToList (n: writeText "${templateName n}.tmpl") config.template
|
||||
);
|
||||
in
|
||||
linkFarmFromDrvs name (lib.flatten (lib.mapAttrsToList genModelFiles configs));
|
||||
}
|
||||
|
@ -1,4 +1,9 @@
|
||||
{ pkgs, config, lib, ... }:
|
||||
{
|
||||
pkgs,
|
||||
config,
|
||||
lib,
|
||||
...
|
||||
}:
|
||||
let
|
||||
cfg = config.services.local-ai;
|
||||
inherit (lib) mkOption types;
|
||||
@ -35,7 +40,13 @@ in
|
||||
};
|
||||
|
||||
logLevel = mkOption {
|
||||
type = types.enum [ "error" "warn" "info" "debug" "trace" ];
|
||||
type = types.enum [
|
||||
"error"
|
||||
"warn"
|
||||
"info"
|
||||
"debug"
|
||||
"trace"
|
||||
];
|
||||
default = "warn";
|
||||
};
|
||||
};
|
||||
@ -46,16 +57,18 @@ in
|
||||
environment.LLAMACPP_PARALLEL = toString cfg.parallelRequests;
|
||||
serviceConfig = {
|
||||
DynamicUser = true;
|
||||
ExecStart = lib.escapeShellArgs ([
|
||||
"${cfg.package}/bin/local-ai"
|
||||
"--address=:${toString cfg.port}"
|
||||
"--threads=${toString cfg.threads}"
|
||||
"--localai-config-dir=."
|
||||
"--models-path=${cfg.models}"
|
||||
"--log-level=${cfg.logLevel}"
|
||||
]
|
||||
++ lib.optional (cfg.parallelRequests > 1) "--parallel-requests"
|
||||
++ cfg.extraArgs);
|
||||
ExecStart = lib.escapeShellArgs (
|
||||
[
|
||||
"${cfg.package}/bin/local-ai"
|
||||
"--address=:${toString cfg.port}"
|
||||
"--threads=${toString cfg.threads}"
|
||||
"--localai-config-dir=."
|
||||
"--models-path=${cfg.models}"
|
||||
"--log-level=${cfg.logLevel}"
|
||||
]
|
||||
++ lib.optional (cfg.parallelRequests > 1) "--parallel-requests"
|
||||
++ cfg.extraArgs
|
||||
);
|
||||
RuntimeDirectory = "local-ai";
|
||||
WorkingDirectory = "%t/local-ai";
|
||||
};
|
||||
|
@ -1,71 +1,87 @@
|
||||
{ config
|
||||
, callPackages
|
||||
, stdenv
|
||||
, lib
|
||||
, addDriverRunpath
|
||||
, fetchFromGitHub
|
||||
, protobuf
|
||||
, protoc-gen-go
|
||||
, protoc-gen-go-grpc
|
||||
, grpc
|
||||
, openssl
|
||||
, llama-cpp
|
||||
{
|
||||
config,
|
||||
callPackages,
|
||||
stdenv,
|
||||
lib,
|
||||
addDriverRunpath,
|
||||
fetchFromGitHub,
|
||||
protobuf,
|
||||
protoc-gen-go,
|
||||
protoc-gen-go-grpc,
|
||||
grpc,
|
||||
openssl,
|
||||
llama-cpp,
|
||||
# needed for audio-to-text
|
||||
, ffmpeg
|
||||
, cmake
|
||||
, pkg-config
|
||||
, buildGoModule
|
||||
, makeWrapper
|
||||
, ncurses
|
||||
, which
|
||||
ffmpeg,
|
||||
cmake,
|
||||
pkg-config,
|
||||
buildGoModule,
|
||||
makeWrapper,
|
||||
ncurses,
|
||||
which,
|
||||
|
||||
, enable_upx ? true
|
||||
, upx
|
||||
enable_upx ? true,
|
||||
upx,
|
||||
|
||||
# apply feature parameter names according to
|
||||
# https://github.com/NixOS/rfcs/pull/169
|
||||
|
||||
# CPU extensions
|
||||
, enable_avx ? true
|
||||
, enable_avx2 ? true
|
||||
, enable_avx512 ? stdenv.hostPlatform.avx512Support
|
||||
, enable_f16c ? true
|
||||
, enable_fma ? true
|
||||
enable_avx ? true,
|
||||
enable_avx2 ? true,
|
||||
enable_avx512 ? stdenv.hostPlatform.avx512Support,
|
||||
enable_f16c ? true,
|
||||
enable_fma ? true,
|
||||
|
||||
, with_openblas ? false
|
||||
, openblas
|
||||
with_openblas ? false,
|
||||
openblas,
|
||||
|
||||
, with_cublas ? config.cudaSupport
|
||||
, cudaPackages
|
||||
with_cublas ? config.cudaSupport,
|
||||
cudaPackages,
|
||||
|
||||
, with_clblas ? false
|
||||
, clblast
|
||||
, ocl-icd
|
||||
, opencl-headers
|
||||
with_clblas ? false,
|
||||
clblast,
|
||||
ocl-icd,
|
||||
opencl-headers,
|
||||
|
||||
, with_tinydream ? false # do not compile with cublas
|
||||
, ncnn
|
||||
with_tinydream ? false, # do not compile with cublas
|
||||
ncnn,
|
||||
|
||||
, with_stablediffusion ? true
|
||||
, opencv
|
||||
with_stablediffusion ? true,
|
||||
opencv,
|
||||
|
||||
, with_tts ? true
|
||||
, onnxruntime
|
||||
, sonic
|
||||
, spdlog
|
||||
, fmt
|
||||
, espeak-ng
|
||||
, piper-tts
|
||||
with_tts ? true,
|
||||
onnxruntime,
|
||||
sonic,
|
||||
spdlog,
|
||||
fmt,
|
||||
espeak-ng,
|
||||
piper-tts,
|
||||
}:
|
||||
let
|
||||
BUILD_TYPE =
|
||||
assert (lib.count lib.id [ with_openblas with_cublas with_clblas ]) <= 1;
|
||||
if with_openblas then "openblas"
|
||||
else if with_cublas then "cublas"
|
||||
else if with_clblas then "clblas"
|
||||
else "";
|
||||
assert
|
||||
(lib.count lib.id [
|
||||
with_openblas
|
||||
with_cublas
|
||||
with_clblas
|
||||
]) <= 1;
|
||||
if with_openblas then
|
||||
"openblas"
|
||||
else if with_cublas then
|
||||
"cublas"
|
||||
else if with_clblas then
|
||||
"clblas"
|
||||
else
|
||||
"";
|
||||
|
||||
inherit (cudaPackages) libcublas cuda_nvcc cuda_cccl cuda_cudart libcufft;
|
||||
inherit (cudaPackages)
|
||||
libcublas
|
||||
cuda_nvcc
|
||||
cuda_cccl
|
||||
cuda_cudart
|
||||
libcufft
|
||||
;
|
||||
|
||||
go-llama = effectiveStdenv.mkDerivation {
|
||||
name = "go-llama";
|
||||
@ -81,13 +97,21 @@ let
|
||||
"BUILD_TYPE=${BUILD_TYPE}"
|
||||
];
|
||||
|
||||
buildInputs = [ ]
|
||||
++ lib.optionals with_cublas [ cuda_cccl cuda_cudart libcublas ]
|
||||
++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
|
||||
buildInputs =
|
||||
[ ]
|
||||
++ lib.optionals with_cublas [
|
||||
cuda_cccl
|
||||
cuda_cudart
|
||||
libcublas
|
||||
]
|
||||
++ lib.optionals with_clblas [
|
||||
clblast
|
||||
ocl-icd
|
||||
opencl-headers
|
||||
]
|
||||
++ lib.optionals with_openblas [ openblas.dev ];
|
||||
|
||||
nativeBuildInputs = [ cmake ]
|
||||
++ lib.optionals with_cublas [ cuda_nvcc ];
|
||||
nativeBuildInputs = [ cmake ] ++ lib.optionals with_cublas [ cuda_nvcc ];
|
||||
|
||||
dontUseCmakeConfigure = true;
|
||||
|
||||
@ -98,82 +122,70 @@ let
|
||||
'';
|
||||
};
|
||||
|
||||
llama-cpp-rpc = (llama-cpp-grpc.overrideAttrs (prev: {
|
||||
name = "llama-cpp-rpc";
|
||||
cmakeFlags = prev.cmakeFlags ++ [
|
||||
(lib.cmakeBool "GGML_AVX" false)
|
||||
(lib.cmakeBool "GGML_AVX2" false)
|
||||
(lib.cmakeBool "GGML_AVX512" false)
|
||||
(lib.cmakeBool "GGML_FMA" false)
|
||||
(lib.cmakeBool "GGML_F16C" false)
|
||||
];
|
||||
})).override {
|
||||
cudaSupport = false;
|
||||
openclSupport = false;
|
||||
blasSupport = false;
|
||||
rpcSupport = true;
|
||||
};
|
||||
llama-cpp-rpc =
|
||||
(llama-cpp-grpc.overrideAttrs (prev: {
|
||||
name = "llama-cpp-rpc";
|
||||
cmakeFlags = prev.cmakeFlags ++ [
|
||||
(lib.cmakeBool "GGML_AVX" false)
|
||||
(lib.cmakeBool "GGML_AVX2" false)
|
||||
(lib.cmakeBool "GGML_AVX512" false)
|
||||
(lib.cmakeBool "GGML_FMA" false)
|
||||
(lib.cmakeBool "GGML_F16C" false)
|
||||
];
|
||||
})).override
|
||||
{
|
||||
cudaSupport = false;
|
||||
openclSupport = false;
|
||||
blasSupport = false;
|
||||
rpcSupport = true;
|
||||
};
|
||||
|
||||
llama-cpp-grpc = (llama-cpp.overrideAttrs (final: prev: {
|
||||
name = "llama-cpp-grpc";
|
||||
src = fetchFromGitHub {
|
||||
owner = "ggerganov";
|
||||
repo = "llama.cpp";
|
||||
rev = "ed9d2854c9de4ae1f448334294e61167b04bec2a";
|
||||
hash = "sha256-Xu2h9Zu+Q9utfFFmDWBOEu/EXth4xWRNoTMvPF5Fo/A=";
|
||||
fetchSubmodules = true;
|
||||
};
|
||||
postPatch = prev.postPatch + ''
|
||||
cd examples
|
||||
cp -r --no-preserve=mode ${src}/backend/cpp/llama grpc-server
|
||||
cp llava/clip.* llava/llava.* grpc-server
|
||||
printf "\nadd_subdirectory(grpc-server)" >> CMakeLists.txt
|
||||
llama-cpp-grpc =
|
||||
(llama-cpp.overrideAttrs (
|
||||
final: prev: {
|
||||
name = "llama-cpp-grpc";
|
||||
src = fetchFromGitHub {
|
||||
owner = "ggerganov";
|
||||
repo = "llama.cpp";
|
||||
rev = "fc54ef0d1c138133a01933296d50a36a1ab64735";
|
||||
hash = "sha256-o87EhrA2Oa98pwyb6GSUgwERY0/GWJiX7kvlxDv4zb4=";
|
||||
fetchSubmodules = true;
|
||||
};
|
||||
postPatch =
|
||||
prev.postPatch
|
||||
+ ''
|
||||
cd examples
|
||||
cp -r --no-preserve=mode ${src}/backend/cpp/llama grpc-server
|
||||
cp llava/clip.* llava/llava.* grpc-server
|
||||
printf "\nadd_subdirectory(grpc-server)" >> CMakeLists.txt
|
||||
|
||||
cp ${src}/backend/backend.proto grpc-server
|
||||
sed -i grpc-server/CMakeLists.txt \
|
||||
-e '/get_filename_component/ s;[.\/]*backend/;;' \
|
||||
-e '$a\install(TARGETS ''${TARGET} RUNTIME)'
|
||||
cd ..
|
||||
'';
|
||||
cmakeFlags = prev.cmakeFlags ++ [
|
||||
(lib.cmakeBool "BUILD_SHARED_LIBS" false)
|
||||
(lib.cmakeBool "GGML_AVX" enable_avx)
|
||||
(lib.cmakeBool "GGML_AVX2" enable_avx2)
|
||||
(lib.cmakeBool "GGML_AVX512" enable_avx512)
|
||||
(lib.cmakeBool "GGML_FMA" enable_fma)
|
||||
(lib.cmakeBool "GGML_F16C" enable_f16c)
|
||||
];
|
||||
buildInputs = prev.buildInputs ++ [
|
||||
protobuf # provides also abseil_cpp as propagated build input
|
||||
grpc
|
||||
openssl
|
||||
];
|
||||
})).override {
|
||||
cudaSupport = with_cublas;
|
||||
rocmSupport = false;
|
||||
openclSupport = with_clblas;
|
||||
blasSupport = with_openblas;
|
||||
};
|
||||
|
||||
gpt4all = stdenv.mkDerivation {
|
||||
name = "gpt4all";
|
||||
src = fetchFromGitHub {
|
||||
owner = "nomic-ai";
|
||||
repo = "gpt4all";
|
||||
rev = "27a8b020c36b0df8f8b82a252d261cda47cf44b8";
|
||||
hash = "sha256-djq1eK6ncvhkO3MNDgasDBUY/7WWcmZt/GJsHAulLdI=";
|
||||
fetchSubmodules = true;
|
||||
};
|
||||
makeFlags = [ "-C gpt4all-bindings/golang" ];
|
||||
buildFlags = [ "libgpt4all.a" ];
|
||||
dontUseCmakeConfigure = true;
|
||||
nativeBuildInputs = [ cmake ];
|
||||
installPhase = ''
|
||||
mkdir $out
|
||||
tar cf - --exclude=CMakeFiles . \
|
||||
| tar xf - -C $out
|
||||
'';
|
||||
};
|
||||
cp ${src}/backend/backend.proto grpc-server
|
||||
sed -i grpc-server/CMakeLists.txt \
|
||||
-e '/get_filename_component/ s;[.\/]*backend/;;' \
|
||||
-e '$a\install(TARGETS ''${TARGET} RUNTIME)'
|
||||
cd ..
|
||||
'';
|
||||
cmakeFlags = prev.cmakeFlags ++ [
|
||||
(lib.cmakeBool "BUILD_SHARED_LIBS" false)
|
||||
(lib.cmakeBool "GGML_AVX" enable_avx)
|
||||
(lib.cmakeBool "GGML_AVX2" enable_avx2)
|
||||
(lib.cmakeBool "GGML_AVX512" enable_avx512)
|
||||
(lib.cmakeBool "GGML_FMA" enable_fma)
|
||||
(lib.cmakeBool "GGML_F16C" enable_f16c)
|
||||
];
|
||||
buildInputs = prev.buildInputs ++ [
|
||||
protobuf # provides also abseil_cpp as propagated build input
|
||||
grpc
|
||||
openssl
|
||||
];
|
||||
}
|
||||
)).override
|
||||
{
|
||||
cudaSupport = with_cublas;
|
||||
rocmSupport = false;
|
||||
openclSupport = with_clblas;
|
||||
blasSupport = with_openblas;
|
||||
};
|
||||
|
||||
espeak-ng' = espeak-ng.overrideAttrs (self: {
|
||||
name = "espeak-ng'";
|
||||
@ -199,8 +211,14 @@ let
|
||||
name = "piper-phonemize";
|
||||
inherit (go-piper) src;
|
||||
sourceRoot = "${go-piper.src.name}/piper-phonemize";
|
||||
buildInputs = [ espeak-ng' onnxruntime ];
|
||||
nativeBuildInputs = [ cmake pkg-config ];
|
||||
buildInputs = [
|
||||
espeak-ng'
|
||||
onnxruntime
|
||||
];
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
pkg-config
|
||||
];
|
||||
cmakeFlags = [
|
||||
(lib.cmakeFeature "ONNXRUNTIME_DIR" "${onnxruntime.dev}")
|
||||
(lib.cmakeFeature "ESPEAK_NG_DIR" "${espeak-ng'}")
|
||||
@ -240,7 +258,15 @@ let
|
||||
-e '/CXXFLAGS *= / s;$; -DSPDLOG_FMT_EXTERNAL=1;'
|
||||
'';
|
||||
buildFlags = [ "libpiper_binding.a" ];
|
||||
buildInputs = [ piper-tts' espeak-ng' piper-phonemize sonic fmt spdlog onnxruntime ];
|
||||
buildInputs = [
|
||||
piper-tts'
|
||||
espeak-ng'
|
||||
piper-phonemize
|
||||
sonic
|
||||
fmt
|
||||
spdlog
|
||||
onnxruntime
|
||||
];
|
||||
installPhase = ''
|
||||
cp -r --no-preserve=mode $src $out
|
||||
mkdir -p $out/piper-phonemize/pi
|
||||
@ -273,16 +299,28 @@ let
|
||||
src = fetchFromGitHub {
|
||||
owner = "ggerganov";
|
||||
repo = "whisper.cpp";
|
||||
rev = "6739eb83c3ca5cf40d24c6fe8442a761a1eb6248";
|
||||
hash = "sha256-1yDdJVjIwYDJKn93zn4xOJXMoDTqaG2TvakjdHIMCxk=";
|
||||
rev = "9e3c5345cd46ea718209db53464e426c3fe7a25e";
|
||||
hash = "sha256-JOptyveuaKRLzeZ6GuB3A70IM7dk4we95g5o25XVXJI=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [ cmake pkg-config ]
|
||||
++ lib.optionals with_cublas [ cuda_nvcc ];
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
pkg-config
|
||||
] ++ lib.optionals with_cublas [ cuda_nvcc ];
|
||||
|
||||
buildInputs = [ ]
|
||||
++ lib.optionals with_cublas [ cuda_cccl cuda_cudart libcublas libcufft ]
|
||||
++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
|
||||
buildInputs =
|
||||
[ ]
|
||||
++ lib.optionals with_cublas [
|
||||
cuda_cccl
|
||||
cuda_cudart
|
||||
libcublas
|
||||
libcufft
|
||||
]
|
||||
++ lib.optionals with_clblas [
|
||||
clblast
|
||||
ocl-icd
|
||||
opencl-headers
|
||||
]
|
||||
++ lib.optionals with_openblas [ openblas.dev ];
|
||||
|
||||
cmakeFlags = [
|
||||
@ -379,25 +417,26 @@ let
|
||||
meta.broken = lib.versionOlder go-tiny-dream.stdenv.cc.version "13";
|
||||
};
|
||||
|
||||
GO_TAGS = lib.optional with_tinydream "tinydream"
|
||||
GO_TAGS =
|
||||
lib.optional with_tinydream "tinydream"
|
||||
++ lib.optional with_tts "tts"
|
||||
++ lib.optional with_stablediffusion "stablediffusion";
|
||||
|
||||
effectiveStdenv =
|
||||
if with_cublas then
|
||||
# It's necessary to consistently use backendStdenv when building with CUDA support,
|
||||
# otherwise we get libstdc++ errors downstream.
|
||||
# It's necessary to consistently use backendStdenv when building with CUDA support,
|
||||
# otherwise we get libstdc++ errors downstream.
|
||||
cudaPackages.backendStdenv
|
||||
else
|
||||
stdenv;
|
||||
|
||||
pname = "local-ai";
|
||||
version = "2.19.4";
|
||||
version = "2.20.1";
|
||||
src = fetchFromGitHub {
|
||||
owner = "go-skynet";
|
||||
repo = "LocalAI";
|
||||
rev = "v${version}";
|
||||
hash = "sha256-aKq6/DI+4+BvIEw6eONqPr3mZXuz7rMFN+FBypVj0Gc=";
|
||||
hash = "sha256-FeZZC0Tg9JT9Yj0e27GOLSdHEtWl17AHK3j7epwPyY8=";
|
||||
};
|
||||
|
||||
prepare-sources =
|
||||
@ -407,53 +446,67 @@ let
|
||||
''
|
||||
mkdir sources
|
||||
${cp} ${go-llama} sources/go-llama.cpp
|
||||
${cp} ${gpt4all} sources/gpt4all
|
||||
${cp} ${if with_tts then go-piper else go-piper.src} sources/go-piper
|
||||
${cp} ${go-rwkv} sources/go-rwkv.cpp
|
||||
${cp} ${whisper-cpp.src} sources/whisper.cpp
|
||||
cp ${whisper-cpp}/lib/lib*.a sources/whisper.cpp
|
||||
${cp} ${go-bert} sources/go-bert.cpp
|
||||
${cp} ${if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src} sources/go-stable-diffusion
|
||||
${cp} ${
|
||||
if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src
|
||||
} sources/go-stable-diffusion
|
||||
${cp} ${if with_tinydream then go-tiny-dream else go-tiny-dream.src} sources/go-tiny-dream
|
||||
'';
|
||||
|
||||
self = buildGoModule.override { stdenv = effectiveStdenv; } {
|
||||
inherit pname version src;
|
||||
|
||||
vendorHash = "sha256-HEKE75+ixuNbM+KEuhbQQ/NYYEzVlGYOttPavftWKhk=";
|
||||
vendorHash = "sha256-mDxp5frUIECSHKjxaJVqIP7mnIusvdT45Xlxc9+P5tE=";
|
||||
|
||||
env.NIX_CFLAGS_COMPILE = lib.optionalString with_stablediffusion " -isystem ${opencv}/include/opencv4";
|
||||
|
||||
postPatch = ''
|
||||
sed -i Makefile \
|
||||
-e '/mod download/ d' \
|
||||
-e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-fallback/ d' \
|
||||
-e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-avx/ d' \
|
||||
-e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-cuda/ d' \
|
||||
postPatch =
|
||||
''
|
||||
sed -i Makefile \
|
||||
-e '/mod download/ d' \
|
||||
-e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-fallback/ d' \
|
||||
-e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-avx/ d' \
|
||||
-e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-cuda/ d' \
|
||||
|
||||
'' + lib.optionalString with_cublas ''
|
||||
sed -i Makefile \
|
||||
-e '/^CGO_LDFLAGS_WHISPER?=/ s;$;-L${libcufft}/lib -L${cuda_cudart}/lib;'
|
||||
'';
|
||||
''
|
||||
+ lib.optionalString with_cublas ''
|
||||
sed -i Makefile \
|
||||
-e '/^CGO_LDFLAGS_WHISPER?=/ s;$;-L${libcufft}/lib -L${cuda_cudart}/lib;'
|
||||
'';
|
||||
|
||||
postConfigure = prepare-sources + ''
|
||||
shopt -s extglob
|
||||
mkdir -p backend-assets/grpc
|
||||
cp ${llama-cpp-grpc}/bin/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||
cp ${llama-cpp-rpc}/bin/grpc-server backend-assets/grpc/llama-cpp-grpc
|
||||
postConfigure =
|
||||
prepare-sources
|
||||
+ ''
|
||||
shopt -s extglob
|
||||
mkdir -p backend-assets/grpc
|
||||
cp ${llama-cpp-grpc}/bin/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||
cp ${llama-cpp-rpc}/bin/grpc-server backend-assets/grpc/llama-cpp-grpc
|
||||
|
||||
mkdir -p backend/cpp/llama/llama.cpp
|
||||
mkdir -p backend/cpp/llama/llama.cpp
|
||||
|
||||
mkdir -p backend-assets/util
|
||||
cp ${llama-cpp-rpc}/bin/llama-rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||
mkdir -p backend-assets/util
|
||||
cp ${llama-cpp-rpc}/bin/llama-rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||
|
||||
# avoid rebuild of prebuilt make targets
|
||||
touch backend-assets/grpc/* backend-assets/util/* sources/**/lib*.a
|
||||
'';
|
||||
# avoid rebuild of prebuilt make targets
|
||||
touch backend-assets/grpc/* backend-assets/util/* sources/**/lib*.a
|
||||
'';
|
||||
|
||||
buildInputs = [ ]
|
||||
++ lib.optionals with_cublas [ cuda_cudart libcublas libcufft ]
|
||||
++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
|
||||
buildInputs =
|
||||
[ ]
|
||||
++ lib.optionals with_cublas [
|
||||
cuda_cudart
|
||||
libcublas
|
||||
libcufft
|
||||
]
|
||||
++ lib.optionals with_clblas [
|
||||
clblast
|
||||
ocl-icd
|
||||
opencl-headers
|
||||
]
|
||||
++ lib.optionals with_openblas [ openblas.dev ]
|
||||
++ lib.optionals with_stablediffusion go-stable-diffusion.buildInputs
|
||||
++ lib.optionals with_tts go-piper.buildInputs;
|
||||
@ -465,16 +518,16 @@ let
|
||||
makeWrapper
|
||||
ncurses # tput
|
||||
which
|
||||
]
|
||||
++ lib.optional enable_upx upx
|
||||
++ lib.optionals with_cublas [ cuda_nvcc ];
|
||||
] ++ lib.optional enable_upx upx ++ lib.optionals with_cublas [ cuda_nvcc ];
|
||||
|
||||
enableParallelBuilding = false;
|
||||
|
||||
modBuildPhase = prepare-sources + ''
|
||||
make protogen-go
|
||||
go mod tidy -v
|
||||
'';
|
||||
modBuildPhase =
|
||||
prepare-sources
|
||||
+ ''
|
||||
make protogen-go
|
||||
go mod tidy -v
|
||||
'';
|
||||
|
||||
proxyVendor = true;
|
||||
|
||||
@ -482,12 +535,13 @@ let
|
||||
# containing spaces
|
||||
env.GO_TAGS = builtins.concatStringsSep " " GO_TAGS;
|
||||
|
||||
makeFlags = [
|
||||
"VERSION=v${version}"
|
||||
"BUILD_TYPE=${BUILD_TYPE}"
|
||||
]
|
||||
++ lib.optional with_cublas "CUDA_LIBPATH=${cuda_cudart}/lib"
|
||||
++ lib.optional with_tts "PIPER_CGO_CXXFLAGS=-DSPDLOG_FMT_EXTERNAL=1";
|
||||
makeFlags =
|
||||
[
|
||||
"VERSION=v${version}"
|
||||
"BUILD_TYPE=${BUILD_TYPE}"
|
||||
]
|
||||
++ lib.optional with_cublas "CUDA_LIBPATH=${cuda_cudart}/lib"
|
||||
++ lib.optional with_tts "PIPER_CGO_CXXFLAGS=-DSPDLOG_FMT_EXTERNAL=1";
|
||||
|
||||
buildPhase = ''
|
||||
runHook preBuild
|
||||
@ -516,18 +570,25 @@ let
|
||||
# raises an segmentation fault
|
||||
postFixup =
|
||||
let
|
||||
LD_LIBRARY_PATH = [ ]
|
||||
LD_LIBRARY_PATH =
|
||||
[ ]
|
||||
++ lib.optionals with_cublas [
|
||||
# driverLink has to be first to avoid loading the stub version of libcuda.so
|
||||
# https://github.com/NixOS/nixpkgs/issues/320145#issuecomment-2190319327
|
||||
addDriverRunpath.driverLink
|
||||
(lib.getLib libcublas)
|
||||
cuda_cudart
|
||||
]
|
||||
++ lib.optionals with_clblas [ clblast ocl-icd ]
|
||||
# driverLink has to be first to avoid loading the stub version of libcuda.so
|
||||
# https://github.com/NixOS/nixpkgs/issues/320145#issuecomment-2190319327
|
||||
addDriverRunpath.driverLink
|
||||
(lib.getLib libcublas)
|
||||
cuda_cudart
|
||||
]
|
||||
++ lib.optionals with_clblas [
|
||||
clblast
|
||||
ocl-icd
|
||||
]
|
||||
++ lib.optionals with_openblas [ openblas ]
|
||||
++ lib.optionals with_tts [ piper-phonemize ]
|
||||
++ lib.optionals (with_tts && enable_upx) [ fmt spdlog ];
|
||||
++ lib.optionals (with_tts && enable_upx) [
|
||||
fmt
|
||||
spdlog
|
||||
];
|
||||
in
|
||||
''
|
||||
wrapProgram $out/bin/${pname} \
|
||||
@ -537,15 +598,30 @@ let
|
||||
|
||||
passthru.local-packages = {
|
||||
inherit
|
||||
go-tiny-dream go-rwkv go-bert go-llama gpt4all go-piper
|
||||
llama-cpp-grpc whisper-cpp go-tiny-dream-ncnn espeak-ng' piper-phonemize
|
||||
piper-tts' llama-cpp-rpc;
|
||||
go-tiny-dream
|
||||
go-rwkv
|
||||
go-bert
|
||||
go-llama
|
||||
go-piper
|
||||
llama-cpp-grpc
|
||||
whisper-cpp
|
||||
go-tiny-dream-ncnn
|
||||
espeak-ng'
|
||||
piper-phonemize
|
||||
piper-tts'
|
||||
llama-cpp-rpc
|
||||
;
|
||||
};
|
||||
|
||||
passthru.features = {
|
||||
inherit
|
||||
with_cublas with_openblas with_tts with_stablediffusion
|
||||
with_tinydream with_clblas;
|
||||
with_cublas
|
||||
with_openblas
|
||||
with_tts
|
||||
with_stablediffusion
|
||||
with_tinydream
|
||||
with_clblas
|
||||
;
|
||||
};
|
||||
|
||||
passthru.tests = callPackages ./tests.nix { inherit self; };
|
||||
@ -555,7 +631,10 @@ let
|
||||
description = "OpenAI alternative to run local LLMs, image and audio generation";
|
||||
homepage = "https://localai.io";
|
||||
license = licenses.mit;
|
||||
maintainers = with maintainers; [ onny ck3d ];
|
||||
maintainers = with maintainers; [
|
||||
onny
|
||||
ck3d
|
||||
];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
};
|
||||
|
@ -1,23 +1,26 @@
|
||||
{ self
|
||||
, lib
|
||||
, testers
|
||||
, fetchzip
|
||||
, fetchurl
|
||||
, writers
|
||||
, symlinkJoin
|
||||
, jq
|
||||
, prom2json
|
||||
{
|
||||
self,
|
||||
lib,
|
||||
testers,
|
||||
fetchzip,
|
||||
fetchurl,
|
||||
writers,
|
||||
symlinkJoin,
|
||||
jq,
|
||||
prom2json,
|
||||
}:
|
||||
let
|
||||
common-config = { config, ... }: {
|
||||
imports = [ ./module.nix ];
|
||||
services.local-ai = {
|
||||
enable = true;
|
||||
package = self;
|
||||
threads = config.virtualisation.cores;
|
||||
logLevel = "debug";
|
||||
common-config =
|
||||
{ config, ... }:
|
||||
{
|
||||
imports = [ ./module.nix ];
|
||||
services.local-ai = {
|
||||
enable = true;
|
||||
package = self;
|
||||
threads = config.virtualisation.cores;
|
||||
logLevel = "debug";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
inherit (self.lib) genModels;
|
||||
in
|
||||
@ -73,7 +76,9 @@ in
|
||||
virtualisation.memorySize = 2048;
|
||||
services.local-ai.models = models;
|
||||
};
|
||||
passthru = { inherit models requests; };
|
||||
passthru = {
|
||||
inherit models requests;
|
||||
};
|
||||
testScript =
|
||||
let
|
||||
port = "8080";
|
||||
@ -93,7 +98,8 @@ in
|
||||
'';
|
||||
};
|
||||
|
||||
} // lib.optionalAttrs (!self.features.with_cublas && !self.features.with_clblas) {
|
||||
}
|
||||
// lib.optionalAttrs (!self.features.with_cublas && !self.features.with_clblas) {
|
||||
# https://localai.io/docs/getting-started/manual/
|
||||
llama =
|
||||
let
|
||||
@ -146,7 +152,12 @@ in
|
||||
# https://localai.io/features/text-generation/#chat-completions
|
||||
chat-completions = {
|
||||
inherit model;
|
||||
messages = [{ role = "user"; content = "1 + 2 = ?"; }];
|
||||
messages = [
|
||||
{
|
||||
role = "user";
|
||||
content = "1 + 2 = ?";
|
||||
}
|
||||
];
|
||||
};
|
||||
# https://localai.io/features/text-generation/#edit-completions
|
||||
edit-completions = {
|
||||
@ -172,7 +183,9 @@ in
|
||||
# TODO: Add test case parallel requests
|
||||
services.local-ai.parallelRequests = 2;
|
||||
};
|
||||
passthru = { inherit models requests; };
|
||||
passthru = {
|
||||
inherit models requests;
|
||||
};
|
||||
testScript =
|
||||
let
|
||||
port = "8080";
|
||||
@ -196,80 +209,88 @@ in
|
||||
machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
|
||||
machine.copy_from_vm("completions.json")
|
||||
machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
|
||||
machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString model-configs.${model}.parameters.max_tokens}' completions.json")
|
||||
machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${
|
||||
toString model-configs.${model}.parameters.max_tokens
|
||||
}' completions.json")
|
||||
|
||||
machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
|
||||
machine.copy_from_vm("metrics.json")
|
||||
'';
|
||||
};
|
||||
|
||||
} // lib.optionalAttrs (self.features.with_tts && !self.features.with_cublas && !self.features.with_clblas) {
|
||||
# https://localai.io/features/text-to-audio/#piper
|
||||
tts =
|
||||
let
|
||||
model-stt = "whisper-en";
|
||||
model-configs.${model-stt} = {
|
||||
backend = "whisper";
|
||||
parameters.model = fetchurl {
|
||||
url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin";
|
||||
hash = "sha256-x3xXZvHO8JtrfUfyG1Rsvd1BV4hrO11tT3CekeZsfCs=";
|
||||
};
|
||||
};
|
||||
|
||||
model-tts = "piper-en";
|
||||
model-configs.${model-tts} = {
|
||||
backend = "piper";
|
||||
parameters.model = "en-us-danny-low.onnx";
|
||||
};
|
||||
|
||||
models =
|
||||
let
|
||||
models = genModels model-configs;
|
||||
in
|
||||
symlinkJoin {
|
||||
inherit (models) name;
|
||||
paths = [
|
||||
models
|
||||
(fetchzip {
|
||||
url = "https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz";
|
||||
hash = "sha256-5wf+6H5HeQY0qgdqnAG1vSqtjIFM9lXH53OgouuPm0M=";
|
||||
stripRoot = false;
|
||||
})
|
||||
];
|
||||
};
|
||||
|
||||
requests.request = {
|
||||
model = model-tts;
|
||||
input = "Hello, how are you?";
|
||||
};
|
||||
in
|
||||
testers.runNixOSTest {
|
||||
name = self.name + "-tts";
|
||||
nodes.machine = {
|
||||
imports = [ common-config ];
|
||||
virtualisation.cores = 2;
|
||||
services.local-ai.models = models;
|
||||
};
|
||||
passthru = { inherit models requests; };
|
||||
testScript =
|
||||
let
|
||||
port = "8080";
|
||||
in
|
||||
''
|
||||
machine.wait_for_open_port(${port})
|
||||
machine.succeed("curl -f http://localhost:${port}/readyz")
|
||||
machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
|
||||
machine.succeed("${jq}/bin/jq --exit-status 'debug' models.json")
|
||||
|
||||
machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" requests.request} --output out.wav")
|
||||
machine.copy_from_vm("out.wav")
|
||||
|
||||
machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${model-stt} --output transcription.json")
|
||||
machine.copy_from_vm("transcription.json")
|
||||
machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${requests.request.input}\"' transcription.json")
|
||||
|
||||
machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
|
||||
machine.copy_from_vm("metrics.json")
|
||||
'';
|
||||
};
|
||||
}
|
||||
//
|
||||
lib.optionalAttrs
|
||||
(self.features.with_tts && !self.features.with_cublas && !self.features.with_clblas)
|
||||
{
|
||||
# https://localai.io/features/text-to-audio/#piper
|
||||
tts =
|
||||
let
|
||||
model-stt = "whisper-en";
|
||||
model-configs.${model-stt} = {
|
||||
backend = "whisper";
|
||||
parameters.model = fetchurl {
|
||||
url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin";
|
||||
hash = "sha256-x3xXZvHO8JtrfUfyG1Rsvd1BV4hrO11tT3CekeZsfCs=";
|
||||
};
|
||||
};
|
||||
|
||||
model-tts = "piper-en";
|
||||
model-configs.${model-tts} = {
|
||||
backend = "piper";
|
||||
parameters.model = "en-us-danny-low.onnx";
|
||||
};
|
||||
|
||||
models =
|
||||
let
|
||||
models = genModels model-configs;
|
||||
in
|
||||
symlinkJoin {
|
||||
inherit (models) name;
|
||||
paths = [
|
||||
models
|
||||
(fetchzip {
|
||||
url = "https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz";
|
||||
hash = "sha256-5wf+6H5HeQY0qgdqnAG1vSqtjIFM9lXH53OgouuPm0M=";
|
||||
stripRoot = false;
|
||||
})
|
||||
];
|
||||
};
|
||||
|
||||
requests.request = {
|
||||
model = model-tts;
|
||||
input = "Hello, how are you?";
|
||||
};
|
||||
in
|
||||
testers.runNixOSTest {
|
||||
name = self.name + "-tts";
|
||||
nodes.machine = {
|
||||
imports = [ common-config ];
|
||||
virtualisation.cores = 2;
|
||||
services.local-ai.models = models;
|
||||
};
|
||||
passthru = {
|
||||
inherit models requests;
|
||||
};
|
||||
testScript =
|
||||
let
|
||||
port = "8080";
|
||||
in
|
||||
''
|
||||
machine.wait_for_open_port(${port})
|
||||
machine.succeed("curl -f http://localhost:${port}/readyz")
|
||||
machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
|
||||
machine.succeed("${jq}/bin/jq --exit-status 'debug' models.json")
|
||||
|
||||
machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" requests.request} --output out.wav")
|
||||
machine.copy_from_vm("out.wav")
|
||||
|
||||
machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${model-stt} --output transcription.json")
|
||||
machine.copy_from_vm("transcription.json")
|
||||
machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${requests.request.input}\"' transcription.json")
|
||||
|
||||
machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
|
||||
machine.copy_from_vm("metrics.json")
|
||||
'';
|
||||
};
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user