ollama: 0.3.12 -> 0.4.4 (#354969)

This commit is contained in:
Pol Dellaiera 2024-11-24 09:39:07 +01:00 committed by GitHub
commit 14aa5dc39d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 48 additions and 59 deletions

View File

@@ -1,22 +0,0 @@
diff --git a/llm/generate/gen_common.sh b/llm/generate/gen_common.sh
index 3825c155..d22eccd2 100644
--- a/llm/generate/gen_common.sh
+++ b/llm/generate/gen_common.sh
@@ -69,6 +69,8 @@ git_module_setup() {
}
apply_patches() {
+ return
+
# apply temporary patches until fix is upstream
for patch in ../patches/*.patch; do
git -c 'user.name=nobody' -c 'user.email=<>' -C ${LLAMACPP_DIR} am ${patch}
@@ -133,6 +135,8 @@ install() {
# Keep the local tree clean after we're done with the build
cleanup() {
+ return
+
(cd ${LLAMACPP_DIR}/ && git checkout CMakeLists.txt)
if [ -n "$(ls -A ../patches/*.diff)" ]; then

View File

@@ -12,6 +12,7 @@
cmake,
gcc12,
gitMinimal,
clblast,
libdrm,
rocmPackages,
@@ -40,17 +41,17 @@ assert builtins.elem acceleration [
let
pname = "ollama";
# don't forget to invalidate all hashes each update
version = "0.3.12";
version = "0.4.4";
src = fetchFromGitHub {
owner = "ollama";
repo = "ollama";
rev = "v${version}";
hash = "sha256-K1FYXEP0bTZa8M+V4/SxI+Q+LWs2rsAMZ/ETJCaO7P8=";
hash = "sha256-yyUm9kETNQiJjpGeVLPe67G2CrEKYNcrPFixqqq+rH4=";
fetchSubmodules = true;
};
vendorHash = "sha256-hSxcREAujhvzHVNwnRTfhi0MKI3s8HNavER2VLz6SYk=";
vendorHash = "sha256-1+Eb81QQcVANQQ5u1c6is8dLVGYqrXKuFnF2MBkEHms=";
validateFallback = lib.warnIf (config.rocmSupport && config.cudaSupport) (lib.concatStrings [
"both `nixpkgs.config.rocmSupport` and `nixpkgs.config.cudaSupport` are enabled, "
@@ -85,14 +86,22 @@ let
cudaPackages.libcublas
cudaPackages.cuda_cccl
];
# Extract the major version of CUDA, e.g. 11 or 12.
cudaMajorVersion = lib.versions.major cudaPackages.cuda_cudart.version;
cudaToolkit = buildEnv {
name = "cuda-merged";
# ollama hardcodes the major version in the Makefile to support different variants.
# - https://github.com/ollama/ollama/blob/v0.4.4/llama/Makefile#L17-L18
name = "cuda-merged-${cudaMajorVersion}";
paths = map lib.getLib cudaLibs ++ [
(lib.getOutput "static" cudaPackages.cuda_cudart)
(lib.getBin (cudaPackages.cuda_nvcc.__spliced.buildHost or cudaPackages.cuda_nvcc))
];
};
cudaPath = lib.removeSuffix "-${cudaMajorVersion}" cudaToolkit;
metalFrameworks = with darwin.apple_sdk_11_0.frameworks; [
Accelerate
Metal
@@ -133,12 +142,21 @@ goBuild {
lib.optionalAttrs enableRocm {
ROCM_PATH = rocmPath;
CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
HIP_PATH = rocmPath;
}
// lib.optionalAttrs enableCuda { CUDA_LIB_DIR = "${cudaToolkit}/lib"; };
// lib.optionalAttrs enableCuda {
CUDA_PATH = cudaPath;
};
nativeBuildInputs =
[ cmake ]
++ lib.optionals enableRocm [ rocmPackages.llvm.bintools ]
[
cmake
gitMinimal
]
++ lib.optionals enableRocm [
rocmPackages.llvm.bintools
rocmLibs
]
++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ]
++ lib.optionals (enableRocm || enableCuda) [
makeWrapper
@@ -152,24 +170,13 @@ goBuild {
++ lib.optionals stdenv.hostPlatform.isDarwin metalFrameworks;
patches = [
# disable uses of `git` in the `go generate` script
# ollama's build script assumes the source is a git repo, but nix removes the git directory
# this also disables necessary patches contained in `ollama/llm/patches/`
# those patches are applied in `postPatch`
./disable-git.patch
# we provide our own deps at runtime
./skip-rocm-cp.patch
# ollama's build script is unable to find hipcc
./rocm.patch
];
postPatch = ''
# replace inaccurate version number with actual release version
substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
# apply ollama's patches to `llama.cpp` submodule
for diff in llm/patches/*; do
patch -p1 -d llm/llama.cpp < $diff
done
'';
overrideModAttrs = (
@@ -180,10 +187,15 @@ goBuild {
);
preBuild = ''
# disable uses of `git`, since nix removes the git directory
export OLLAMA_SKIP_PATCHING=true
# build llama.cpp libraries for ollama
go generate ./...
make -j $NIX_BUILD_CORES
'';
postInstall = lib.optionalString stdenv.hostPlatform.isLinux ''
# copy libggml_*.so and runners into lib
# https://github.com/ollama/ollama/blob/v0.4.4/llama/make/gpu.make#L90
mkdir -p $out/lib
cp -r dist/*/lib/* $out/lib/
'';
postFixup =

View File

@@ -0,0 +1,13 @@
diff --git a/llama/make/Makefile.rocm b/llama/make/Makefile.rocm
index 4ab176b4..cd8be223 100644
--- a/llama/make/Makefile.rocm
+++ b/llama/make/Makefile.rocm
@@ -15,7 +15,7 @@ ifeq ($(OS),windows)
GPU_COMPILER:=$(GPU_COMPILER_WIN)
else ifeq ($(OS),linux)
GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib
- GPU_COMPILER_LINUX := $(shell X=$$(which hipcc 2>/dev/null) && echo $$X)
+ GPU_COMPILER_LINUX := $(HIP_PATH)/bin/hipcc
GPU_COMPILER:=$(GPU_COMPILER_LINUX)
ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(GPU_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf))
GPU_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL))

View File

@@ -1,14 +0,0 @@
diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh
index 48d08fd0..e50f7b36 100755
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -284,9 +284,6 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
mkdir -p "${ROCM_DIST_DIR}"
for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${GOARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf ); do
cp -a "${dep}"* "${ROCM_DIST_DIR}"
- if [ $(readlink -f "${dep}") != "${dep}" ] ; then
- cp $(readlink -f "${dep}") "${ROCM_DIST_DIR}"
- fi
done
install
dist