ollama: 0.3.12 -> 0.4.4 (#354969)

2024-11-24 09:39:07 +01:00 · 2024-11-24 09:39:07 +01:00 · 14aa5dc39d
commit 14aa5dc39d
parent ef95c99dd0 3f631d658b
4 changed files with 48 additions and 59 deletions
--- a/pkgs/by-name/ol/ollama/disable-git.patch
+++ b/pkgs/by-name/ol/ollama/disable-git.patch
@@ -1,22 +0,0 @@
-diff --git a/llm/generate/gen_common.sh b/llm/generate/gen_common.sh
-index 3825c155..d22eccd2 100644
---- a/llm/generate/gen_common.sh
-+++ b/llm/generate/gen_common.sh
-@@ -69,6 +69,8 @@ git_module_setup() {
- }
- 
- apply_patches() {
-+    return
-+
-     # apply temporary patches until fix is upstream
-     for patch in ../patches/*.patch; do
-         git -c 'user.name=nobody' -c 'user.email=<>' -C ${LLAMACPP_DIR} am ${patch}
-@@ -133,6 +135,8 @@ install() {
- 
- # Keep the local tree clean after we're done with the build
- cleanup() {
-+    return
-+
-     (cd ${LLAMACPP_DIR}/ && git checkout CMakeLists.txt)
- 
-     if [ -n "$(ls -A ../patches/*.diff)" ]; then
--- a/pkgs/by-name/ol/ollama/package.nix
+++ b/pkgs/by-name/ol/ollama/package.nix
@@ -12,6 +12,7 @@

  cmake,
  gcc12,
+  gitMinimal,
  clblast,
  libdrm,
  rocmPackages,
@@ -40,17 +41,17 @@ assert builtins.elem acceleration [
 let
  pname = "ollama";
  # don't forget to invalidate all hashes each update
-  version = "0.3.12";
+  version = "0.4.4";

  src = fetchFromGitHub {
    owner = "ollama";
    repo = "ollama";
    rev = "v${version}";
-    hash = "sha256-K1FYXEP0bTZa8M+V4/SxI+Q+LWs2rsAMZ/ETJCaO7P8=";
+    hash = "sha256-yyUm9kETNQiJjpGeVLPe67G2CrEKYNcrPFixqqq+rH4=";
    fetchSubmodules = true;
  };

-  vendorHash = "sha256-hSxcREAujhvzHVNwnRTfhi0MKI3s8HNavER2VLz6SYk=";
+  vendorHash = "sha256-1+Eb81QQcVANQQ5u1c6is8dLVGYqrXKuFnF2MBkEHms=";

  validateFallback = lib.warnIf (config.rocmSupport && config.cudaSupport) (lib.concatStrings [
    "both `nixpkgs.config.rocmSupport` and `nixpkgs.config.cudaSupport` are enabled, "
@@ -85,14 +86,22 @@ let
    cudaPackages.libcublas
    cudaPackages.cuda_cccl
  ];
+
+  # Extract the major version of CUDA. e.g. 11 12
+  cudaMajorVersion = lib.versions.major cudaPackages.cuda_cudart.version;
+
  cudaToolkit = buildEnv {
-    name = "cuda-merged";
+    # ollama hardcodes the major version in the Makefile to support different variants.
+    # - https://github.com/ollama/ollama/blob/v0.4.4/llama/Makefile#L17-L18
+    name = "cuda-merged-${cudaMajorVersion}";
    paths = map lib.getLib cudaLibs ++ [
      (lib.getOutput "static" cudaPackages.cuda_cudart)
      (lib.getBin (cudaPackages.cuda_nvcc.__spliced.buildHost or cudaPackages.cuda_nvcc))
    ];
  };

+  cudaPath = lib.removeSuffix "-${cudaMajorVersion}" cudaToolkit;
+
  metalFrameworks = with darwin.apple_sdk_11_0.frameworks; [
    Accelerate
    Metal
@@ -133,12 +142,21 @@ goBuild {
    lib.optionalAttrs enableRocm {
      ROCM_PATH = rocmPath;
      CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
+      HIP_PATH = rocmPath;
    }
-    // lib.optionalAttrs enableCuda { CUDA_LIB_DIR = "${cudaToolkit}/lib"; };
+    // lib.optionalAttrs enableCuda {
+      CUDA_PATH = cudaPath;
+    };

  nativeBuildInputs =
-    [ cmake ]
-    ++ lib.optionals enableRocm [ rocmPackages.llvm.bintools ]
+    [
+      cmake
+      gitMinimal
+    ]
+    ++ lib.optionals enableRocm [
+      rocmPackages.llvm.bintools
+      rocmLibs
+    ]
    ++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ]
    ++ lib.optionals (enableRocm || enableCuda) [
      makeWrapper
@@ -152,24 +170,13 @@ goBuild {
    ++ lib.optionals stdenv.hostPlatform.isDarwin metalFrameworks;

  patches = [
-    # disable uses of `git` in the `go generate` script
-    # ollama's build script assumes the source is a git repo, but nix removes the git directory
-    # this also disables necessary patches contained in `ollama/llm/patches/`
-    # those patches are applied in `postPatch`
-    ./disable-git.patch
-
-    # we provide our own deps at runtime
-    ./skip-rocm-cp.patch
+    # ollama's build script is unable to find hipcc
+    ./rocm.patch
  ];

  postPatch = ''
    # replace inaccurate version number with actual release version
    substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
-
-    # apply ollama's patches to `llama.cpp` submodule
-    for diff in llm/patches/*; do
-      patch -p1 -d llm/llama.cpp < $diff
-    done
  '';

  overrideModAttrs = (
@@ -180,10 +187,15 @@ goBuild {
  );

  preBuild = ''
-    # disable uses of `git`, since nix removes the git directory
-    export OLLAMA_SKIP_PATCHING=true
    # build llama.cpp libraries for ollama
-    go generate ./...
+    make -j $NIX_BUILD_CORES
+  '';
+
+  postInstall = lib.optionalString stdenv.hostPlatform.isLinux ''
+    # copy libggml_*.so and runners into lib
+    # https://github.com/ollama/ollama/blob/v0.4.4/llama/make/gpu.make#L90
+    mkdir -p $out/lib
+    cp -r dist/*/lib/* $out/lib/
  '';

  postFixup =
--- a/pkgs/by-name/ol/ollama/rocm.patch
+++ b/pkgs/by-name/ol/ollama/rocm.patch
@@ -0,0 +1,13 @@
+diff --git a/llama/make/Makefile.rocm b/llama/make/Makefile.rocm
+index 4ab176b4..cd8be223 100644
+--- a/llama/make/Makefile.rocm
+++ b/llama/make/Makefile.rocm
+@@ -15,7 +15,7 @@ ifeq ($(OS),windows)
+ 	GPU_COMPILER:=$(GPU_COMPILER_WIN)
+ else ifeq ($(OS),linux)
+ 	GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib
+-	GPU_COMPILER_LINUX := $(shell X=$$(which hipcc 2>/dev/null) && echo $$X)
++	GPU_COMPILER_LINUX := $(HIP_PATH)/bin/hipcc
+ 	GPU_COMPILER:=$(GPU_COMPILER_LINUX)
+ 	ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(GPU_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' '  | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf))
+ 	GPU_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL))
--- a/pkgs/by-name/ol/ollama/skip-rocm-cp.patch
+++ b/pkgs/by-name/ol/ollama/skip-rocm-cp.patch
@@ -1,14 +0,0 @@
-diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh
-index 48d08fd0..e50f7b36 100755
---- a/llm/generate/gen_linux.sh
-+++ b/llm/generate/gen_linux.sh
-@@ -284,9 +284,6 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
-     mkdir -p "${ROCM_DIST_DIR}"
-     for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${GOARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf ); do
-         cp -a "${dep}"* "${ROCM_DIST_DIR}"
--        if [ $(readlink -f "${dep}") != "${dep}" ] ; then
--            cp $(readlink -f "${dep}") "${ROCM_DIST_DIR}"
--        fi
-     done
-     install
-     dist