Llama.cpp 1.1.0

pts/llama-cpp-1.1.0 - 02 June 2024 - Update against Llama.cpp upstream, switch to Llama 3 model.

downloads.xml

<?xml version="1.0"?>
<!--Phoronix Test Suite v10.8.5-->
<!-- Download manifest for this test profile. Each Package gives a URL, MD5 and SHA256 checksums,
     the local FileName, and a FileSize (presumably in bytes; confirm against the Phoronix Test Suite schema). -->
<PhoronixTestSuite>
  <Downloads>
    <!-- llama.cpp source archive for upstream tag b3067. The FileName here is the archive that
         install.sh unpacks with tar. -->
    <Package>
      <URL>https://github.com/ggerganov/llama.cpp/archive/refs/tags/b3067.tar.gz</URL>
      <MD5>951dd9ce10456799db6ad24d0d5b65bd</MD5>
      <SHA256>440319d7ace91c6ea009fb37f723150a6be7cadbb27110f86744896e278fbb86</SHA256>
      <FileName>llama.cpp-b3067.tar.gz</FileName>
      <FileSize>20214347</FileSize>
    </Package>
    <!-- Meta-Llama-3-8B-Instruct model file (Q8_0 GGUF). The URL embeds a fixed revision hash, so
         the checksums below refer to one specific upload. The trailing "?download=true" query
         string is part of the URL, which is why FileName is given explicitly.
         NOTE(review): this package is flagged Optional even though it is the only model offered
         in the test options; confirm the exact meaning of Optional in the Phoronix Test Suite. -->
    <Package>
      <URL>https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF/resolve/0910a3e69201d274d4fd68e89448114cd78e4c82/Meta-Llama-3-8B-Instruct-Q8_0.gguf?download=true</URL>
      <MD5>75bb6264a938d98d9a0b3af9dced2985</MD5>
      <SHA256>4514087a6e21a05906121ebbe22e5f8610eab5e5db561d3a1be20c7fe43167c8</SHA256>
      <FileName>Meta-Llama-3-8B-Instruct-Q8_0.gguf</FileName>
      <FileSize>8540770496</FileSize>
      <Optional>TRUE</Optional>
    </Package>
  </Downloads>
</PhoronixTestSuite>

install.sh

#!/bin/bash
# Installer for the llama.cpp b3067 benchmark.
#
# Steps:
#   1. Unpack the source archive and build it with OpenBLAS enabled.
#   2. Record the outcome of unpack + build in ~/install-exit-status.
#   3. Generate the ~/llama-cpp run script that launches ./main on a fixed prompt.
#
# Environment:
#   NUM_CPU_CORES  optional; number of parallel make jobs. Falls back to the
#                  value reported by nproc when unset or empty.

# Cap build parallelism: a bare "make -j" places no limit on concurrent jobs,
# which can exhaust memory while compiling a large C++ code base.
build_jobs="${NUM_CPU_CORES:-$(nproc)}"

# Chain the steps so that a failed extraction or cd is what gets recorded,
# rather than make being run from the wrong directory.
tar -xf llama.cpp-b3067.tar.gz &&
	cd llama.cpp-b3067 &&
	make -j "$build_jobs" LLAMA_OPENBLAS=1
echo $? > ~/install-exit-status

# The quoted heredoc delimiter keeps every $variable literal, so expansion
# happens when the generated script runs, not now. "$@" and "$LOG_FILE" are
# quoted so arguments and paths containing whitespace survive intact.
cat > ~/llama-cpp <<'EOF'
#!/bin/sh
cd llama.cpp-b3067
./main "$@" -p "Building a website can be done in 10 simple steps:" -n 512 -e -t $NUM_CPU_PHYSICAL_CORES > "$LOG_FILE" 2>&1
echo $? > ~/test-exit-status
EOF
chmod +x ~/llama-cpp

results-definition.xml

<?xml version="1.0"?>
<!--Phoronix Test Suite v10.8.5-->
<!-- Result extraction rules for the log written by the run script.
     OutputTemplate is a sample "eval time" line from llama_print_timings, with #_RESULT_# standing
     where the tokens-per-second figure appears. The other numbers in the template are sample values.
     LineHint and ResultBeforeString presumably narrow the match to a line containing
     "tokens per second" and to the value immediately preceding the word "tokens"; confirm against
     the Phoronix Test Suite results-parser documentation.
     NOTE(review): if other timing lines in the log also contain "tokens per second", verify which
     line the parser selects, or make LineHint more specific. -->
<PhoronixTestSuite>
  <ResultsParser>
    <OutputTemplate>llama_print_timings:        eval time =   38874.00 ms /   511 runs   (   76.07 ms per token,    #_RESULT_# tokens per second)</OutputTemplate>
    <LineHint>tokens per second</LineHint>
    <ResultBeforeString>tokens</ResultBeforeString>
  </ResultsParser>
</PhoronixTestSuite>

test-definition.xml

<?xml version="1.0"?>
<!--Phoronix Test Suite v10.8.5-->
<!-- Test profile definition: descriptive metadata, platform and dependency requirements, and the
     user-selectable options that become command-line arguments of the run script. -->
<PhoronixTestSuite>
  <!-- What is measured and how it is reported. AppVersion matches the llama.cpp tag that is
       downloaded and built (b3067). Proportion HIB presumably means "higher is better"; confirm
       against the Phoronix Test Suite schema. -->
  <TestInformation>
    <Title>Llama.cpp</Title>
    <AppVersion>b3067</AppVersion>
    <Description>Llama.cpp is a port of Facebook's LLaMA model in C/C++ developed by Georgi Gerganov. Llama.cpp allows the inference of LLaMA and other supported models in C/C++. For CPU inference Llama.cpp supports AVX2/AVX-512, ARM NEON, and other modern ISAs along with features like OpenBLAS usage.</Description>
    <ResultScale>Tokens Per Second</ResultScale>
    <Proportion>HIB</Proportion>
    <TimesToRun>3</TimesToRun>
  </TestInformation>
  <!-- Profile version, supported platform, and install-time requirements. blas-development
       corresponds to the LLAMA_OPENBLAS=1 build performed by install.sh.
       NOTE(review): the unit of EnvironmentSize is not visible here (presumably megabytes);
       confirm that the value still fits now that a single model is shipped. -->
  <TestProfile>
    <Version>1.1.0</Version>
    <SupportedPlatforms>Linux</SupportedPlatforms>
    <SoftwareType>Utility</SoftwareType>
    <TestType>System</TestType>
    <License>Free</License>
    <ExternalDependencies>build-utilities, blas-development</ExternalDependencies>
    <InstallRequiresInternet>TRUE</InstallRequiresInternet>
    <EnvironmentSize>58700</EnvironmentSize>
    <ProjectURL>https://github.com/ggerganov/llama.cpp/</ProjectURL>
    <RepositoryURL>https://github.com/ggerganov/llama.cpp</RepositoryURL>
    <Maintainer>Michael Larabel</Maintainer>
    <SystemDependencies>pkgconf</SystemDependencies>
  </TestProfile>
  <!-- Run-time options. The chosen Value is presumably appended to ArgumentPrefix, giving
       "-m ../Meta-Llama-3-8B-Instruct-Q8_0.gguf". The "../" is needed because the run script
       changes into llama.cpp-b3067 before starting ./main, while the model file sits one level up. -->
  <TestSettings>
    <Option>
      <DisplayName>Model</DisplayName>
      <Identifier>model</Identifier>
      <ArgumentPrefix>-m ../</ArgumentPrefix>
      <Menu>
        <Entry>
          <Name>Meta-Llama-3-8B-Instruct-Q8_0.gguf</Name>
          <Value>Meta-Llama-3-8B-Instruct-Q8_0.gguf</Value>
        </Entry>
      </Menu>
    </Option>
  </TestSettings>
</PhoronixTestSuite>