Llamafile 1.3.0
pts/llamafile-1.3.0
04 December 2024
Update against llamafile 0.8.16.
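
For context, a profile like this is normally fetched and run through the Phoronix Test Suite rather than by hand. A minimal invocation, assuming the suite is installed, looks like:

# Download, install, and run this test profile; the suite prompts
# interactively for the Model and Test options defined below.
phoronix-test-suite benchmark pts/llamafile-1.3.0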
downloads.xml
<?xml version="1.0"?>
<!--Phoronix Test Suite v10.8.5-->
<PhoronixTestSuite>
  <Downloads>
    <Package>
      <URL>https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.17/llamafile-bench-0.8.17</URL>
      <MD5>dd17f074069cc0fb3ce9dfed378992c9</MD5>
      <SHA256>a8f1e158a72dd69b3e5389192349146221578608770c46fcb1a5cba9360f7973</SHA256>
      <FileName>llamafile-bench-0.8.17</FileName>
      <FileSize>8953691</FileSize>
    </Package>
    <Package>
      <URL>https://huggingface.co/Mozilla/Mistral-7B-Instruct-v0.2-llamafile/resolve/772507f9981f04bca0b745027b9a9cf585cb490c/mistral-7b-instruct-v0.2.Q5_K_M.llamafile?download=true</URL>
      <MD5>991dd4c8a4ca30740854b9dc82286ae1</MD5>
      <SHA256>f68c1e37d4d2100b9acbf5ccf806461ab6c8e47c351fb69133eb8aa545b06883</SHA256>
      <FileName>mistral-7b-instruct-v0.2.Q5_K_M.llamafile.86</FileName>
      <FileSize>5166938210</FileSize>
      <Optional>TRUE</Optional>
    </Package>
    <Package>
      <URL>https://huggingface.co/Mozilla/WizardCoder-Python-34B-V1.0-llamafile/resolve/f7bf923573f2d667551d098bb36822571f2b6c11/wizardcoder-python-34b-v1.0.Q6_K.llamafile?download=true</URL>
      <MD5>4a07ecb839f534f95d7acc25f33f7f01</MD5>
      <SHA256>97480e6106fdc337ae23e0c7d6d842be260ffb35e0b76d75d3f02e5341294e5a</SHA256>
      <FileName>wizardcoder-python-34b-v1.0.Q6_K.llamafile.86</FileName>
      <FileSize>27708232181</FileSize>
      <Optional>TRUE</Optional>
    </Package>
    <Package>
      <URL>https://huggingface.co/Mozilla/Llama-3.2-3B-Instruct-llamafile/resolve/fd32939695a41a1dc32b418385f3862b79fac82d/Llama-3.2-3B-Instruct.Q6_K.llamafile?download=true</URL>
      <MD5>6527e1f694d3c8f0371753afd62da6b6</MD5>
      <SHA256>6d5ca841cbd0c7f6987bdbf4b3b0fbbc98f39d079e6068e84f284de9001170db</SHA256>
      <FileName>Llama-3.2-3B-Instruct.Q6_K.llamafile</FileName>
      <FileSize>2814261409</FileSize>
      <Optional>TRUE</Optional>
    </Package>
    <Package>
      <URL>https://huggingface.co/Mozilla/TinyLlama-1.1B-Chat-v1.0-llamafile/resolve/846197003068d371225a5b81649053eaa7ae8ae4/TinyLlama-1.1B-Chat-v1.0.BF16.llamafile?download=true</URL>
      <MD5>38e5b80664833b4f35332763eff2ad4d</MD5>
      <SHA256>7060208eb3984ae86a5906134d2df6275ee3f3c05a044e9f3460b200d262fec1</SHA256>
      <FileName>TinyLlama-1.1B-Chat-v1.0.BF16.llamafile</FileName>
      <FileSize>2371303518</FileSize>
      <Optional>TRUE</Optional>
    </Package>
  </Downloads>
</PhoronixTestSuite>
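
The MD5, SHA256, and FileSize fields above are what the suite uses to validate each download before installation. A manual spot-check of the benchmark binary, assuming it sits in the current directory, would be:

# Verify the binary against the <SHA256> value from downloads.xml
# (two spaces between hash and filename, per sha256sum's check format).
echo "a8f1e158a72dd69b3e5389192349146221578608770c46fcb1a5cba9360f7973  llamafile-bench-0.8.17" | sha256sum --check -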
install.sh
#!/bin/bash
chmod +x llamafile-bench-0.8.17
echo $? > ~/install-exit-status

echo "#!/bin/sh
./llamafile-bench-0.8.17 -t \$NUM_CPU_PHYSICAL_CORES -v \$@ > \$LOG_FILE 2>&1
echo \$? > ~/test-exit-status" > ~/llamafile
chmod +x ~/llamafile
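
The script marks the downloaded benchmark binary executable and writes a ~/llamafile wrapper; NUM_CPU_PHYSICAL_CORES and LOG_FILE are environment variables the Phoronix Test Suite supplies at run time, and the remaining arguments come from the option values in test-definition.xml. A hypothetical by-hand invocation of the wrapper (the variable values here are placeholders, not anything the profile prescribes):

# Stand-in values; the suite normally provides both variables itself.
export NUM_CPU_PHYSICAL_CORES=8
export LOG_FILE=/tmp/llamafile-run.log
~/llamafile -m TinyLlama-1.1B-Chat-v1.0.BF16.llamafile -r 15 -n 128 -p 0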
results-definition.xml
<?xml version="1.0"?>
<!--Phoronix Test Suite v10.8.5-->
<PhoronixTestSuite>
  <ResultsParser>
    <OutputTemplate>llama_print_timings: eval time = 34060.90 ms / 385 runs ( 88.47 ms per token, #_RESULT_# tokens per second)</OutputTemplate>
    <LineHint>tokens per second</LineHint>
    <ResultBeforeString>tokens</ResultBeforeString>
  </ResultsParser>
</PhoronixTestSuite>
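
In other words, the parser finds the log line containing "tokens per second" and reads the number where #_RESULT_# sits in the template, i.e. immediately before the word "tokens". A rough shell approximation of that extraction (illustrative only; the actual parsing happens inside the suite's ResultsParser):

# Pull the tokens-per-second figure out of a matching llama_print_timings line.
grep 'tokens per second' "$LOG_FILE" | \
    sed -E 's/.*ms per token, *([0-9.]+) tokens per second.*/\1/'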
test-definition.xml
<?xml version="1.0"?>
<!--Phoronix Test Suite v10.8.5-->
<PhoronixTestSuite>
  <TestInformation>
    <Title>Llamafile</Title>
    <AppVersion>0.8.16</AppVersion>
    <Description>Mozilla's Llamafile allows distributing and running large language models (LLMs) as a single file. Llamafile aims to make open-source LLMs more accessible to developers and users. Llamafile supports a variety of models, CPUs and GPUs, and other options.</Description>
    <ResultScale>Tokens Per Second</ResultScale>
    <Proportion>HIB</Proportion>
    <TimesToRun>3</TimesToRun>
  </TestInformation>
  <TestProfile>
    <Version>1.3.0</Version>
    <SupportedPlatforms>Linux</SupportedPlatforms>
    <SoftwareType>Utility</SoftwareType>
    <TestType>System</TestType>
    <License>Free</License>
    <EnvironmentSize>40000</EnvironmentSize>
    <ProjectURL>https://llamafile.ai/</ProjectURL>
    <RepositoryURL>https://github.com/Mozilla-Ocho/llamafile</RepositoryURL>
    <Maintainer>Michael Larabel</Maintainer>
  </TestProfile>
  <TestSettings>
    <Option>
      <DisplayName>Model</DisplayName>
      <Identifier>model</Identifier>
      <ArgumentPrefix>-m </ArgumentPrefix>
      <Menu>
        <Entry>
          <Name>mistral-7b-instruct-v0.2.Q5_K_M</Name>
          <Value>mistral-7b-instruct-v0.2.Q5_K_M.llamafile.86 -r 15</Value>
        </Entry>
        <Entry>
          <Name>Llama-3.2-3B-Instruct.Q6_K</Name>
          <Value>Llama-3.2-3B-Instruct.Q6_K.llamafile -r 15</Value>
        </Entry>
        <Entry>
          <Name>TinyLlama-1.1B-Chat-v1.0.BF16</Name>
          <Value>TinyLlama-1.1B-Chat-v1.0.BF16.llamafile -r 15</Value>
        </Entry>
        <Entry>
          <Name>wizardcoder-python-34b-v1.0.Q6_K</Name>
          <Value>wizardcoder-python-34b-v1.0.Q6_K.llamafile.86 -r 5</Value>
        </Entry>
      </Menu>
    </Option>
    <Option>
      <DisplayName>Test</DisplayName>
      <Identifier>test</Identifier>
      <Menu>
        <Entry>
          <Name>Text Generation 16</Name>
          <Value>-n 16 -p 0</Value>
        </Entry>
        <Entry>
          <Name>Text Generation 128</Name>
          <Value>-n 128 -p 0</Value>
        </Entry>
        <Entry>
          <Name>Prompt Processing 256</Name>
          <Value>-n 0 -p 256</Value>
        </Entry>
        <Entry>
          <Name>Prompt Processing 512</Name>
          <Value>-n 0 -p 512</Value>
        </Entry>
        <Entry>
          <Name>Prompt Processing 1024</Name>
          <Value>-n 0 -p 1024</Value>
        </Entry>
        <Entry>
          <Name>Prompt Processing 2048</Name>
          <Value>-n 0 -p 2048</Value>
        </Entry>
      </Menu>
    </Option>
  </TestSettings>
</PhoronixTestSuite>
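
Putting the two option menus together: the Model value supplies the weights file plus a repetition count (-r), the Test value supplies the token-generation (-n) and prompt-processing (-p) sizes, and the wrapper from install.sh prepends the thread count. As a sketch, selecting mistral-7b-instruct-v0.2.Q5_K_M with Prompt Processing 512 should expand to roughly:

# Effective command line for one Model/Test combination (thread count
# comes from the suite's NUM_CPU_PHYSICAL_CORES at run time).
./llamafile-bench-0.8.17 -t $NUM_CPU_PHYSICAL_CORES -v \
    -m mistral-7b-instruct-v0.2.Q5_K_M.llamafile.86 -r 15 -n 0 -p 512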