Llamafile 1.1.0

pts/llamafile-1.1.0 - 03 April 2024 - Update against Llamafile 0.7.

downloads.xml

<?xml version="1.0"?>
<!--Phoronix Test Suite v10.8.4-->
<!-- Download manifest for the Llamafile test profile. Each Package lists the source URL(s), MD5 and SHA256 checksums, the local FileName to save under, and a FileSize (presumably in bytes; TODO confirm against the PTS documentation). -->
<PhoronixTestSuite>
  <Downloads>
    <!-- Mistral 7B Instruct v0.2 (Q8_0) llamafile. The URL embeds a fixed revision hash after "resolve/". The local FileName appends ".7" to the upstream name, and install.sh refers to the file by that suffixed name. Marked Optional. -->
    <Package>
      <URL>https://huggingface.co/jartine/Mistral-7B-Instruct-v0.2-llamafile/resolve/1b7651d5619e4f7410a4e0005d7118f848610ca5/mistral-7b-instruct-v0.2.Q8_0.llamafile?download=true</URL>
      <MD5>02b4f43f579017bab7a6ce84034af58b</MD5>
      <SHA256>dee9af5fdbfee17530e613060db8548ca9b3be0dff2d67476766dc0c0b730857</SHA256>
      <FileName>mistral-7b-instruct-v0.2.Q8_0.llamafile.7</FileName>
      <FileSize>7720145391</FileSize>
      <Optional>TRUE</Optional>
    </Package>
    <!-- LLaVA v1.5 7B (q4) llamafile, likewise fetched from a URL with a fixed revision hash and saved with the ".7" suffix. Marked Optional. -->
    <Package>
      <URL>https://huggingface.co/jartine/llava-v1.5-7B-GGUF/resolve/b0189a9d04ca084d8808cd74ee6c2c7839e1311c/llava-v1.5-7b-q4.llamafile?download=true</URL>
      <MD5>5fde77da9ab80bded610097d2d1b213d</MD5>
      <SHA256>8a92123c4114965cab4cb394cbbea2f5a0f64db0152ccc5c2aca2ad0cf14dca8</SHA256>
      <FileName>llava-v1.5-7b-q4.llamafile.7</FileName>
      <FileSize>4282782308</FileSize>
      <Optional>TRUE</Optional>
    </Package>
    <!-- WizardCoder Python 34B v1.0 (Q6_K) llamafile; by FileSize this is the largest of the downloads. Saved with the ".7" suffix. Marked Optional. -->
    <Package>
      <URL>https://huggingface.co/jartine/WizardCoder-Python-34B-V1.0-llamafile/resolve/2bf59460e11fe5f48204df385cd33570d782e18a/wizardcoder-python-34b-v1.0.Q6_K.llamafile?download=true</URL>
      <MD5>6191ce33888e6d48ef9b54b66e6d6b2f</MD5>
      <SHA256>c8c879e988459879ea0cfe13b951666cb6bda571845f24bd04df31652fd998d4</SHA256>
      <FileName>wizardcoder-python-34b-v1.0.Q6_K.llamafile.7</FileName>
      <FileSize>27708166645</FileSize>
      <Optional>TRUE</Optional>
    </Package>
    <!-- Sample photo archive that install.sh extracts with tar. The URL element holds two comma-separated locations for the same file name (presumably mirrors; confirm). Unlike the model packages, this one has no Optional element. -->
    <Package>
      <URL>http://www.phoronix-test-suite.com/benchmark-files/pts-sample-photos-2.tar.bz2, http://www.phoronix.net/downloads/phoronix-test-suite/benchmark-files/pts-sample-photos-2.tar.bz2</URL>
      <MD5>7ff2870cf3fa2299ea5f4e77e5a8535f</MD5>
      <SHA256>e8597d7d5910a709985bffae490dfa9490483eb7838b351191156f027d9b5272</SHA256>
      <FileName>pts-sample-photos-2.tar.bz2</FileName>
      <FileSize>29599729</FileSize>
    </Package>
  </Downloads>
</PhoronixTestSuite>

install.sh

#!/bin/bash
# Installer for the Llamafile test profile.
#
# Steps:
#   1. Mark whichever model llamafiles are present as executable.
#   2. Extract the sample photo archive and record tar's exit status in
#      ~/install-exit-status.
#   3. Generate one wrapper script per model (run-mistral, run-wizardcoder,
#      run-llava) that invokes the model with a fixed prompt and forwards any
#      extra arguments.
#   4. Generate the ~/llamafile launcher, which runs the wrapper named by its
#      first argument, appends the thread count, captures all output in
#      $LOG_FILE and records the exit status in ~/test-exit-status.

# The model downloads are flagged Optional in downloads.xml, so only touch the
# ones that actually exist instead of letting chmod fail on a missing file.
for model in \
    mistral-7b-instruct-v0.2.Q8_0.llamafile.7 \
    llava-v1.5-7b-q4.llamafile.7 \
    wizardcoder-python-34b-v1.0.Q6_K.llamafile.7
do
    if [ -f "$model" ]; then
        chmod +x "$model"
    fi
done

# The echo must directly follow tar so that $? is tar's exit status.
tar -xf pts-sample-photos-2.tar.bz2
echo $? > ~/install-exit-status

# Each wrapper below is written through a quoted heredoc ('EOT'), so its body
# lands in the generated file verbatim. "$@" is quoted inside the wrappers so
# forwarded arguments are passed through without re-splitting or globbing.

# Mistral: story-writing prompt.
cat <<'EOT' > run-mistral
#!/bin/bash
./mistral-7b-instruct-v0.2.Q8_0.llamafile.7 --temp 0.7 -p '[INST]Write a long story about llamas[/INST]' "$@"
exit $?
EOT
chmod +x run-mistral

# WizardCoder: C code-completion prompt.
cat <<'EOT' > run-wizardcoder
#!/bin/bash
./wizardcoder-python-34b-v1.0.Q6_K.llamafile.7 --temp 0 -e -r '```\n' -p '```c\nvoid *memcpy_sse2(char *dst, const char *src, size_t size) {\n' "$@"
exit $?
EOT
chmod +x run-wizardcoder

# LLaVA: image-description prompt against DSC_4646.JPG (presumably one of the
# files extracted from the sample photo archive above; verify).
cat <<'EOT' > run-llava
#!/bin/bash
./llava-v1.5-7b-q4.llamafile.7 --temp 0.2 --image DSC_4646.JPG -e -p '### User: Describe in detail what do you see?\n### Assistant:' "$@"
exit $?
EOT
chmod +x run-llava

# Launcher: the first argument is the wrapper script name (prefixed with ./),
# the remaining arguments are forwarded to it. The redirect target and thread
# count are quoted so a path containing spaces cannot break the command line.
cat <<'EOT' > ~/llamafile
#!/bin/sh
./"$@" -t "$NUM_CPU_PHYSICAL_CORES" > "$LOG_FILE" 2>&1
echo $? > ~/test-exit-status
EOT
chmod +x ~/llamafile

results-definition.xml

<?xml version="1.0"?>
<!--Phoronix Test Suite v10.8.4-->
<!-- Result parser definition: describes how the benchmark figure is located in the captured test log. -->
<PhoronixTestSuite>
  <ResultsParser>
    <!-- Sample of the expected log line, with the #_RESULT_# placeholder standing where the tokens-per-second figure appears. The other numbers in the template are example values from a sample run. -->
    <OutputTemplate>llama_print_timings:        eval time =   18329.86 ms /   399 runs   (   45.94 ms per token,    #_RESULT_# tokens per second)</OutputTemplate>
    <!-- Text used to find the line of interest in the log. NOTE(review): if the log contains more than one line with this phrase (for example a separate prompt evaluation timing line), confirm that the parser picks the intended "eval time" line. -->
    <LineHint>tokens per second</LineHint>
    <!-- The result is expected immediately before this string, matching the placeholder position in the template above. -->
    <ResultBeforeString>tokens</ResultBeforeString>
  </ResultsParser>
</PhoronixTestSuite>

test-definition.xml

<?xml version="1.0"?>
<!--Phoronix Test Suite v10.8.4-->
<!-- Test profile definition for Llamafile: user-facing metadata, profile properties, and the option menus shown when the test is run. -->
<PhoronixTestSuite>
  <!-- Information presented with results. AppVersion is the upstream Llamafile release being tested; ResultScale matches the unit extracted by results-definition.xml. -->
  <TestInformation>
    <Title>Llamafile</Title>
    <AppVersion>0.7</AppVersion>
    <Description>Mozilla's Llamafile allows distributing and running large language models (LLMs) as a single file. Llamafile aims to make open-source LLMs more accessible to developers and users. Llamafile supports a variety of models, CPUs and GPUs, and other options.</Description>
    <ResultScale>Tokens Per Second</ResultScale>
    <!-- HIB: presumably "higher is better"; confirm against the PTS documentation. -->
    <Proportion>HIB</Proportion>
    <TimesToRun>3</TimesToRun>
  </TestInformation>
  <!-- Properties of the profile itself. Version is the test profile version, distinct from AppVersion above. -->
  <TestProfile>
    <Version>1.1.0</Version>
    <SupportedPlatforms>Linux</SupportedPlatforms>
    <SoftwareType>Utility</SoftwareType>
    <TestType>System</TestType>
    <License>Free</License>
    <!-- Presumably in megabytes (TODO confirm): the FileSize values in downloads.xml add up to roughly 39.7 GB, which is consistent with 40000 MB. -->
    <EnvironmentSize>40000</EnvironmentSize>
    <ProjectURL>https://llamafile.ai/</ProjectURL>
    <RepositoryURL>https://github.com/Mozilla-Ocho/llamafile</RepositoryURL>
    <Maintainer>Michael Larabel</Maintainer>
  </TestProfile>
  <TestSettings>
    <!-- Model selection. Each Value is the name of a wrapper script generated by install.sh; the launcher written by install.sh executes its first argument as ./NAME, so this value is expected to arrive first on the command line (presumably options are passed in the order listed here; verify). -->
    <Option>
      <DisplayName>Test</DisplayName>
      <Identifier>test</Identifier>
      <Menu>
        <Entry>
          <Name>mistral-7b-instruct-v0.2.Q8_0</Name>
          <Value>run-mistral</Value>
        </Entry>
        <Entry>
          <Name>llava-v1.5-7b-q4</Name>
          <Value>run-llava</Value>
        </Entry>
        <Entry>
          <Name>wizardcoder-python-34b-v1.0.Q6_K</Name>
          <Value>run-wizardcoder</Value>
        </Entry>
      </Menu>
    </Option>
    <!-- Acceleration selection. The Value is a gpu flag and its setting; the wrapper scripts forward their extra arguments to the llamafile binary, so this is where the flag ends up. -->
    <Option>
      <DisplayName>Acceleration</DisplayName>
      <Identifier>accel</Identifier>
      <Menu>
        <Entry>
          <Name>CPU</Name>
          <Value>--gpu DISABLE</Value>
        </Entry>
        <Entry>
          <Name>GPU AUTO</Name>
          <Value>--gpu AUTO</Value>
        </Entry>
      </Menu>
    </Option>
  </TestSettings>
</PhoronixTestSuite>