added support for batch_size

This commit is contained in:
Radek Davidek 2026-03-29 16:14:23 +02:00
parent f5a52a93c3
commit 71d370b63a
5 changed files with 35 additions and 11 deletions

1
.gitignore vendored
View File

@ -3,3 +3,4 @@ target
.project .project
.vscode .vscode
.claude .claude
.codex

View File

@ -1,5 +1,5 @@
#Llama Runner Configuration #Llama Runner Configuration
#Thu Mar 26 18:27:22 CET 2026 #Sun Mar 29 16:13:49 CEST 2026
windowHeight=1189 windowHeight=1189
windowWidth=711 windowWidth=711
windowX=1849 windowX=1849

View File

@ -19,6 +19,7 @@ public class CommandBuilder {
cmd.append(" --host ").append(config.getHost()); cmd.append(" --host ").append(config.getHost());
cmd.append(" --port ").append(config.getPort()); cmd.append(" --port ").append(config.getPort());
cmd.append(" --parallel ").append(config.getParallel()); cmd.append(" --parallel ").append(config.getParallel());
cmd.append(" --batch-size ").append(config.getBatchSize());
cmd.append(" -t ").append(config.getThreads()); cmd.append(" -t ").append(config.getThreads());
cmd.append(" -fa ").append(config.isFlashAttention() ? "on" : "off"); cmd.append(" -fa ").append(config.isFlashAttention() ? "on" : "off");
cmd.append(" --temp ").append(config.getTemperature()); cmd.append(" --temp ").append(config.getTemperature());

View File

@ -52,6 +52,7 @@ public class Main extends JFrame {
private JTextField hostField; private JTextField hostField;
private JTextField portField; private JTextField portField;
private JTextField parallelField; private JTextField parallelField;
private JTextField batchSizeField;
private JTextField threadsField; private JTextField threadsField;
private JCheckBox flashAttnCheckBox; private JCheckBox flashAttnCheckBox;
private JCheckBox kvUnifiedCheckBox; private JCheckBox kvUnifiedCheckBox;
@ -332,6 +333,7 @@ public class Main extends JFrame {
config.setHost(hostField.getText()); config.setHost(hostField.getText());
config.setPort(Integer.parseInt(portField.getText())); config.setPort(Integer.parseInt(portField.getText()));
config.setParallel(Integer.parseInt(parallelField.getText())); config.setParallel(Integer.parseInt(parallelField.getText()));
config.setBatchSize(Integer.parseInt(batchSizeField.getText()));
config.setThreads(Integer.parseInt(threadsField.getText())); config.setThreads(Integer.parseInt(threadsField.getText()));
config.setFlashAttention(flashAttnCheckBox.isSelected()); config.setFlashAttention(flashAttnCheckBox.isSelected());
config.setKvUnified(kvUnifiedCheckBox.isSelected()); config.setKvUnified(kvUnifiedCheckBox.isSelected());
@ -365,6 +367,7 @@ public class Main extends JFrame {
hostField.setText(config.getHost()); hostField.setText(config.getHost());
portField.setText(String.valueOf(config.getPort())); portField.setText(String.valueOf(config.getPort()));
parallelField.setText(String.valueOf(config.getParallel())); parallelField.setText(String.valueOf(config.getParallel()));
batchSizeField.setText(String.valueOf(config.getBatchSize()));
threadsField.setText(String.valueOf(config.getThreads())); threadsField.setText(String.valueOf(config.getThreads()));
flashAttnCheckBox.setSelected(config.isFlashAttention()); flashAttnCheckBox.setSelected(config.isFlashAttention());
kvUnifiedCheckBox.setSelected(config.isKvUnified()); kvUnifiedCheckBox.setSelected(config.isKvUnified());
@ -710,6 +713,18 @@ public class Main extends JFrame {
gbc.gridx = 0; gbc.gridx = 0;
gbc.gridy = 1; gbc.gridy = 1;
gbc.weightx = 0; gbc.weightx = 0;
panel.add(new JLabel("Batch Size:"), gbc);
gbc.gridx = 1;
gbc.weightx = 1.0;
batchSizeField = new JTextField("4096", 10);
batchSizeField.setCaretColor(Color.WHITE);
batchSizeField.addActionListener(e -> updateCommandPreview());
panel.add(batchSizeField, gbc);
gbc.gridx = 0;
gbc.gridy = 2;
gbc.weightx = 0;
panel.add(new JLabel("Threads:"), gbc); panel.add(new JLabel("Threads:"), gbc);
gbc.gridx = 1; gbc.gridx = 1;
@ -720,17 +735,17 @@ public class Main extends JFrame {
panel.add(threadsField, gbc); panel.add(threadsField, gbc);
gbc.gridx = 0; gbc.gridx = 0;
gbc.gridy = 2; gbc.gridy = 3;
gbc.weightx = 0; gbc.weightx = 0;
panel.add(new JLabel(), gbc); panel.add(new JLabel(), gbc);
gbc.gridx = 1; gbc.gridx = 1;
gbc.gridy = 2; gbc.gridy = 3;
gbc.weightx = 1.0; gbc.weightx = 1.0;
panel.add(new JLabel(), gbc); panel.add(new JLabel(), gbc);
gbc.gridx = 0; gbc.gridx = 0;
gbc.gridy = 3; gbc.gridy = 4;
gbc.weightx = 0; gbc.weightx = 0;
flashAttnCheckBox = new JCheckBox("Flash Attention", true); flashAttnCheckBox = new JCheckBox("Flash Attention", true);
flashAttnCheckBox.addActionListener(e -> updateCommandPreview()); flashAttnCheckBox.addActionListener(e -> updateCommandPreview());
@ -741,7 +756,7 @@ public class Main extends JFrame {
panel.add(new JLabel(), gbc); panel.add(new JLabel(), gbc);
gbc.gridx = 0; gbc.gridx = 0;
gbc.gridy = 4; gbc.gridy = 5;
gbc.weightx = 0; gbc.weightx = 0;
kvUnifiedCheckBox = new JCheckBox("KV Unified", true); kvUnifiedCheckBox = new JCheckBox("KV Unified", true);
kvUnifiedCheckBox.addActionListener(e -> updateCommandPreview()); kvUnifiedCheckBox.addActionListener(e -> updateCommandPreview());
@ -752,7 +767,7 @@ public class Main extends JFrame {
panel.add(new JLabel(), gbc); panel.add(new JLabel(), gbc);
gbc.gridx = 0; gbc.gridx = 0;
gbc.gridy = 5; gbc.gridy = 6;
gbc.weightx = 0; gbc.weightx = 0;
panel.add(new JLabel("Cache K:"), gbc); panel.add(new JLabel("Cache K:"), gbc);
@ -764,7 +779,7 @@ public class Main extends JFrame {
panel.add(cacheTypeKComboBox, gbc); panel.add(cacheTypeKComboBox, gbc);
gbc.gridx = 0; gbc.gridx = 0;
gbc.gridy = 6; gbc.gridy = 7;
gbc.weightx = 0; gbc.weightx = 0;
panel.add(new JLabel("Cache V:"), gbc); panel.add(new JLabel("Cache V:"), gbc);
@ -776,7 +791,7 @@ public class Main extends JFrame {
panel.add(cacheTypeVComboBox, gbc); panel.add(cacheTypeVComboBox, gbc);
gbc.gridx = 0; gbc.gridx = 0;
gbc.gridy = 7; gbc.gridy = 8;
gbc.weightx = 0; gbc.weightx = 0;
panel.add(new JLabel("GPU Layers:"), gbc); panel.add(new JLabel("GPU Layers:"), gbc);
@ -789,7 +804,7 @@ public class Main extends JFrame {
// Fit parameter // Fit parameter
gbc.gridx = 0; gbc.gridx = 0;
gbc.gridy = 8; gbc.gridy = 9;
gbc.weightx = 0; gbc.weightx = 0;
panel.add(new JLabel("Fit:"), gbc); panel.add(new JLabel("Fit:"), gbc);
@ -800,7 +815,7 @@ public class Main extends JFrame {
panel.add(fitCheckBox, gbc); panel.add(fitCheckBox, gbc);
gbc.gridx = 0; gbc.gridx = 0;
gbc.gridy = 9; gbc.gridy = 10;
gbc.weightx = 0; gbc.weightx = 0;
panel.add(new JLabel("Reasoning:"), gbc); panel.add(new JLabel("Reasoning:"), gbc);

View File

@ -7,10 +7,12 @@ import java.io.Serializable;
*/ */
public class ModelConfig implements Serializable { public class ModelConfig implements Serializable {
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
private static final int DEFAULT_BATCH_SIZE = 4096;
private String host; private String host;
private int port; private int port;
private int parallel; private int parallel;
private int batchSize;
private int threads; private int threads;
private boolean flashAttention; private boolean flashAttention;
private boolean kvUnified; private boolean kvUnified;
@ -32,6 +34,7 @@ public class ModelConfig implements Serializable {
this.host = "0.0.0.0"; this.host = "0.0.0.0";
this.port = 3080; this.port = 3080;
this.parallel = 1; this.parallel = 1;
this.batchSize = DEFAULT_BATCH_SIZE;
this.threads = 99; this.threads = 99;
this.flashAttention = true; this.flashAttention = true;
this.kvUnified = true; this.kvUnified = true;
@ -60,6 +63,9 @@ public class ModelConfig implements Serializable {
public int getParallel() { return parallel; } public int getParallel() { return parallel; }
public void setParallel(int parallel) { this.parallel = parallel; } public void setParallel(int parallel) { this.parallel = parallel; }
public int getBatchSize() { return batchSize > 0 ? batchSize : DEFAULT_BATCH_SIZE; }
public void setBatchSize(int batchSize) { this.batchSize = batchSize; }
public int getThreads() { return threads; } public int getThreads() { return threads; }
public void setThreads(int threads) { this.threads = threads; } public void setThreads(int threads) { this.threads = threads; }
@ -114,6 +120,7 @@ public class ModelConfig implements Serializable {
"host='" + host + '\'' + "host='" + host + '\'' +
", port=" + port + ", port=" + port +
", parallel=" + parallel + ", parallel=" + parallel +
", batchSize=" + getBatchSize() +
", threads=" + threads + ", threads=" + threads +
", flashAttention=" + flashAttention + ", flashAttention=" + flashAttention +
", kvUnified=" + kvUnified + ", kvUnified=" + kvUnified +