From 71d370b63a7af704cf1f1649f04491d5d20ca36a Mon Sep 17 00:00:00 2001 From: Radek Davidek Date: Sun, 29 Mar 2026 16:14:23 +0200 Subject: [PATCH] added support for batch_size --- .gitignore | 3 +- config.properties | 2 +- .../cz/kamma/llamarunner/CommandBuilder.java | 1 + src/main/java/cz/kamma/llamarunner/Main.java | 33 ++++++++++++++----- .../cz/kamma/llamarunner/ModelConfig.java | 7 ++++ 5 files changed, 35 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 2c48ba4..729b15e 100755 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ target .classpath .project .vscode -.claude \ No newline at end of file +.claude +.codex \ No newline at end of file diff --git a/config.properties b/config.properties index 7398fc3..a537361 100644 --- a/config.properties +++ b/config.properties @@ -1,5 +1,5 @@ #Llama Runner Configuration -#Thu Mar 26 18:27:22 CET 2026 +#Sun Mar 29 16:13:49 CEST 2026 windowHeight=1189 windowWidth=711 windowX=1849 diff --git a/src/main/java/cz/kamma/llamarunner/CommandBuilder.java b/src/main/java/cz/kamma/llamarunner/CommandBuilder.java index 17796a0..acbadc3 100644 --- a/src/main/java/cz/kamma/llamarunner/CommandBuilder.java +++ b/src/main/java/cz/kamma/llamarunner/CommandBuilder.java @@ -19,6 +19,7 @@ public class CommandBuilder { cmd.append(" --host ").append(config.getHost()); cmd.append(" --port ").append(config.getPort()); cmd.append(" --parallel ").append(config.getParallel()); + cmd.append(" --batch-size ").append(config.getBatchSize()); cmd.append(" -t ").append(config.getThreads()); cmd.append(" -fa ").append(config.isFlashAttention() ? "on" : "off"); cmd.append(" --temp ").append(config.getTemperature()); diff --git a/src/main/java/cz/kamma/llamarunner/Main.java b/src/main/java/cz/kamma/llamarunner/Main.java index e2f330d..35fc625 100644 --- a/src/main/java/cz/kamma/llamarunner/Main.java +++ b/src/main/java/cz/kamma/llamarunner/Main.java @@ -52,6 +52,7 @@ public class Main extends JFrame { private JTextField hostField; private JTextField portField; private JTextField parallelField; + private JTextField batchSizeField; private JTextField threadsField; private JCheckBox flashAttnCheckBox; private JCheckBox kvUnifiedCheckBox; @@ -332,6 +333,7 @@ public class Main extends JFrame { config.setHost(hostField.getText()); config.setPort(Integer.parseInt(portField.getText())); config.setParallel(Integer.parseInt(parallelField.getText())); + config.setBatchSize(Integer.parseInt(batchSizeField.getText())); config.setThreads(Integer.parseInt(threadsField.getText())); config.setFlashAttention(flashAttnCheckBox.isSelected()); config.setKvUnified(kvUnifiedCheckBox.isSelected()); @@ -365,6 +367,7 @@ public class Main extends JFrame { hostField.setText(config.getHost()); portField.setText(String.valueOf(config.getPort())); parallelField.setText(String.valueOf(config.getParallel())); + batchSizeField.setText(String.valueOf(config.getBatchSize())); threadsField.setText(String.valueOf(config.getThreads())); flashAttnCheckBox.setSelected(config.isFlashAttention()); kvUnifiedCheckBox.setSelected(config.isKvUnified()); @@ -710,6 +713,18 @@ public class Main extends JFrame { gbc.gridx = 0; gbc.gridy = 1; gbc.weightx = 0; + panel.add(new JLabel("Batch Size:"), gbc); + + gbc.gridx = 1; + gbc.weightx = 1.0; + batchSizeField = new JTextField("4096", 10); + batchSizeField.setCaretColor(Color.WHITE); + batchSizeField.addActionListener(e -> updateCommandPreview()); + panel.add(batchSizeField, gbc); + + gbc.gridx = 0; + gbc.gridy = 2; + gbc.weightx = 0; panel.add(new JLabel("Threads:"), gbc); gbc.gridx = 1; @@ -720,17 +735,17 @@ public class Main extends JFrame { panel.add(threadsField, gbc); gbc.gridx = 0; - gbc.gridy = 2; + gbc.gridy = 3; gbc.weightx = 0; panel.add(new JLabel(), gbc); gbc.gridx = 1; - gbc.gridy = 2; + gbc.gridy = 3; gbc.weightx = 1.0; panel.add(new JLabel(), gbc); gbc.gridx = 0; - gbc.gridy = 3; + gbc.gridy = 4; gbc.weightx = 0; flashAttnCheckBox = new JCheckBox("Flash Attention", true); flashAttnCheckBox.addActionListener(e -> updateCommandPreview()); @@ -741,7 +756,7 @@ public class Main extends JFrame { panel.add(new JLabel(), gbc); gbc.gridx = 0; - gbc.gridy = 4; + gbc.gridy = 5; gbc.weightx = 0; kvUnifiedCheckBox = new JCheckBox("KV Unified", true); kvUnifiedCheckBox.addActionListener(e -> updateCommandPreview()); @@ -752,7 +767,7 @@ public class Main extends JFrame { panel.add(new JLabel(), gbc); gbc.gridx = 0; - gbc.gridy = 5; + gbc.gridy = 6; gbc.weightx = 0; panel.add(new JLabel("Cache K:"), gbc); @@ -764,7 +779,7 @@ public class Main extends JFrame { panel.add(cacheTypeKComboBox, gbc); gbc.gridx = 0; - gbc.gridy = 6; + gbc.gridy = 7; gbc.weightx = 0; panel.add(new JLabel("Cache V:"), gbc); @@ -776,7 +791,7 @@ public class Main extends JFrame { panel.add(cacheTypeVComboBox, gbc); gbc.gridx = 0; - gbc.gridy = 7; + gbc.gridy = 8; gbc.weightx = 0; panel.add(new JLabel("GPU Layers:"), gbc); @@ -789,7 +804,7 @@ public class Main extends JFrame { // Fit parameter gbc.gridx = 0; - gbc.gridy = 8; + gbc.gridy = 9; gbc.weightx = 0; panel.add(new JLabel("Fit:"), gbc); @@ -800,7 +815,7 @@ public class Main extends JFrame { panel.add(fitCheckBox, gbc); gbc.gridx = 0; - gbc.gridy = 9; + gbc.gridy = 10; gbc.weightx = 0; panel.add(new JLabel("Reasoning:"), gbc); diff --git a/src/main/java/cz/kamma/llamarunner/ModelConfig.java b/src/main/java/cz/kamma/llamarunner/ModelConfig.java index 11cc280..6454f0a 100644 --- a/src/main/java/cz/kamma/llamarunner/ModelConfig.java +++ b/src/main/java/cz/kamma/llamarunner/ModelConfig.java @@ -7,10 +7,12 @@ import java.io.Serializable; */ public class ModelConfig implements Serializable { private static final long serialVersionUID = 1L; + private static final int DEFAULT_BATCH_SIZE = 4096; private String host; private int port; private int parallel; + private int batchSize; private int threads; private boolean flashAttention; private boolean kvUnified; @@ -32,6 +34,7 @@ public class ModelConfig implements Serializable { this.host = "0.0.0.0"; this.port = 3080; this.parallel = 1; + this.batchSize = DEFAULT_BATCH_SIZE; this.threads = 99; this.flashAttention = true; this.kvUnified = true; @@ -60,6 +63,9 @@ public class ModelConfig implements Serializable { public int getParallel() { return parallel; } public void setParallel(int parallel) { this.parallel = parallel; } + public int getBatchSize() { return batchSize > 0 ? batchSize : DEFAULT_BATCH_SIZE; } + public void setBatchSize(int batchSize) { this.batchSize = batchSize; } + public int getThreads() { return threads; } public void setThreads(int threads) { this.threads = threads; } @@ -114,6 +120,7 @@ public class ModelConfig implements Serializable { "host='" + host + '\'' + ", port=" + port + ", parallel=" + parallel + + ", batchSize=" + getBatchSize() + ", threads=" + threads + ", flashAttention=" + flashAttention + ", kvUnified=" + kvUnified +