mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-23 19:25:51 +00:00
webui : handle PDF input (as text or image) + convert pasted long content to file (#13562)
* webui : handle PDF input (as text or image) * handle the case where pdf image + server without mtmd * fix bug missing pages
This commit is contained in:
@ -16,6 +16,8 @@ export const CONFIG_DEFAULT = {
|
||||
showTokensPerSecond: false,
|
||||
showThoughtInProgress: false,
|
||||
excludeThoughtOnReq: true,
|
||||
pasteLongTextToFileLen: 2500,
|
||||
pdfAsImage: false,
|
||||
// make sure these default values are in sync with `common.h`
|
||||
samplers: 'edkypmxt',
|
||||
temperature: 0.8,
|
||||
@ -43,6 +45,8 @@ export const CONFIG_DEFAULT = {
|
||||
export const CONFIG_INFO: Record<string, string> = {
|
||||
apiKey: 'Set the API Key if you are using --api-key option for the server.',
|
||||
systemMessage: 'The starting message that defines how model should behave.',
|
||||
pasteLongTextToFileLen:
|
||||
'On pasting long text, it will be converted to a file. You can control the file length by setting the value of this parameter. Value 0 means disable.',
|
||||
samplers:
|
||||
'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
|
||||
temperature:
|
||||
|
@ -306,6 +306,7 @@ function ChatInput({
|
||||
onStop: () => void;
|
||||
isGenerating: boolean;
|
||||
}) {
|
||||
const { config } = useAppContext();
|
||||
const [isDrag, setIsDrag] = useState(false);
|
||||
|
||||
return (
|
||||
@ -328,7 +329,28 @@ function ChatInput({
|
||||
{({ getRootProps, getInputProps }) => (
|
||||
<div
|
||||
className="flex flex-col rounded-xl border-1 border-base-content/30 p-3 w-full"
|
||||
// when a file is pasted to the input, we handle it here
|
||||
// if a text is pasted, and if it is long text, we will convert it to a file
|
||||
onPasteCapture={(e: ClipboardEvent<HTMLInputElement>) => {
|
||||
const text = e.clipboardData.getData('text/plain');
|
||||
if (
|
||||
text.length > 0 &&
|
||||
config.pasteLongTextToFileLen > 0 &&
|
||||
text.length > config.pasteLongTextToFileLen
|
||||
) {
|
||||
// if the text is too long, we will convert it to a file
|
||||
extraContext.addItems([
|
||||
{
|
||||
type: 'context',
|
||||
name: 'Pasted Content',
|
||||
content: text,
|
||||
},
|
||||
]);
|
||||
e.preventDefault();
|
||||
return;
|
||||
}
|
||||
|
||||
// if a file is pasted, we will handle it here
|
||||
const files = Array.from(e.clipboardData.items)
|
||||
.filter((item) => item.kind === 'file')
|
||||
.map((item) => item.getAsFile())
|
||||
|
@ -100,6 +100,16 @@ const SETTING_SECTIONS: SettingSection[] = [
|
||||
key,
|
||||
}) as SettingFieldInput
|
||||
),
|
||||
{
|
||||
type: SettingInputType.SHORT_INPUT,
|
||||
label: 'Paste length to file',
|
||||
key: 'pasteLongTextToFileLen',
|
||||
},
|
||||
{
|
||||
type: SettingInputType.CHECKBOX,
|
||||
label: 'Parse PDF as image instead of text',
|
||||
key: 'pdfAsImage',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
@ -452,10 +462,10 @@ function SettingsModalLongInput({
|
||||
label?: string;
|
||||
}) {
|
||||
return (
|
||||
<label className="form-control mb-2">
|
||||
<div className="label inline">{label || configKey}</div>
|
||||
<label className="form-control">
|
||||
<div className="label inline text-sm">{label || configKey}</div>
|
||||
<textarea
|
||||
className="textarea textarea-bordered h-24"
|
||||
className="textarea textarea-bordered h-24 mb-2"
|
||||
placeholder={`Default: ${CONFIG_DEFAULT[configKey] || 'none'}`}
|
||||
value={value}
|
||||
onChange={(e) => onChange(e.target.value)}
|
||||
@ -482,9 +492,7 @@ function SettingsModalShortInput({
|
||||
<>
|
||||
{/* on mobile, we simply show the help message here */}
|
||||
{helpMsg && (
|
||||
<div className="block md:hidden mb-1">
|
||||
<b>{label || configKey}</b>
|
||||
<br />
|
||||
<div className="block mb-1 opacity-75">
|
||||
<p className="text-xs">{helpMsg}</p>
|
||||
</div>
|
||||
)}
|
||||
@ -493,11 +501,6 @@ function SettingsModalShortInput({
|
||||
<div tabIndex={0} role="button" className="font-bold hidden md:block">
|
||||
{label || configKey}
|
||||
</div>
|
||||
{helpMsg && (
|
||||
<div className="dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4">
|
||||
{helpMsg}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<input
|
||||
type="text"
|
||||
|
@ -2,6 +2,17 @@ import { useState } from 'react';
|
||||
import { MessageExtra } from '../utils/types';
|
||||
import toast from 'react-hot-toast';
|
||||
import { useAppContext } from '../utils/app.context';
|
||||
import * as pdfjs from 'pdfjs-dist';
|
||||
import pdfjsWorkerSrc from 'pdfjs-dist/build/pdf.worker.min.mjs?url';
|
||||
import { TextContent, TextItem } from 'pdfjs-dist/types/src/display/api';
|
||||
|
||||
pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorkerSrc;
|
||||
|
||||
// This file handles uploading extra context items (a.k.a files)
|
||||
// It allows processing these kinds of files:
|
||||
// - image files (converted to base64)
|
||||
// - text files (including code files)
|
||||
// - pdf (converted to text)
|
||||
|
||||
// Interface describing the API returned by the hook
|
||||
export interface ChatExtraContextApi {
|
||||
@ -13,7 +24,7 @@ export interface ChatExtraContextApi {
|
||||
}
|
||||
|
||||
export function useChatExtraContext(): ChatExtraContextApi {
|
||||
const { serverProps } = useAppContext();
|
||||
const { serverProps, config } = useAppContext();
|
||||
const [items, setItems] = useState<MessageExtra[]>([]);
|
||||
|
||||
const addItems = (newItems: MessageExtra[]) => {
|
||||
@ -28,6 +39,8 @@ export function useChatExtraContext(): ChatExtraContextApi {
|
||||
setItems([]);
|
||||
};
|
||||
|
||||
const isSupportVision = serverProps?.modalities?.vision;
|
||||
|
||||
const onFileAdded = (files: File[]) => {
|
||||
for (const file of files) {
|
||||
const mimeType = file.type;
|
||||
@ -38,7 +51,7 @@ export function useChatExtraContext(): ChatExtraContextApi {
|
||||
}
|
||||
|
||||
if (mimeType.startsWith('image/')) {
|
||||
if (!serverProps?.modalities?.vision) {
|
||||
if (!isSupportVision) {
|
||||
toast.error('Multimodal is not supported by this server or model.');
|
||||
break;
|
||||
}
|
||||
@ -69,7 +82,43 @@ export function useChatExtraContext(): ChatExtraContextApi {
|
||||
toast.error('Video and audio files are not supported yet.');
|
||||
break;
|
||||
} else if (mimeType.startsWith('application/pdf')) {
|
||||
toast.error('PDF files are not supported yet.');
|
||||
if (config.pdfAsImage && !isSupportVision) {
|
||||
toast(
|
||||
'Multimodal is not supported, PDF will be converted to text instead of image.'
|
||||
);
|
||||
break;
|
||||
}
|
||||
|
||||
const promise =
|
||||
config.pdfAsImage && isSupportVision
|
||||
? convertPDFToImage(file).then((base64Urls) => {
|
||||
addItems(
|
||||
base64Urls.map((base64Url) => ({
|
||||
type: 'imageFile',
|
||||
name: file.name,
|
||||
base64Url,
|
||||
}))
|
||||
);
|
||||
})
|
||||
: convertPDFToText(file).then((content) => {
|
||||
if (isSupportVision) {
|
||||
toast.success(
|
||||
'PDF file converted to text. You can also convert it to image, see in Settings.'
|
||||
);
|
||||
}
|
||||
addItems([
|
||||
{
|
||||
type: 'textFile',
|
||||
name: file.name,
|
||||
content,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
promise.catch((error) => {
|
||||
console.error(error);
|
||||
toast.error('Failed to parse PDF file.');
|
||||
});
|
||||
break;
|
||||
} else {
|
||||
// Because there can be many text file types (like code file), we will not check the mime type
|
||||
@ -105,11 +154,69 @@ export function useChatExtraContext(): ChatExtraContextApi {
|
||||
};
|
||||
}
|
||||
|
||||
async function getFileAsBuffer(file: File): Promise<ArrayBuffer> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.onload = (event) => {
|
||||
if (event.target?.result) {
|
||||
resolve(event.target.result as ArrayBuffer);
|
||||
} else {
|
||||
reject(new Error('Failed to read file.'));
|
||||
}
|
||||
};
|
||||
reader.readAsArrayBuffer(file);
|
||||
});
|
||||
}
|
||||
|
||||
async function convertPDFToText(file: File): Promise<string> {
|
||||
const buffer = await getFileAsBuffer(file);
|
||||
const pdf = await pdfjs.getDocument(buffer).promise;
|
||||
const numPages = pdf.numPages;
|
||||
const textContentPromises: Promise<TextContent>[] = [];
|
||||
for (let i = 1; i <= numPages; i++) {
|
||||
textContentPromises.push(
|
||||
pdf.getPage(i).then((page) => page.getTextContent())
|
||||
);
|
||||
}
|
||||
const textContents = await Promise.all(textContentPromises);
|
||||
const textItems = textContents.flatMap((textContent: TextContent) =>
|
||||
textContent.items.map((item) => (item as TextItem).str ?? '')
|
||||
);
|
||||
return textItems.join('\n');
|
||||
}
|
||||
|
||||
// returns list of base64 images
|
||||
async function convertPDFToImage(file: File): Promise<string[]> {
|
||||
const buffer = await getFileAsBuffer(file);
|
||||
const doc = await pdfjs.getDocument(buffer).promise;
|
||||
const pages: Promise<string>[] = [];
|
||||
|
||||
for (let i = 1; i <= doc.numPages; i++) {
|
||||
const page = await doc.getPage(i);
|
||||
const viewport = page.getViewport({ scale: 1.5 });
|
||||
const canvas = document.createElement('canvas');
|
||||
const ctx = canvas.getContext('2d');
|
||||
canvas.width = viewport.width;
|
||||
canvas.height = viewport.height;
|
||||
if (!ctx) {
|
||||
throw new Error('Failed to get 2D context from canvas');
|
||||
}
|
||||
const task = page.render({ canvasContext: ctx, viewport: viewport });
|
||||
pages.push(
|
||||
task.promise.then(() => {
|
||||
return canvas.toDataURL();
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
return await Promise.all(pages);
|
||||
}
|
||||
|
||||
// WARN: vibe code below
|
||||
// This code is a heuristic to determine if a string is likely not binary.
|
||||
// It is necessary because input file can have various mime types which we don't have time to investigate.
|
||||
// For example, a python file can be text/plain, application/x-python, etc.
|
||||
export function isLikelyNotBinary(str: string): boolean {
|
||||
function isLikelyNotBinary(str: string): boolean {
|
||||
const options = {
|
||||
prefixLength: 1024 * 10, // Check the first 10KB of the string
|
||||
suspiciousCharThresholdRatio: 0.15, // Allow up to 15% suspicious chars
|
||||
|
Reference in New Issue
Block a user