2025-02-06 17:32:29 +01:00
import React , { useMemo , useState } from 'react' ;
import Markdown , { ExtraProps } from 'react-markdown' ;
import remarkGfm from 'remark-gfm' ;
import rehypeHightlight from 'rehype-highlight' ;
import rehypeKatex from 'rehype-katex' ;
import remarkMath from 'remark-math' ;
import remarkBreaks from 'remark-breaks' ;
import 'katex/dist/katex.min.css' ;
import { classNames , copyStr } from '../utils/misc' ;
import { ElementContent , Root } from 'hast' ;
import { visit } from 'unist-util-visit' ;
2025-02-08 21:54:50 +01:00
import { useAppContext } from '../utils/app.context' ;
import { CanvasType } from '../utils/types' ;
2025-05-08 15:37:29 +02:00
import { BtnWithTooltips } from '../utils/common' ;
import { DocumentDuplicateIcon , PlayIcon } from '@heroicons/react/24/outline' ;
2025-02-06 17:32:29 +01:00
2025-02-08 21:54:50 +01:00
export default function MarkdownDisplay ( {
content ,
isGenerating ,
} : {
content : string ;
isGenerating? : boolean ;
} ) {
2025-02-06 17:32:29 +01:00
const preprocessedContent = useMemo (
( ) = > preprocessLaTeX ( content ) ,
[ content ]
) ;
return (
< Markdown
remarkPlugins = { [ remarkGfm , remarkMath , remarkBreaks ] }
rehypePlugins = { [ rehypeHightlight , rehypeKatex , rehypeCustomCopyButton ] }
components = { {
button : ( props ) = > (
2025-02-08 21:54:50 +01:00
< CodeBlockButtons
{ . . . props }
isGenerating = { isGenerating }
origContent = { preprocessedContent }
/ >
2025-02-06 17:32:29 +01:00
) ,
2025-02-08 10:42:34 +01:00
// note: do not use "pre", "p" or other basic html elements here, it will cause the node to re-render when the message is being generated (this should be a bug with react-markdown, not sure how to fix it)
2025-02-06 17:32:29 +01:00
} }
>
{ preprocessedContent }
< / Markdown >
) ;
}
2025-02-08 21:54:50 +01:00
const CodeBlockButtons : React.ElementType <
2025-02-06 17:32:29 +01:00
React . ClassAttributes < HTMLButtonElement > &
React . HTMLAttributes < HTMLButtonElement > &
2025-02-08 21:54:50 +01:00
ExtraProps & { origContent : string ; isGenerating? : boolean }
> = ( { node , origContent , isGenerating } ) = > {
const { config } = useAppContext ( ) ;
2025-02-06 17:32:29 +01:00
const startOffset = node ? . position ? . start . offset ? ? 0 ;
const endOffset = node ? . position ? . end . offset ? ? 0 ;
const copiedContent = useMemo (
( ) = >
origContent
. substring ( startOffset , endOffset )
. replace ( /^```[^\n]+\n/g , '' )
. replace ( /```$/g , '' ) ,
[ origContent , startOffset , endOffset ]
) ;
2025-02-08 21:54:50 +01:00
const codeLanguage = useMemo (
( ) = >
origContent
. substring ( startOffset , startOffset + 10 )
. match ( /^```([^\n]+)\n/ ) ? . [ 1 ] ? ? '' ,
[ origContent , startOffset ]
) ;
const canRunCode =
! isGenerating &&
config . pyIntepreterEnabled &&
codeLanguage . startsWith ( 'py' ) ;
2025-02-06 17:32:29 +01:00
return (
< div
className = { classNames ( {
2025-02-08 21:54:50 +01:00
'text-right sticky top-[7em] mb-2 mr-2 h-0' : true ,
2025-02-06 17:32:29 +01:00
'display-none' : ! node ? . position ,
} ) }
>
2025-05-08 15:37:29 +02:00
< CopyButton
className = "badge btn-mini btn-soft shadow-sm"
content = { copiedContent }
/ >
2025-02-08 21:54:50 +01:00
{ canRunCode && (
< RunPyCodeButton
2025-05-08 15:37:29 +02:00
className = "badge btn-mini shadow-sm ml-2"
2025-02-08 21:54:50 +01:00
content = { copiedContent }
/ >
) }
2025-02-06 17:32:29 +01:00
< / div >
) ;
} ;
export const CopyButton = ( {
content ,
className ,
} : {
content : string ;
className? : string ;
} ) = > {
const [ copied , setCopied ] = useState ( false ) ;
return (
2025-05-08 15:37:29 +02:00
< BtnWithTooltips
2025-02-06 17:32:29 +01:00
className = { className }
onClick = { ( ) = > {
copyStr ( content ) ;
setCopied ( true ) ;
} }
onMouseLeave = { ( ) = > setCopied ( false ) }
2025-05-08 15:37:29 +02:00
tooltipsContent = { copied ? 'Copied!' : 'Copy' }
2025-02-06 17:32:29 +01:00
>
2025-05-08 15:37:29 +02:00
< DocumentDuplicateIcon className = "h-4 w-4" / >
< / BtnWithTooltips >
2025-02-06 17:32:29 +01:00
) ;
} ;
2025-02-08 21:54:50 +01:00
export const RunPyCodeButton = ( {
content ,
className ,
} : {
content : string ;
className? : string ;
} ) = > {
const { setCanvasData } = useAppContext ( ) ;
return (
< >
2025-05-08 15:37:29 +02:00
< BtnWithTooltips
2025-02-08 21:54:50 +01:00
className = { className }
onClick = { ( ) = >
setCanvasData ( {
type : CanvasType . PY_INTERPRETER ,
content ,
} )
}
2025-05-08 15:37:29 +02:00
tooltipsContent = "Run code"
2025-02-08 21:54:50 +01:00
>
2025-05-08 15:37:29 +02:00
< PlayIcon className = "h-4 w-4" / >
< / BtnWithTooltips >
2025-02-08 21:54:50 +01:00
< / >
) ;
} ;
2025-02-06 17:32:29 +01:00
/ * *
* This injects the "button" element before each "pre" element .
* The actual button will be replaced with a react component in the MarkdownDisplay .
* We don ' t replace "pre" node directly because it will cause the node to re - render , which causes this bug : https : //github.com/ggerganov/llama.cpp/issues/9608
* /
function rehypeCustomCopyButton() {
return function ( tree : Root ) {
visit ( tree , 'element' , function ( node ) {
if ( node . tagName === 'pre' && ! node . properties . visited ) {
const preNode = { . . . node } ;
// replace current node
preNode . properties . visited = 'true' ;
node . tagName = 'div' ;
2025-02-08 21:54:50 +01:00
node . properties = { } ;
2025-02-06 17:32:29 +01:00
// add node for button
const btnNode : ElementContent = {
type : 'element' ,
tagName : 'button' ,
properties : { } ,
children : [ ] ,
position : node.position ,
} ;
node . children = [ btnNode , preNode ] ;
}
} ) ;
} ;
}
/ * *
* The part below is copied and adapted from :
* https : //github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
* ( MIT License )
* /
// Regex to check if the processed content contains any potential LaTeX patterns
const containsLatexRegex =
/\\\(.*?\\\)|\\\[.*?\\\]|\$.*?\$|\\begin\{equation\}.*?\\end\{equation\}/ ;
// Regex for inline and block LaTeX expressions
const inlineLatex = new RegExp ( /\\\((.+?)\\\)/ , 'g' ) ;
const blockLatex = new RegExp ( /\\\[(.*?[^\\])\\\]/ , 'gs' ) ;
// Function to restore code blocks
const restoreCodeBlocks = ( content : string , codeBlocks : string [ ] ) = > {
return content . replace (
/<<CODE_BLOCK_(\d+)>>/g ,
( _ , index ) = > codeBlocks [ index ]
) ;
} ;
// Regex to identify code blocks and inline code
const codeBlockRegex = /(```[\s\S]*?```|`.*?`)/g ;
export const processLaTeX = ( _content : string ) = > {
let content = _content ;
// Temporarily replace code blocks and inline code with placeholders
const codeBlocks : string [ ] = [ ] ;
let index = 0 ;
content = content . replace ( codeBlockRegex , ( match ) = > {
codeBlocks [ index ] = match ;
return ` <<CODE_BLOCK_ ${ index ++ } >> ` ;
} ) ;
// Escape dollar signs followed by a digit or space and digit
let processedContent = content . replace ( /(\$)(?=\s?\d)/g , '\\$' ) ;
// If no LaTeX patterns are found, restore code blocks and return the processed content
if ( ! containsLatexRegex . test ( processedContent ) ) {
return restoreCodeBlocks ( processedContent , codeBlocks ) ;
}
// Convert LaTeX expressions to a markdown compatible format
processedContent = processedContent
. replace ( inlineLatex , ( _ : string , equation : string ) = > ` $ ${ equation } $ ` ) // Convert inline LaTeX
. replace ( blockLatex , ( _ : string , equation : string ) = > ` $ $ ${ equation } $ $ ` ) ; // Convert block LaTeX
// Restore code blocks
return restoreCodeBlocks ( processedContent , codeBlocks ) ;
} ;
/ * *
* Preprocesses LaTeX content by replacing delimiters and escaping certain characters .
*
* @param content The input string containing LaTeX expressions .
* @returns The processed string with replaced delimiters and escaped characters .
* /
export function preprocessLaTeX ( content : string ) : string {
// Step 1: Protect code blocks
const codeBlocks : string [ ] = [ ] ;
content = content . replace ( /(```[\s\S]*?```|`[^`\n]+`)/g , ( _ , code ) = > {
codeBlocks . push ( code ) ;
return ` <<CODE_BLOCK_ ${ codeBlocks . length - 1 } >> ` ;
} ) ;
// Step 2: Protect existing LaTeX expressions
const latexExpressions : string [ ] = [ ] ;
// Protect block math ($$...$$), \[...\], and \(...\) as before.
content = content . replace (
/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g ,
( match ) = > {
latexExpressions . push ( match ) ;
return ` <<LATEX_ ${ latexExpressions . length - 1 } >> ` ;
}
) ;
// Protect inline math ($...$) only if it does NOT match a currency pattern.
// We assume a currency pattern is one where the inner content is purely numeric (with optional decimals).
content = content . replace ( /\$([^$]+)\$/g , ( match , inner ) = > {
if ( /^\s*\d+(?:\.\d+)?\s*$/ . test ( inner ) ) {
// This looks like a currency value (e.g. "$123" or "$12.34"),
// so don't protect it.
return match ;
} else {
// Otherwise, treat it as a LaTeX expression.
latexExpressions . push ( match ) ;
return ` <<LATEX_ ${ latexExpressions . length - 1 } >> ` ;
}
} ) ;
// Step 3: Escape dollar signs that are likely currency indicators.
// (Now that inline math is protected, this will only escape dollars not already protected)
content = content . replace ( /\$(?=\d)/g , '\\$' ) ;
// Step 4: Restore LaTeX expressions
content = content . replace (
/<<LATEX_(\d+)>>/g ,
( _ , index ) = > latexExpressions [ parseInt ( index ) ]
) ;
// Step 5: Restore code blocks
content = content . replace (
/<<CODE_BLOCK_(\d+)>>/g ,
( _ , index ) = > codeBlocks [ parseInt ( index ) ]
) ;
// Step 6: Apply additional escaping functions
content = escapeBrackets ( content ) ;
content = escapeMhchem ( content ) ;
return content ;
}
export function escapeBrackets ( text : string ) : string {
const pattern =
/(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g ;
return text . replace (
pattern ,
(
match : string ,
codeBlock : string | undefined ,
squareBracket : string | undefined ,
roundBracket : string | undefined
) : string = > {
if ( codeBlock != null ) {
return codeBlock ;
} else if ( squareBracket != null ) {
return ` $ $ ${ squareBracket } $ $ ` ;
} else if ( roundBracket != null ) {
return ` $ ${ roundBracket } $ ` ;
}
return match ;
}
) ;
}
export function escapeMhchem ( text : string ) {
return text . replaceAll ( '$\\ce{' , '$\\\\ce{' ) . replaceAll ( '$\\pu{' , '$\\\\pu{' ) ;
}