feat(GraphRAG): enhance GraphRAG by graph community summary (#1801)

Co-authored-by: Florian <fanzhidongyzby@163.com>
Co-authored-by: KingSkyLi <15566300566@163.com>
Co-authored-by: aries_ckt <916701291@qq.com>
Co-authored-by: Fangyin Cheng <staneyffer@gmail.com>
Co-authored-by: yvonneyx <zhuyuxin0627@gmail.com>
This commit is contained in:
M1n9X
2024-08-30 21:59:44 +08:00
committed by GitHub
parent 471689ba20
commit 759f7d99cc
59 changed files with 29316 additions and 411 deletions

View File

@@ -83,9 +83,9 @@ const RecallTestModal: React.FC<RecallTestModalProps> = ({ open, setOpen, space
destroyOnClose={true}
>
<Card
title='召回配置'
size='small'
className='my-4'
title="召回配置"
size="small"
className="my-4"
extra={
<Popover
placement='bottomRight'
@@ -160,7 +160,7 @@ const RecallTestModal: React.FC<RecallTestModalProps> = ({ open, setOpen, space
)} */}
</Form>
</Card>
<Card title='召回结果' size='small'>
<Card title="召回结果" size="small">
<Spin spinning={loading}>
{resultList.length > 0 ? (
<div
@@ -172,9 +172,9 @@ const RecallTestModal: React.FC<RecallTestModalProps> = ({ open, setOpen, space
{resultList.map(item => (
<Card
title={
<div className='flex items-center'>
<Tag color='blue'># {item.chunk_id}</Tag>
{item.metadata.prop_field.title}
<div className="flex items-center">
<Tag color="blue"># {item.chunk_id}</Tag>
{item.metadata.source}
</div>
}
extra={

View File

@@ -1,10 +1,10 @@
/** @type {import('next').NextConfig} */
const CopyPlugin = require('copy-webpack-plugin');
const MonacoWebpackPlugin = require('monaco-editor-webpack-plugin');
const path = require('path');
const CopyPlugin = require("copy-webpack-plugin");
const MonacoWebpackPlugin = require("monaco-editor-webpack-plugin");
const path = require("path");
const nextConfig = {
experimental: {
esmExternals: 'loose',
esmExternals: "loose",
},
typescript: {
ignoreBuildErrors: true,
@@ -27,30 +27,35 @@ const nextConfig = {
new CopyPlugin({
patterns: [
{
from: path.join(__dirname, 'node_modules/@oceanbase-odc/monaco-plugin-ob/worker-dist/'),
to: 'static/ob-workers',
from: path.join(
__dirname,
"node_modules/@oceanbase-odc/monaco-plugin-ob/worker-dist/"
),
to: "static/ob-workers",
},
],
}),
})
);
// 添加 monaco-editor-webpack-plugin 插件
config.plugins.push(
new MonacoWebpackPlugin({
// 你可以在这里配置插件的选项,例如:
languages: ['sql'],
filename: 'static/[name].worker.js',
}),
languages: ["sql"],
filename: "static/[name].worker.js",
})
);
}
return config;
},
};
const withTM = require('next-transpile-modules')([
'@berryv/g2-react',
'@antv/g2',
'react-syntax-highlighter',
'@antv/gpt-vis',
const withTM = require("next-transpile-modules")([
"@berryv/g2-react",
"@antv/g2",
"react-syntax-highlighter",
"@antv/g6",
"@antv/graphin",
"@antv/gpt-vis",
]);
module.exports = withTM({

13767
web/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -23,9 +23,12 @@
"dependencies": {
"@ant-design/cssinjs": "^1.18.4",
"@ant-design/icons": "^5.2.5",
"@antv/algorithm": "^0.1.26",
"@antv/ava": "3.5.0-alpha.4",
"@antv/g2": "^5.1.8",
"@antv/gpt-vis": "^0.0.5",
"@antv/g6": "^5.0.17",
"@antv/graphin": "^3.0.2",
"@antv/s2": "^1.51.2",
"@berryv/g2-react": "^0.1.0",
"@emotion/react": "^11.11.4",
@@ -44,6 +47,9 @@
"classnames": "^2.3.2",
"cookies-next": "^4.0.0",
"copy-to-clipboard": "^3.3.3",
"framer-motion": "^10.16.4",
"google-auth-library": "^9.2.0",
"google-one-tap": "^1.0.6",
"cytoscape": "^3.29.2",
"cytoscape-euler": "^1.2.2",
"eslint-plugin-prettier": "^5.2.1",
@@ -53,16 +59,22 @@
"dayjs": "^1.11.12",
"i18next": "^23.4.5",
"iron-session": "^6.3.1",
"iron-session": "^6.3.1",
"lodash": "^4.17.21",
"markdown-it": "^14.1.0",
"markdown-it": "^14.1.0",
"moment": "^2.29.4",
"monaco-editor": ">=0.31.0",
"multer": "^1.4.5-lts.1",
"mysql2": "^3.6.2",
"multer": "^1.4.5-lts.1",
"mysql2": "^3.6.2",
"next": "13.4.7",
"next-auth": "^4.20.1",
"next-connect": "^1.0.0-next.4",
"next-transpile-modules": "^10.0.1",
"next-connect": "^1.0.0-next.4",
"next-transpile-modules": "^10.0.1",
"nprogress": "^0.2.0",
"react": "^18.3.1",
"react-dom": "^18.3.1",
@@ -82,7 +94,6 @@
},
"devDependencies": {
"@types/crypto-js": "^4.1.2",
"@types/cytoscape": "^3.21.0",
"@types/google-one-tap": "^1.2.4",
"@types/lodash": "^4.14.195",
"@types/markdown-it": "^14.1.1",

View File

@@ -0,0 +1,232 @@
import React, { useEffect, useMemo, useRef, useState } from "react";
import { Button, Spin } from "antd";
import { RollbackOutlined } from "@ant-design/icons";
import { apiInterceptors, getGraphVis } from "@/client/api";
import { useRouter } from "next/router";
import { idOf } from "@antv/g6";
import type {
Graph,
GraphData,
GraphOptions,
ID,
IPointerEvent,
PluginOptions,
} from "@antv/g6";
import type { GraphVisResult } from "../../../types/knowledge";
import { Graphin } from "@antv/graphin";
import { getDegree, getSize, isInCommunity } from "../../../utils/graph";
import { groupBy } from "lodash";
type GraphVisData = GraphVisResult | null;
const PALETTE = [
"#5F95FF",
"#61DDAA",
"#F6BD16",
"#7262FD",
"#78D3F8",
"#9661BC",
"#F6903D",
"#008685",
"#F08BB4",
];
/**
 * Page component that fetches the graph of the knowledge space named by the
 * `spaceName` route query parameter and renders it with Graphin (G6).
 *
 * - Node size and force-layout link distance are derived from node degree.
 * - Nodes are coloured by their `communityId` data field; nodes for which
 *   `isInCommunity` returns false are painted grey.
 * - Once the graph reports ready, a "bubble-sets" plugin is appended for every
 *   `communityId` group that contains at least two nodes.
 */
function GraphVis() {
  // Passed as the `limit` option of the getGraphVis request.
  const LIMIT = 500;
  const router = useRouter();
  // Read the query from the single router instance, before anything uses it.
  const { spaceName } = router.query;
  const [data, setData] = useState<GraphVisData>(null);
  const graphRef = useRef<Graph | null>(null);
  const [isReady, setIsReady] = useState(false);

  /** Convert the API payload into the `{ nodes, edges }` shape G6 consumes. */
  const transformData = (raw: GraphVisData): GraphData => {
    if (!raw) return { nodes: [], edges: [] };
    const nodes = raw.nodes.map((node) => ({ id: node.id, data: node }));
    const edges = raw.edges.map((edge) => ({
      source: edge.source,
      target: edge.target,
      data: edge,
    }));
    return { nodes, edges };
  };

  /** Navigate back to the knowledge list page. */
  const back = () => {
    router.push(`/construct/knowledge`);
  };

  useEffect(() => {
    if (!spaceName) return;
    // Guard against a late response overwriting newer state after
    // `spaceName` changed or the component unmounted.
    let cancelled = false;
    const fetchGraphVis = async () => {
      const [, result] = await apiInterceptors(
        getGraphVis(spaceName as string, { limit: LIMIT })
      );
      if (!cancelled) setData(result);
    };
    fetchGraphVis();
    return () => {
      cancelled = true;
    };
  }, [spaceName]);

  const graphData = useMemo(() => transformData(data), [data]);

  // Degree of every node, computed once per data load. The style and layout
  // callbacks below run per node / per edge, so recomputing the degree by
  // scanning all edges on each call is needlessly expensive.
  const degreeById = useMemo(() => {
    const edges = graphData.edges ?? [];
    const degrees = new Map<ID, number>();
    graphData.nodes?.forEach((node) => {
      const id = idOf(node);
      degrees.set(id, getDegree(edges, id));
    });
    return degrees;
  }, [graphData]);

  // Ids of the nodes for which `isInCommunity` is true, computed once per data
  // load instead of regrouping every node inside each node-style call.
  const communityMemberIds = useMemo(() => {
    const ids = new Set<ID>();
    graphData.nodes?.forEach((node) => {
      const id = idOf(node);
      if (isInCommunity(graphData, id)) ids.add(id);
    });
    return ids;
  }, [graphData]);

  useEffect(() => {
    const graph = graphRef.current;
    if (!isReady || !graph) return;

    const groupedNodes = groupBy(
      graphData.nodes,
      (node) => node.data!.communityId
    );
    const plugins: PluginOptions = [];
    Object.entries(groupedNodes).forEach(([key, nodes]) => {
      if (!key || nodes.length < 2) return;
      // Reuse the rendered fill colour of the group's first node for its bubble.
      const color = graph.getElementRenderStyle(idOf(nodes[0])).fill;
      plugins.push({
        key,
        type: "bubble-sets",
        members: nodes.map(idOf),
        stroke: color,
        fill: color,
        fillOpacity: 0.1,
      });
    });
    graph.setPlugins((prev) => [...prev, ...plugins]);
    // Intentionally keyed on `isReady` only: the plugins are appended, so
    // re-running this effect would add the same bubble sets again.
  }, [isReady]);

  /** Number of edges touching the node; 0 when no id is given. */
  const getNodeDegree = (nodeId?: ID) => {
    if (!nodeId) return 0;
    // Fall back to a direct scan for ids that are not in the node list.
    return degreeById.get(nodeId) ?? getDegree(graphData.edges ?? [], nodeId);
  };

  /** Rendered node size derived from the node's degree. */
  const getNodeSize = (nodeId: ID) => {
    return getSize(getNodeDegree(nodeId));
  };

  const options: GraphOptions = {
    data: graphData,
    autoFit: "center",
    node: {
      style: (d) => {
        const style = {
          size: getNodeSize(idOf(d)),
          label: true,
          labelLineWidth: 2,
          labelText: d.data?.name as string,
          labelFontSize: 10,
          labelBackground: true,
          labelBackgroundFill: "#e5e7eb",
          labelPadding: [0, 6],
          labelBackgroundRadius: 4,
          labelMaxWidth: "400%",
          labelWordWrap: true,
        };
        // Nodes outside any sufficiently large community are drawn grey.
        if (!communityMemberIds.has(idOf(d))) {
          Object.assign(style, { fill: "#b0b0b0" });
        }
        return style;
      },
      state: {
        active: {
          lineWidth: 2,
          labelWordWrap: false,
          labelFontSize: 12,
          labelFontWeight: "bold",
        },
        inactive: {
          label: false,
        },
      },
      palette: {
        type: "group",
        field: "communityId",
        color: PALETTE,
      },
    },
    edge: {
      style: {
        lineWidth: 1,
        stroke: "#e2e2e2",
        endArrow: true,
        endArrowType: "vee",
        label: true,
        labelFontSize: 8,
        labelBackground: true,
        labelText: (e) => e.data!.name as string,
        labelBackgroundFill: "#e5e7eb",
        labelPadding: [0, 6],
        labelBackgroundRadius: 4,
        labelMaxWidth: "60%",
        labelWordWrap: true,
      },
      state: {
        active: {
          stroke: "#b0b0b0",
          labelWordWrap: false,
          labelFontSize: 10,
          labelFontWeight: "bold",
        },
        inactive: {
          label: false,
        },
      },
    },
    behaviors: [
      "drag-canvas",
      "zoom-canvas",
      "drag-element",
      {
        type: "hover-activate",
        degree: 1,
        state: "active",
        enable: (event: IPointerEvent) => ["node"].includes(event.targetType),
      },
    ],
    animation: false,
    layout: {
      type: "force",
      preventOverlap: true,
      nodeSize: (d) => getNodeSize(d?.id as ID),
      linkDistance: (edge) => {
        const { source, target } = edge as { source: ID; target: ID };
        const nodeSize = Math.min(getNodeSize(source), getNodeSize(target));
        const degree = Math.min(getNodeDegree(source), getNodeDegree(target));
        // Degree-1 endpoints stay close; otherwise the distance grows with
        // the smaller endpoint degree, capped at 700.
        return degree === 1
          ? nodeSize * 2
          : Math.min(degree * nodeSize * 1.5, 700);
      },
    },
    transforms: ["process-parallel-edges"],
  };

  // Show a spinner until the graph payload has been stored in state.
  if (!data) return <Spin className="h-full justify-center content-center" />;

  return (
    <div className="p-4 h-full overflow-y-scroll relative px-2">
      <Graphin
        ref={(ref) => {
          graphRef.current = ref;
        }}
        style={{ height: "100%", width: "100%" }}
        options={options}
        onReady={() => {
          setIsReady(true);
        }}
      >
        <Button
          style={{ background: "#fff" }}
          onClick={back}
          icon={<RollbackOutlined />}
        >
          Back
        </Button>
      </Graphin>
    </div>
  );
}
export default GraphVis;

100
web/utils/graph.ts Normal file
View File

@@ -0,0 +1,100 @@
import { idOf } from "@antv/g6";
import { pick, groupBy } from "lodash";
import type { EdgeData, GraphData, ID } from "@antv/g6";
/**
 * Copy the layout coordinates (`x`, `y`, `z`) of every node in `layoutResult`
 * onto the node with the same id in `model`. `model` is mutated in place.
 *
 * Model nodes that do not have a `style` object yet get one created, so their
 * computed position is no longer silently dropped. Layout nodes without a
 * `style`, and layout nodes with no matching model node, are skipped.
 *
 * @param model - original graph data, updated in place
 * @param layoutResult - graph data carrying the computed layout positions
 */
export function reassignLayoutStyle(model: GraphData, layoutResult: GraphData) {
  type ModelNode = NonNullable<GraphData["nodes"]>[number];

  // Index model nodes by id once instead of scanning the list per layout node.
  const modelNodesById = new Map<ID, ModelNode>();
  model.nodes?.forEach((node) => {
    // Keep the first occurrence of an id, matching what `Array.find` returned.
    if (!modelNodesById.has(node.id)) modelNodesById.set(node.id, node);
  });

  layoutResult.nodes?.forEach((layoutNode) => {
    const modelNode = modelNodesById.get(layoutNode.id);
    if (!modelNode || !layoutNode.style) return;

    const position = pick(layoutNode.style, ["x", "y", "z"]);
    if (modelNode.style) {
      Object.assign(modelNode.style, position);
    } else {
      modelNode.style = position;
    }
  });
}
/**
 * Calculate a node size from its degree by linear interpolation.
 *
 * The degree is clamped to `[minDegree, maxDegree]` and mapped linearly onto
 * `[minSize, maxSize]`. When `minDegree` equals `maxDegree` there is nothing to
 * interpolate over, so `minSize` is returned instead of `NaN` (0 / 0).
 *
 * @param degree - degree of the node
 * @param minSize - size returned for degrees at or below `minDegree`
 * @param maxSize - size returned for degrees at or above `maxDegree`
 * @param minDegree - lower bound of the degree range
 * @param maxDegree - upper bound of the degree range
 * @returns size of the node
 */
export function getSize(
  degree: number,
  minSize = 24,
  maxSize = 60,
  minDegree = 1,
  maxDegree = 10
): number {
  const degreeRange = maxDegree - minDegree;
  // A zero-width range would divide 0 by 0 below and yield NaN.
  if (degreeRange === 0) return minSize;

  const clampedDegree = Math.max(minDegree, Math.min(maxDegree, degree));
  return (
    minSize + ((clampedDegree - minDegree) / degreeRange) * (maxSize - minSize)
  );
}
/**
 * Degree of a node: how many of the given edges have the node as their source
 * or as their target.
 * @param edges - all edges data
 * @param nodeId - id of the node to measure
 * @returns number of edges touching the node
 */
export function getDegree(edges: EdgeData[], nodeId: ID) {
  const touchingEdges = getRelatedEdgesData(edges, nodeId);
  return touchingEdges.length;
}
/**
 * Collect the edges that start or end at the given node.
 * @param edges - all edges data
 * @param nodeId - id of the node of interest
 * @returns the edges whose `source` or `target` equals `nodeId`, in input order
 */
export function getRelatedEdgesData(edges: EdgeData[], nodeId: ID) {
  const touchesNode = (edge: EdgeData): boolean => {
    if (edge.source === nodeId) return true;
    return edge.target === nodeId;
  };
  return edges.filter(touchesNode);
}
/**
 * Build a key for a node out of the edges connected to it.
 *
 * Each related edge contributes `<direction>_<edge label>_<other end id>`,
 * where the direction is `->` when the node is the edge's source and `<-`
 * otherwise. The parts are sorted and joined with `+`.
 * @param edges - all edges data
 * @param nodeId - node id
 * @returns the joined edge key (empty string when the node has no edges)
 */
export function getCommunityId(edges: EdgeData[], nodeId: ID) {
  const parts: string[] = [];
  for (const edge of getRelatedEdgesData(edges, nodeId)) {
    const isOutgoing = edge.source === nodeId;
    const arrow = isOutgoing ? "->" : "<-";
    const neighbour = isOutgoing ? edge.target : edge.source;
    parts.push(`${arrow}_${edge.data!.label}_${neighbour}`);
  }
  parts.sort();
  return parts.join("+");
}
/**
 * Whether the node belongs to a community (a group of nodes sharing the same
 * `communityId` data field) that has more than `limit` nodes.
 *
 * Returns `false` instead of throwing when `nodeId` is not present in
 * `data.nodes` or when the graph has no nodes.
 * @param data - graph data
 * @param nodeId - node id
 * @param limit - community size that must be exceeded
 * @returns boolean
 */
export function isInCommunity(data: GraphData, nodeId: string, limit = 2) {
  // Optional chaining keeps nodes without `data` from throwing.
  const groupedNodes = groupBy(data.nodes, (node) => node.data?.communityId);
  const community = Object.values(groupedNodes).find((nodes) =>
    nodes.some((node) => idOf(node) === nodeId)
  );
  return community !== undefined && community.length > limit;
}

10526
web/yarn.lock Normal file

File diff suppressed because it is too large Load Diff