fix: Add new sample data, update sample container creation (#2230)

* Add new sample data, update sample container creation

* updated data sets

* another fix

* Refactor sample file-specific settings

---------

Co-authored-by: Mark Brown <mjbrown@microsoft.com>
This commit is contained in:
Laurent Nguyen
2025-10-24 14:20:24 +02:00
committed by GitHub
parent abf4b3bd0f
commit 31ec3c08bc
4 changed files with 299383 additions and 388271 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -182,7 +182,7 @@ export const FabricHomeScreen: React.FC<SplashScreenProps> = (props: SplashScree
}, },
{ {
title: "Sample Vector Data", title: "Sample Vector Data",
description: "Load sample vector data in your database", description: "Load sample vector data with text-embedding-ada-002",
icon: <img src={AzureOpenAiIcon} alt={"Azure Open AI icon"} aria-hidden="true" />, icon: <img src={AzureOpenAiIcon} alt={"Azure Open AI icon"} aria-hidden="true" />,
onClick: () => { onClick: () => {
setSelectedSampleDataConfiguration({ setSelectedSampleDataConfiguration({
@@ -203,7 +203,7 @@ export const FabricHomeScreen: React.FC<SplashScreenProps> = (props: SplashScree
title: "Sample Gallery", title: "Sample Gallery",
description: "Get real-world end-to-end samples", description: "Get real-world end-to-end samples",
icon: <img src={GithubIcon} alt={"GitHub icon"} aria-hidden="true" />, icon: <img src={GithubIcon} alt={"GitHub icon"} aria-hidden="true" />,
onClick: () => window.open("https://azurecosmosdb.github.io/gallery/?tags=example&tags=analytics", "_blank"), onClick: () => window.open("https://aka.ms/CosmosFabricSamplesGallery", "_blank"),
}, },
]; ];

View File

@@ -36,39 +36,32 @@ export enum SampleDataFile {
FABRIC_SAMPLE_VECTOR_DATA = "FabricSampleVectorData", FABRIC_SAMPLE_VECTOR_DATA = "FabricSampleVectorData",
} }
export const createContainer = async ( const containerSettings: {
databaseName: string, [key in SampleDataFile]: {
containerName: string, partitionKeyString: string;
explorer: Explorer, vectorEmbeddingPolicy?: DataModels.VectorEmbeddingPolicy;
sampleDataFile: SampleDataFile, indexingPolicy?: DataModels.IndexingPolicy;
): Promise<ViewModels.Collection> => { };
const createRequest: DataModels.CreateCollectionParams = { } = {
autoPilotMaxThroughput: isFabricNative() ? DEFAULT_FABRIC_NATIVE_CONTAINER_THROUGHPUT : undefined, [SampleDataFile.COPILOT]: {
createNewDatabase: false, partitionKeyString: "category",
collectionId: containerName,
databaseId: databaseName,
databaseLevelThroughput: false,
partitionKey: {
paths: [`/${SAMPLE_DATA_PARTITION_KEY}`],
kind: "Hash",
version: BackendDefaults.partitionKeyVersion,
}, },
vectorEmbeddingPolicy: [SampleDataFile.FABRIC_SAMPLE_DATA]: {
sampleDataFile === SampleDataFile.FABRIC_SAMPLE_VECTOR_DATA partitionKeyString: "categoryName",
? { },
[SampleDataFile.FABRIC_SAMPLE_VECTOR_DATA]: {
partitionKeyString: "categoryName",
vectorEmbeddingPolicy: {
vectorEmbeddings: [ vectorEmbeddings: [
{ {
path: "/descriptionVector", path: "/vectors",
dataType: "float32", dataType: "float32",
distanceFunction: "cosine", distanceFunction: "cosine",
dimensions: 512, dimensions: 1536,
}, },
], ],
} },
: undefined, indexingPolicy: {
indexingPolicy:
sampleDataFile === SampleDataFile.FABRIC_SAMPLE_VECTOR_DATA
? {
automatic: true, automatic: true,
indexingMode: "consistent", indexingMode: "consistent",
includedPaths: [ includedPaths: [
@@ -84,13 +77,34 @@ export const createContainer = async (
fullTextIndexes: [], fullTextIndexes: [],
vectorIndexes: [ vectorIndexes: [
{ {
path: "/descriptionVector", path: "/vectors",
type: "quantizedFlat", type: "quantizedFlat",
quantizationByteSize: 64, quantizationByteSize: 64,
}, },
], ],
} },
: undefined, },
};
export const createContainer = async (
databaseName: string,
containerName: string,
explorer: Explorer,
sampleDataFile: SampleDataFile,
): Promise<ViewModels.Collection> => {
const createRequest: DataModels.CreateCollectionParams = {
autoPilotMaxThroughput: isFabricNative() ? DEFAULT_FABRIC_NATIVE_CONTAINER_THROUGHPUT : undefined,
createNewDatabase: false,
collectionId: containerName,
databaseId: databaseName,
databaseLevelThroughput: false,
partitionKey: {
paths: [`/${containerSettings[sampleDataFile].partitionKeyString}`],
kind: "Hash",
version: BackendDefaults.partitionKeyVersion,
},
vectorEmbeddingPolicy: containerSettings[sampleDataFile].vectorEmbeddingPolicy,
indexingPolicy: containerSettings[sampleDataFile].indexingPolicy,
}; };
await createCollection(createRequest); await createCollection(createRequest);
await explorer.refreshAllDatabases(); await explorer.refreshAllDatabases();
@@ -103,8 +117,6 @@ export const createContainer = async (
return newCollection; return newCollection;
}; };
const SAMPLE_DATA_PARTITION_KEY = "category"; // This pkey is specifically set for queryCopilotSampleData.json below
export const importData = async (sampleDataFile: SampleDataFile, collection: ViewModels.Collection): Promise<void> => { export const importData = async (sampleDataFile: SampleDataFile, collection: ViewModels.Collection): Promise<void> => {
let documents: JSONObject[] = undefined; let documents: JSONObject[] = undefined;
switch (sampleDataFile) { switch (sampleDataFile) {