fix: Add new sample data, update sample container creation (#2230)

* add new sample data, update sample container creation

* updated data sets

* another fix

* Refactor sample file-specific settings

---------

Co-authored-by: Mark Brown <mjbrown@microsoft.com>
This commit is contained in:
Laurent Nguyen
2025-10-24 14:20:24 +02:00
committed by GitHub
parent abf4b3bd0f
commit 31ec3c08bc
4 changed files with 299383 additions and 388271 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -182,7 +182,7 @@ export const FabricHomeScreen: React.FC<SplashScreenProps> = (props: SplashScree
},
{
title: "Sample Vector Data",
description: "Load sample vector data in your database",
description: "Load sample vector data with text-embedding-ada-002",
icon: <img src={AzureOpenAiIcon} alt={"Azure Open AI icon"} aria-hidden="true" />,
onClick: () => {
setSelectedSampleDataConfiguration({
@@ -203,7 +203,7 @@ export const FabricHomeScreen: React.FC<SplashScreenProps> = (props: SplashScree
title: "Sample Gallery",
description: "Get real-world end-to-end samples",
icon: <img src={GithubIcon} alt={"GitHub icon"} aria-hidden="true" />,
onClick: () => window.open("https://azurecosmosdb.github.io/gallery/?tags=example&tags=analytics", "_blank"),
onClick: () => window.open("https://aka.ms/CosmosFabricSamplesGallery", "_blank"),
},
];

View File

@@ -36,6 +36,56 @@ export enum SampleDataFile {
FABRIC_SAMPLE_VECTOR_DATA = "FabricSampleVectorData",
}
/**
 * Container-creation settings that vary from one sample data file to another.
 */
interface SampleContainerSettings {
  /** Partition key property name, stored without a leading slash. */
  partitionKeyString: string;
  /** Vector embedding policy for the sample container; left unset for samples that define none. */
  vectorEmbeddingPolicy?: DataModels.VectorEmbeddingPolicy;
  /** Indexing policy for the sample container; left unset for samples that define none. */
  indexingPolicy?: DataModels.IndexingPolicy;
}

/**
 * Lookup table from each `SampleDataFile` to the settings used when its container is created.
 * Typing it as a `Record` over the enum forces every enum member to have an entry.
 */
const containerSettings: Record<SampleDataFile, SampleContainerSettings> = {
  [SampleDataFile.COPILOT]: { partitionKeyString: "category" },

  [SampleDataFile.FABRIC_SAMPLE_DATA]: { partitionKeyString: "categoryName" },

  [SampleDataFile.FABRIC_SAMPLE_VECTOR_DATA]: {
    partitionKeyString: "categoryName",
    // The embedding path declared here is the same path the vector index below targets.
    vectorEmbeddingPolicy: {
      vectorEmbeddings: [
        {
          path: "/vectors",
          dataType: "float32",
          distanceFunction: "cosine",
          // NOTE(review): presumably the embedding length of the model that produced the sample vectors - confirm.
          dimensions: 1536,
        },
      ],
    },
    indexingPolicy: {
      automatic: true,
      indexingMode: "consistent",
      includedPaths: [{ path: "/*" }],
      excludedPaths: [{ path: '/"_etag"/?' }],
      fullTextIndexes: [],
      vectorIndexes: [
        {
          path: "/vectors",
          type: "quantizedFlat",
          quantizationByteSize: 64,
        },
      ],
    },
  },
};
export const createContainer = async (
databaseName: string,
containerName: string,
@@ -49,48 +99,12 @@ export const createContainer = async (
databaseId: databaseName,
databaseLevelThroughput: false,
partitionKey: {
paths: [`/${SAMPLE_DATA_PARTITION_KEY}`],
paths: [`/${containerSettings[sampleDataFile].partitionKeyString}`],
kind: "Hash",
version: BackendDefaults.partitionKeyVersion,
},
vectorEmbeddingPolicy:
sampleDataFile === SampleDataFile.FABRIC_SAMPLE_VECTOR_DATA
? {
vectorEmbeddings: [
{
path: "/descriptionVector",
dataType: "float32",
distanceFunction: "cosine",
dimensions: 512,
},
],
}
: undefined,
indexingPolicy:
sampleDataFile === SampleDataFile.FABRIC_SAMPLE_VECTOR_DATA
? {
automatic: true,
indexingMode: "consistent",
includedPaths: [
{
path: "/*",
},
],
excludedPaths: [
{
path: '/"_etag"/?',
},
],
fullTextIndexes: [],
vectorIndexes: [
{
path: "/descriptionVector",
type: "quantizedFlat",
quantizationByteSize: 64,
},
],
}
: undefined,
vectorEmbeddingPolicy: containerSettings[sampleDataFile].vectorEmbeddingPolicy,
indexingPolicy: containerSettings[sampleDataFile].indexingPolicy,
};
await createCollection(createRequest);
await explorer.refreshAllDatabases();
@@ -103,8 +117,6 @@ export const createContainer = async (
return newCollection;
};
const SAMPLE_DATA_PARTITION_KEY = "category"; // This pkey is specifically set for queryCopilotSampleData.json below
export const importData = async (sampleDataFile: SampleDataFile, collection: ViewModels.Collection): Promise<void> => {
let documents: JSONObject[] = undefined;
switch (sampleDataFile) {