* fix: Update timezone options loading for CurrentDateComponent * fix: Remove real-time refresh options stripping to stabilize component index * Enhance component options in component_index.json - Added language model options for agent_llm: "Anthropic", "OpenAI" and "OpenAI", "Custom". - Introduced model_name option: "Select a model". - Expanded HTTP method options for method: "GET", "POST", "PATCH", "PUT", "DELETE". - Updated mode options to include: "URL", "cURL". - Added search_mode options: "Web", "News", "RSS". - Defined environment options for Astra DB API Endpoint: "prod", "test", "dev". - Specified search_method options: "Hybrid Search", "Vector Search". - Included chunker options: "HybridChunker", "HierarchicalChunker". - Added tokenizer provider options: "Hugging Face", "OpenAI". - Defined export_format options: "Markdown", "HTML", "Plaintext", "DocTags". - Introduced auth_mode options: "basic", "jwt". - Updated pipeline options: "standard", "vlm". - Specified storage_location options for reading and saving files: "Local", "AWS", "Google Drive". - Expanded operator options for text comparison. - Updated repo_source options: "Local", "Remote". - Updated dependencies version for google to "2.5.0". - Added model_id options for Hugging Face Hub. - Defined base_url options for IBM API. - Updated mode options for message operations: "Retrieve", "Store". - Enhanced mirostat options: "Disabled", "Mirostat", "Mirostat 2.0". - Expanded model_name options for various GPT models. - Updated DataFrame operation options with icons. - Enhanced text operation options with icons. - Specified output_type options: "Message", "Data", "DataFrame". - Updated CurrentDateComponent to dynamically load timezone options. * [autofix.ci] apply automated fixes --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
193 lines
6.9 KiB
Python
193 lines
6.9 KiB
Python
"""Build a static component index for fast startup.

This script generates a prebuilt index of all built-in components by walking
through the lfx.components package and processing each module. The index is
saved as a JSON file that can be loaded instantly at runtime, avoiding the
need to import all component modules during startup.
"""
|
|
|
|
import hashlib
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import orjson
|
|
|
|
|
|
def _get_lfx_version():
    """Return the version string of the installed ``lfx`` distribution.

    The components being indexed live in LFX, so the LFX package version is
    the one reported. The retrieved value is also echoed to stdout.
    """
    from importlib import metadata

    # Use a distinct local name so the lookup function is not rebound to its result.
    lfx_version = metadata.version("lfx")
    print(f"Retrieved LFX version: {lfx_version}")
    return lfx_version
|
|
|
|
|
|
def _normalize_for_determinism(obj):
    """Return a copy of ``obj`` whose dictionaries are ordered by key.

    Dicts are rebuilt with their items in sorted order and lists are rebuilt
    element by element, both recursively. List order is never changed, because
    many lists (e.g. field_order, display_order) are semantically ordered.
    Any other value is returned unchanged.

    Note: a list that arrives in a nondeterministic order (for instance built
    from set iteration or reflection) is NOT fixed here. Callers must make
    such lists deterministic beforehand, or sort the specific list fields
    that are semantically unordered (e.g. tags).
    """
    if isinstance(obj, list):
        # Normalize each element in place; the sequence order is preserved.
        return list(map(_normalize_for_determinism, obj))

    if not isinstance(obj, dict):
        # Primitives and any other type pass through untouched.
        return obj

    ordered = {}
    for key, value in sorted(obj.items()):
        ordered[key] = _normalize_for_determinism(value)
    return ordered
|
|
|
|
|
|
def _strip_dynamic_fields(obj):
    """Return a copy of ``obj`` with volatile keys removed at every depth.

    The keys "timestamp" and "deprecated_at" are dropped from every dict
    found while walking nested dicts and lists. Removing them keeps the
    generated index (and its hash) stable between builds; the version field
    serves as the timeline instead of timestamps. Values that are neither a
    dict nor a list are returned unchanged.
    """
    if isinstance(obj, list):
        return [_strip_dynamic_fields(element) for element in obj]

    if not isinstance(obj, dict):
        return obj

    # Keys whose values are runtime-specific or populated from external sources.
    volatile_keys = frozenset(("timestamp", "deprecated_at"))

    cleaned = {}
    for key, value in obj.items():
        if key in volatile_keys:
            continue
        cleaned[key] = _strip_dynamic_fields(value)
    return cleaned
|
|
|
|
|
|
def _import_components() -> tuple[dict, int]:
    """Load every lfx component through the async import function.

    Returns:
        Tuple of (modules_dict, components_count), where ``modules_dict`` is
        the "components" mapping from the import result (empty dict if the
        key is absent) and ``components_count`` is the summed length of its
        values.

    Raises:
        RuntimeError: If running the import or counting its result fails.
            The original exception is chained as the cause.
    """
    import asyncio

    from lfx.interface.components import import_langflow_components

    try:
        # The importer is a coroutine function; drive it to completion here.
        import_result = asyncio.run(import_langflow_components())
        by_category = import_result.get("components", {})

        total = 0
        for category_components in by_category.values():
            total += len(category_components)

        print(f"Discovered {total} components across {len(by_category)} categories")
    except Exception as e:
        raise RuntimeError(f"Failed to import components: {e}") from e

    return by_category, total
|
|
|
|
|
|
def build_component_index() -> dict:
    """Assemble the component index and stamp it with an integrity hash.

    Returns:
        A dictionary with the keys "version" (installed lfx version),
        "metadata" ("num_modules" and "num_components"), "entries" (a list of
        ``[category_name, components]`` pairs ordered by category name, with
        the components of each category ordered by component name) and
        "sha256" (hex digest of the index serialized without that field).

    Raises:
        RuntimeError: Propagated from ``_import_components`` when the
            components cannot be imported.
    """
    print("Building component index...")

    modules_dict, components_count = _import_components()
    current_version = _get_lfx_version()

    # Walk categories and components in name order so the output is stable.
    entries = []
    for category in sorted(modules_dict):
        members = modules_dict[category]
        ordered_members = {}

        for member_name in sorted(members):
            # Shallow-copy the component and its metadata so the objects
            # returned by the importer are never mutated here.
            entry = dict(members[member_name])
            entry["metadata"] = dict(entry.get("metadata", {}))
            ordered_members[member_name] = entry

        entries.append([category, ordered_members])

    raw_index = {
        "version": current_version,
        "metadata": {
            "num_modules": len(modules_dict),
            "num_components": components_count,
        },
        "entries": entries,
    }

    # Volatile fields are removed BEFORE normalization and hashing so that
    # they cannot influence the resulting digest.
    print("\nStripping dynamic fields from component metadata...")
    stripped_index = _strip_dynamic_fields(raw_index)

    # Recursively sort dict keys for deterministic output.
    index = _normalize_for_determinism(stripped_index)

    # Integrity hash. IMPORTANT: it is computed BEFORE the "sha256" field is
    # added. Determinism relies on BOTH the recursive key sorting above and
    # orjson.OPT_SORT_KEYS during serialization.
    #
    # To verify integrity later:
    #   1. Load the index
    #   2. Remove the 'sha256' field
    #   3. Serialize with OPT_SORT_KEYS
    #   4. Compare SHA256 hashes
    serialized = orjson.dumps(index, option=orjson.OPT_SORT_KEYS)
    digest = hashlib.sha256(serialized).hexdigest()
    index["sha256"] = digest  # type: ignore[index]

    return index
|
|
|
|
|
|
# Standard location of the generated component index: two directory levels
# above this script, then src/lfx/src/lfx/_assets/component_index.json.
COMPONENT_INDEX_PATH = Path(__file__).parent.parent.joinpath(
    "src", "lfx", "src", "lfx", "_assets", "component_index.json"
)
|
|
|
|
|
|
def main():
    """Build the component index and write it to ``COMPONENT_INDEX_PATH``.

    If building fails for any reason, the error is printed to stderr and the
    process exits with status 1. Otherwise the index is serialized as
    key-sorted, 2-space-indented JSON, written to disk (creating the parent
    directory if needed), and a short summary is printed to stdout.
    """
    try:
        # Build the index - will raise on any error
        index = build_component_index()
    except Exception as e:  # noqa: BLE001
        print(f"Failed to build component index: {e}", file=sys.stderr)
        sys.exit(1)

    # Use the standard component index path (defined at module level)
    output_path = COMPONENT_INDEX_PATH

    # Create directory if it doesn't exist
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Pretty-print for readable git diffs and resolvable merge conflicts
    print(f"\nWriting formatted index to {output_path}")
    json_bytes = orjson.dumps(index, option=orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2)

    # Write the serialized bytes verbatim. Decoding them and going through
    # write_text() would only re-encode the same UTF-8 data, and text mode
    # translates "\n" to the platform line separator, which would make the
    # generated file differ byte-for-byte between operating systems.
    output_path.write_bytes(json_bytes)

    print("\nIndex successfully written!")
    print(f" Version: {index['version']}")
    print(f" Modules: {index['metadata']['num_modules']}")
    print(f" Components: {index['metadata']['num_components']}")
    print(f" SHA256: {index['sha256']}")
|
|
|
|
|
|
# Run the build only when this file is executed as a script, so importing the
# module (e.g. to reuse its helpers) has no side effects.
if __name__ == "__main__":
    main()
|