Files
langflow/scripts/build_component_index.py
Gabriel Luiz Freitas Almeida 60e3ab4970 fix: Remove blanket real_time_refresh options stripping and add dynamic loading to CurrentDate (#11400)
* fix: Update timezone options loading for CurrentDateComponent

* fix: Remove real-time refresh options stripping to stabilize component index

* Enhance component options in component_index.json

- Added language model options for agent_llm: "Anthropic", "OpenAI" and "OpenAI", "Custom".
- Introduced model_name option: "Select a model".
- Expanded HTTP method options for method: "GET", "POST", "PATCH", "PUT", "DELETE".
- Updated mode options to include: "URL", "cURL".
- Added search_mode options: "Web", "News", "RSS".
- Defined environment options for Astra DB API Endpoint: "prod", "test", "dev".
- Specified search_method options: "Hybrid Search", "Vector Search".
- Included chunker options: "HybridChunker", "HierarchicalChunker".
- Added tokenizer provider options: "Hugging Face", "OpenAI".
- Defined export_format options: "Markdown", "HTML", "Plaintext", "DocTags".
- Introduced auth_mode options: "basic", "jwt".
- Updated pipeline options: "standard", "vlm".
- Specified storage_location options for reading and saving files: "Local", "AWS", "Google Drive".
- Expanded operator options for text comparison.
- Updated repo_source options: "Local", "Remote".
- Updated the google dependency version to "2.5.0".
- Added model_id options for Hugging Face Hub.
- Defined base_url options for IBM API.
- Updated mode options for message operations: "Retrieve", "Store".
- Enhanced mirostat options: "Disabled", "Mirostat", "Mirostat 2.0".
- Expanded model_name options for various GPT models.
- Updated DataFrame operation options with icons.
- Enhanced text operation options with icons.
- Specified output_type options: "Message", "Data", "DataFrame".
- Updated CurrentDateComponent to dynamically load timezone options.

* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-01-21 20:47:47 +00:00

193 lines
6.9 KiB
Python

"""Build a static component index for fast startup.
This script generates a prebuilt index of all built-in components by walking
through the lfx.components package and processing each module. The index is
saved as a JSON file that can be loaded instantly at runtime, avoiding the
need to import all component modules during startup.
"""
import hashlib
import sys
from pathlib import Path
import orjson
def _get_lfx_version():
    """Return the version string of the installed ``lfx`` distribution.

    The components being indexed live in LFX, so the index is stamped with
    the LFX version. The retrieved version is also printed to stdout.
    """
    from importlib import metadata

    lfx_version = metadata.version("lfx")
    print(f"Retrieved LFX version: {lfx_version}")
    return lfx_version
def _normalize_for_determinism(obj):
    """Return a copy of ``obj`` whose dictionaries are ordered by key.

    Dictionaries are rebuilt with their keys in sorted order, at every
    nesting depth. Lists are rebuilt element by element but keep their
    original order, because many lists carry meaning in their ordering
    (e.g., field_order, display_order).

    Note: a list that arrives in a nondeterministic order (for example one
    produced from set iteration) is NOT fixed here. Such lists must be put
    into a stable order by the caller before this function is used.
    """
    if isinstance(obj, list):
        # Order is preserved; only the elements themselves are normalized.
        return [_normalize_for_determinism(element) for element in obj]
    if not isinstance(obj, dict):
        # Anything that is neither a dict nor a list is returned unchanged.
        return obj
    ordered = {}
    for key in sorted(obj):
        ordered[key] = _normalize_for_determinism(obj[key])
    return ordered
def _strip_dynamic_fields(obj):
    """Return a copy of ``obj`` without the volatile keys.

    Any dictionary entry whose key is ``"timestamp"`` or ``"deprecated_at"``
    is dropped, at every nesting depth inside dicts and lists. Removing
    these keeps the generated index (and its hash) from changing between
    builds of the same version.
    """
    # Keys whose values are runtime-specific and must not reach the index.
    volatile_keys = frozenset({"timestamp", "deprecated_at"})
    if isinstance(obj, list):
        return [_strip_dynamic_fields(element) for element in obj]
    if not isinstance(obj, dict):
        return obj
    kept = {}
    for key, value in obj.items():
        if key in volatile_keys:
            continue
        kept[key] = _strip_dynamic_fields(value)
    return kept
def _import_components() -> tuple[dict, int]:
    """Load every lfx component through the async import helper.

    Returns:
        Tuple of (modules_dict, components_count), where ``modules_dict`` is
        the ``"components"`` entry of the import result (empty dict if the
        entry is absent) and ``components_count`` is the total number of
        items across all of its values.

    Raises:
        RuntimeError: If anything fails while importing or counting the
            components; the original exception is chained as the cause.
    """
    import asyncio

    from lfx.interface.components import import_langflow_components

    try:
        # The import helper is a coroutine; drive it to completion here.
        loaded = asyncio.run(import_langflow_components())
        modules_dict = loaded.get("components", {})
        components_count = 0
        for category_components in modules_dict.values():
            components_count += len(category_components)
        print(f"Discovered {components_count} components across {len(modules_dict)} categories")
    except Exception as e:
        msg = f"Failed to import components: {e}"
        raise RuntimeError(msg) from e
    else:
        return modules_dict, components_count
def build_component_index() -> dict:
    """Assemble the component index and stamp it with a SHA256 digest.

    Imports all components, records the installed LFX version, orders the
    categories and their components by name, removes volatile fields, sorts
    every dictionary by key, and finally hashes the serialized result.

    Returns:
        A dictionary with the keys ``version``, ``metadata``, ``entries``
        and ``sha256``.

    Raises:
        RuntimeError: If the components cannot be imported (raised by
            ``_import_components``).
    """
    print("Building component index...")
    modules_dict, components_count = _import_components()
    current_version = _get_lfx_version()

    def _copy_component(raw_component):
        # Shallow-copy the component and its metadata so the objects handed
        # back by the import step are not mutated here.
        duplicate = dict(raw_component)
        duplicate["metadata"] = dict(duplicate.get("metadata", {}))
        return duplicate

    # Each entry is a [category_name, {component_name: component}] pair.
    # Categories, and the components inside each one, are emitted in sorted
    # name order so repeated builds produce the same layout.
    entries = []
    for category_name in sorted(modules_dict):
        category_components = modules_dict[category_name]
        entries.append(
            [
                category_name,
                {name: _copy_component(category_components[name]) for name in sorted(category_components)},
            ]
        )

    index = {
        "version": current_version,
        "metadata": {
            "num_modules": len(modules_dict),
            "num_components": components_count,
        },
        "entries": entries,
    }

    # Volatile fields are removed BEFORE normalization so they can never
    # influence the hash computed below.
    print("\nStripping dynamic fields from component metadata...")
    index = _strip_dynamic_fields(index)

    # Sort every dictionary by key for deterministic output.
    index = _normalize_for_determinism(index)

    # The digest covers the index WITHOUT its own "sha256" field, serialized
    # with orjson.OPT_SORT_KEYS. To verify integrity later: load the index,
    # remove "sha256", serialize with OPT_SORT_KEYS, and compare digests.
    digest_input = orjson.dumps(index, option=orjson.OPT_SORT_KEYS)
    index["sha256"] = hashlib.sha256(digest_input).hexdigest()  # type: ignore[index]
    return index
# Default output location of the component index: starting from the parent of
# this script's directory, src/lfx/src/lfx/_assets/component_index.json.
COMPONENT_INDEX_PATH = Path(__file__).parent.parent.joinpath(
    "src", "lfx", "src", "lfx", "_assets", "component_index.json"
)
def main():
    """Build the component index and write it to ``COMPONENT_INDEX_PATH``.

    If the index cannot be built, the error is printed to stderr and the
    process exits with status 1. On success the index is written as
    pretty-printed JSON and a short summary is printed to stdout.
    """
    try:
        # Build the index - any failure is reported and ends the process.
        index = build_component_index()
    except Exception as e:  # noqa: BLE001
        print(f"Failed to build component index: {e}", file=sys.stderr)
        sys.exit(1)

    # Use the standard component index path (defined at module level)
    output_path = COMPONENT_INDEX_PATH

    # Create directory if it doesn't exist
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Pretty-print for readable git diffs and resolvable merge conflicts
    print(f"\nWriting formatted index to {output_path}")
    json_bytes = orjson.dumps(index, option=orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2)
    # orjson already returns UTF-8 encoded bytes, so write them unchanged.
    # Going through text mode would decode and re-encode the payload and, on
    # Windows, translate "\n" to "\r\n", making the file platform-dependent.
    output_path.write_bytes(json_bytes)

    print("\nIndex successfully written!")
    print(f" Version: {index['version']}")
    print(f" Modules: {index['metadata']['num_modules']}")
    print(f" Components: {index['metadata']['num_components']}")
    print(f" SHA256: {index['sha256']}")
# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()