{
  "python_data_stack": {
    "pandas": {
      "desc": "Manipulation et analyse de données tabulaires",
      "key_ops": ["read_csv", "groupby", "merge", "pivot_table", "apply", "rolling"],
      "best_practices": ["Vectorized operations over loops", "Use dtypes wisely", "Chain operations"]
    },
    "numpy": {
      "desc": "Calcul numérique performant",
      "key_ops": ["array operations", "broadcasting", "linear algebra", "FFT", "random"]
    },
    "scikit_learn": {
      "desc": "Machine learning classique",
      "algorithms": {
        "classification": ["RandomForest", "XGBoost", "SVM", "LogisticRegression", "KNN"],
        "regression": ["LinearRegression", "Ridge", "Lasso", "GradientBoosting", "SVR"],
        "clustering": ["KMeans", "DBSCAN", "Hierarchical", "GaussianMixture"],
        "dimensionality": ["PCA", "t-SNE", "UMAP", "LDA"]
      },
      "pipeline": ["preprocessing", "feature_selection", "model_selection", "cross_validation", "hyperparameter_tuning"]
    },
    "visualization": {
      "matplotlib": "Graphiques de base, publication-quality",
      "seaborn": "Statistical data visualization",
      "plotly": "Interactive charts, dashboards",
      "altair": "Declarative statistical visualization"
    }
  },
  "ml_ops": {
    "experiment_tracking": ["MLflow", "Weights & Biases", "Neptune"],
    "model_serving": ["FastAPI", "TensorRT", "Triton", "vLLM", "Ollama"],
    "data_pipeline": ["Apache Airflow", "Prefect", "Dagster", "dbt"],
    "feature_store": ["Feast", "Tecton", "Hopsworks"],
    "monitoring": ["Evidently AI", "Prometheus + Grafana", "WhyLabs"]
  },
  "llm_engineering": {
    "prompt_engineering": {
      "techniques": ["Zero-shot", "Few-shot", "Chain-of-Thought", "Tree-of-Thoughts", "ReAct", "Self-Consistency"],
      "best_practices": [
        "Be specific and detailed in instructions",
        "Use delimiters for structured input",
        "Specify output format explicitly",
        "Break complex tasks into steps",
        "Use system prompts for persona and constraints"
      ]
    },
    "rag": {
      "components": ["Document Loading", "Chunking", "Embedding", "Vector Store", "Retrieval", "Generation"],
      "chunking_strategies": ["Fixed-size", "Recursive", "Semantic", "Document-based"],
      "embedding_models": ["nomic-embed-text", "BGE", "GTE", "E5", "OpenAI ada-002"],
      "vector_stores": ["pgvector", "Chroma", "Weaviate", "Pinecone", "Milvus"],
      "retrieval_strategies": ["Similarity search", "MMR", "Hybrid (BM25+vector)", "Re-ranking"]
    },
    "fine_tuning": {
      "methods": ["LoRA", "QLoRA", "Full fine-tuning", "RLHF", "DPO"],
      "tools": ["Hugging Face Transformers", "Axolotl", "LLaMA-Factory", "Unsloth"],
      "data_prep": ["Instruction format", "Conversation format", "Preference pairs"]
    },
    "inference_optimization": {
      "quantization": ["GGUF (llama.cpp)", "GPTQ", "AWQ", "bitsandbytes 4-bit"],
      "serving": ["vLLM (PagedAttention)", "Ollama", "TGI", "llama.cpp server"],
      "techniques": ["KV-cache", "Speculative decoding", "Continuous batching", "Flash Attention"]
    }
  },
  "sql_advanced": {
    "postgresql": {
      "features": ["CTE (WITH)", "Window functions", "LATERAL joins", "JSONB", "Full-text search", "pgvector"],
      "optimization": ["EXPLAIN ANALYZE", "Index types (B-tree, GIN, GiST, BRIN)", "Partitioning", "Materialized views"],
      "extensions": ["pgvector", "PostGIS", "pg_trgm", "dblink", "pg_stat_statements"]
    },
    "patterns": {
      "analytics": ["Running totals", "Moving averages", "Percentile ranks", "Gap analysis", "Cohort analysis"],
      "etl": ["COPY command", "Foreign Data Wrappers", "Logical replication", "pg_dump strategies"]
    }
  }
}