import gradio as gr
import requests
import re
import time
from datetime import datetime
from typing import List, Dict, Optional, Tuple
class GGUFModelFinder:
    def __init__(self):
        self.popular_gguf_creators = [
            "TheBloke",
            "bartowski",
            "mradermacher",
            "microsoft",
            "QuantFactory",
            "lmstudio-ai",
            "huggingface",
            "mlabonne",
            "NousResearch",
            "MaziyarPanahi",
        ]
        self.api_base = "https://huggingface.co/api"
        self.headers = {
            "User-Agent": "GGUF-Model-Finder/1.0"
        }
    def clean_model_name(self, model_name: str) -> str:
        """Clean and normalize a model name for better searching."""
        cleaned = model_name.strip()
        # Remove the author/organization prefix if present.
        if "/" in cleaned:
            cleaned = cleaned.split("/")[-1]
        # Strip common suffixes repeatedly, so stacked suffixes such as
        # "-instruct-v1" are fully removed regardless of their order.
        suffixes_to_remove = [
            "-GGUF", "-gguf", "-GPTQ", "-gptq", "-AWQ", "-awq",
            "-HF", "-hf", "-chat", "-instruct", "-base", "-v1",
            "-v2", "-v3", "-uncensored", "-finetune",
        ]
        stripped = True
        while stripped:
            stripped = False
            for suffix in suffixes_to_remove:
                if cleaned.lower().endswith(suffix.lower()):
                    cleaned = cleaned[:-len(suffix)]
                    stripped = True
        return cleaned.strip()
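    # For example, "meta-llama/Llama-2-7b-chat-hf" cleans to "Llama-2-7b":
    # the org prefix is dropped, then "-hf" and "-chat" are stripped in turn.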
    def search_models(self, query: str, author: Optional[str] = None, limit: int = 20) -> List[Dict]:
        """Search for models via the Hugging Face Hub API."""
        try:
            search_url = f"{self.api_base}/models"
            params = {
                "search": query,
                "filter": "gguf",
                "limit": limit,
                "sort": "downloads",
            }
            if author:
                params["author"] = author
            response = requests.get(search_url, params=params, headers=self.headers, timeout=10)
            if response.status_code == 200:
                return response.json()
            return []
        except requests.RequestException as e:
            print(f"Error searching models: {e}")
            return []
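    # The call above amounts to a plain GET request, e.g.:
    #   https://huggingface.co/api/models?search=mistral-7b&filter=gguf&limit=20&sort=downloads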
    def search_gguf_variants(self, model_name: str) -> List[Dict]:
        """Search for GGUF variants of a given model."""
        cleaned_name = self.clean_model_name(model_name)
        all_results = []
        # Try a few query variations around the cleaned name.
        search_terms = [
            cleaned_name,
            f"{cleaned_name} GGUF",
            f"{cleaned_name}-GGUF",
            f"{cleaned_name}_GGUF",
        ]
        # Search the accounts of popular GGUF quantizers first...
        for creator in self.popular_gguf_creators:
            for term in search_terms:
                results = self.search_models(term, author=creator, limit=10)
                all_results.extend(results)
                time.sleep(0.1)  # crude rate limiting
        # ...then search globally without an author filter.
        for term in search_terms:
            results = self.search_models(term, limit=15)
            all_results.extend(results)
            time.sleep(0.1)
        # De-duplicate and keep only repos whose id mentions "gguf".
        seen_ids = set()
        filtered_results = []
        for model in all_results:
            model_id = model.get('id', '')
            if model_id not in seen_ids and 'gguf' in model_id.lower():
                seen_ids.add(model_id)
                # Keep the repo only if its name plausibly matches the query.
                model_name_clean = self.clean_model_name(model_id)
                if self.is_relevant_match(cleaned_name, model_name_clean):
                    filtered_results.append(model)
        # Most-downloaded first.
        filtered_results.sort(key=lambda x: x.get('downloads', 0), reverse=True)
        return filtered_results[:20]  # top 20 results
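    # For example, "meta-llama/Llama-2-7b-chat-hf" is cleaned to "Llama-2-7b",
    # and the queries issued are "Llama-2-7b", "Llama-2-7b GGUF",
    # "Llama-2-7b-GGUF", and "Llama-2-7b_GGUF" (per creator, then globally).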
    def is_relevant_match(self, original: str, candidate: str) -> bool:
        """Check whether a candidate model is a relevant match for the original."""
        original_lower = original.lower()
        candidate_lower = candidate.lower()
        # Direct substring match in either direction.
        if original_lower in candidate_lower or candidate_lower in original_lower:
            return True
        # Otherwise fall back to word overlap.
        original_words = set(re.findall(r'\w+', original_lower))
        candidate_words = set(re.findall(r'\w+', candidate_lower))
        # If most of the original's words appear in the candidate, call it a match.
        if len(original_words) > 0:
            overlap_ratio = len(original_words.intersection(candidate_words)) / len(original_words)
            return overlap_ratio >= 0.6
        return False
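    # Worked example: "mistral-7b-instruct" vs. "mistral-7b-v0.2" is not a
    # substring match either way, but {"mistral", "7b"} of the three original
    # words {"mistral", "7b", "instruct"} overlap: 2/3 ~ 0.67 >= 0.6 -> match.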
    def get_model_details(self, model_id: str) -> Dict:
        """Get detailed information about a specific model."""
        try:
            url = f"{self.api_base}/models/{model_id}"
            response = requests.get(url, headers=self.headers, timeout=10)
            if response.status_code == 200:
                return response.json()
            return {}
        except requests.RequestException as e:
            print(f"Error getting model details: {e}")
            return {}
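    # Equivalent raw request (with an illustrative repo id):
    #   https://huggingface.co/api/models/TheBloke/Llama-2-7B-GGUF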
    def format_model_info(self, model: Dict) -> str:
        """Format model information for display."""
        model_id = model.get('id', 'Unknown')
        downloads = model.get('downloads', 0)
        likes = model.get('likes', 0)
        updated = model.get('lastModified', 'Unknown')
        # Format the ISO timestamp as a plain date.
        if updated != 'Unknown':
            try:
                date_obj = datetime.fromisoformat(updated.replace('Z', '+00:00'))
                updated = date_obj.strftime('%Y-%m-%d')
            except ValueError:
                pass
        # Summarize total GGUF file size, if the API returned file metadata.
        size_info = ""
        if 'siblings' in model:
            total_size = 0
            file_count = 0
            for sibling in model['siblings']:
                if sibling.get('rfilename', '').endswith('.gguf'):
                    file_count += 1
                    if 'size' in sibling:
                        total_size += sibling['size']
            if total_size > 0:
                size_gb = total_size / (1024**3)
                size_info = f" | Size: {size_gb:.1f}GB ({file_count} GGUF files)"
        model_url = f"https://huggingface.co/{model_id}"
        return f"""
**[{model_id}]({model_url})**
- Downloads: {downloads:,} | Likes: {likes} | Updated: {updated}{size_info}
"""
def find_gguf_models(model_name: str, progress=gr.Progress()) -> Tuple[str, str]:
    """Main entry point: search for GGUF variants and build both Markdown panes."""
    if not model_name.strip():
        return "Please enter a model name to search for.", ""
    progress(0.1, desc="Initializing search...")
    finder = GGUFModelFinder()
    progress(0.3, desc="Searching for GGUF variants...")
    results = finder.search_gguf_variants(model_name)
    progress(0.8, desc="Formatting results...")
    if not results:
        no_results = f"""
# No GGUF Models Found 😕

Could not find any GGUF variants for **{model_name}**.

## Suggestions:

1. **Check the spelling** of the model name
2. **Try a simpler name** (e.g., just "llama-2-7b" instead of "meta-llama/Llama-2-7b-chat-hf")
3. **Search manually** on [Hugging Face](https://huggingface.co/models?other=gguf) with the GGUF filter
4. **Check popular GGUF creators**:
   - [ReallyFloppyPenguin](https://huggingface.co/ReallyFloppyPenguin)
   - [TheBloke](https://huggingface.co/TheBloke)
   - [bartowski](https://huggingface.co/bartowski)
   - [mradermacher](https://huggingface.co/mradermacher)
   - [QuantFactory](https://huggingface.co/QuantFactory)

The model you're looking for might not have been converted to GGUF format yet, or might be available under a different name.
"""
        return no_results, ""
    # Build the main results pane.
    results_md = f"""
# GGUF Models Found for "{model_name}" 🎯

Found **{len(results)}** GGUF variant(s):
"""
    for i, model in enumerate(results, 1):
        results_md += f"{i}. {finder.format_model_info(model)}\n"
    # Build the secondary info pane (static reference material).
    additional_info = """
## 📖 What is GGUF?

GGUF (GPT-Generated Unified Format) is a file format for storing models for inference with GGML and llama.cpp. It's designed to be fast to load and save, and to be extensible.

## 🔧 How to Use These Models

### With llama.cpp:

```bash
./main -m model.gguf -p "Your prompt here"
```

### With Ollama:

```bash
ollama create mymodel -f Modelfile
ollama run mymodel
```
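
A minimal Modelfile for the `ollama create` step can be a single `FROM` line pointing at the downloaded GGUF file (illustrative sketch):

```
FROM ./model.gguf
```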

### With Python (llama-cpp-python):

```python
from llama_cpp import Llama

llm = Llama(model_path="model.gguf")
output = llm("Your prompt here")
```

## 💡 Tips for Choosing a Quantization

- **Q4_K_M**: Good balance of quality and size
- **Q5_K_M**: Higher quality, larger size
- **Q6_K**: Even higher quality, larger size
- **Q8_0**: Highest quality, largest size

Lower quantization numbers mean smaller files but lower quality; higher numbers mean larger files but higher quality.

## 🌟 Popular GGUF Model Creators

The results above are from trusted model quantizers who regularly convert popular models to GGUF format.
"""
    progress(1.0, desc="Complete!")
    return results_md, additional_info
# Create the Gradio interface
def create_interface():
    with gr.Blocks(
        title="GGUF Model Finder",
        theme=gr.themes.Soft(),
        css="""
        .container { max-width: 1200px; margin: auto; }
        .header { text-align: center; margin: 20px 0; }
        .search-box { margin: 20px 0; }
        """,
    ) as iface:
        gr.HTML("""
        <div class="header">
            <h1>🔍 GGUF Model Finder</h1>
            <p>Find GGUF (quantized) versions of your favorite language models for local inference</p>
        </div>
        """)
        with gr.Row():
            with gr.Column(scale=3):
                model_input = gr.Textbox(
                    label="Model Name",
                    placeholder="e.g., llama-2-7b, mistral-7b, codellama-34b, deepseek-coder-6.7b",
                    info="Enter the name of the model you want to find GGUF versions for",
                    lines=1,
                )
            with gr.Column(scale=1):
                search_btn = gr.Button("🔍 Search GGUF Models", variant="primary", size="lg")
        gr.HTML("""
        <div style="margin: 20px 0; padding: 15px; background-color: #374151; border-radius: 8px;">
            <strong>💡 Quick Examples:</strong><br>
            • <code>llama-2-7b</code> - Meta's Llama 2 7B model<br>
            • <code>mistral-7b</code> - Mistral AI's 7B model<br>
            • <code>codellama-34b</code> - Code Llama 34B model<br>
            • <code>neural-chat-7b</code> - Intel's Neural Chat model<br>
            • <code>deepseek-coder</code> - DeepSeek Coder model
        </div>
        """)
        with gr.Row():
            with gr.Column(scale=2):
                results_output = gr.Markdown(
                    label="Search Results",
                    value="Enter a model name above and click 'Search GGUF Models' to find quantized versions.",
                    height=400,
                )
            with gr.Column(scale=1):
                info_output = gr.Markdown(
                    label="Additional Information",
                    value="",
                    height=400,
                )
        # Wire up both the button click and pressing Enter in the textbox.
        search_btn.click(
            fn=find_gguf_models,
            inputs=[model_input],
            outputs=[results_output, info_output],
            show_progress="full",
        )
        model_input.submit(
            fn=find_gguf_models,
            inputs=[model_input],
            outputs=[results_output, info_output],
            show_progress="full",
        )
        gr.HTML("""
        <div style="margin-top: 30px; text-align: center; color: #666;">
            <p>Made with ❤️ using Gradio | Data from <a href="https://huggingface.co">Hugging Face</a></p>
            <p>GGUF format by the <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> team</p>
        </div>
        """)
    return iface
if __name__ == "__main__":
    # Create and launch the interface
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,  # ignored on Hugging Face Spaces; useful for local runs
    )
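# Presumed companion requirements.txt for the Space (an assumption, not part
# of this file; only these two packages are imported above):
#   gradio
#   requests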