import gradio as gr
import requests
import re
import time
from datetime import datetime
from typing import List, Dict, Optional, Tuple
class GGUFModelFinder:
    def __init__(self):
        self.popular_gguf_creators = [
            "TheBloke",
            "bartowski",
            "mradermacher",
            "microsoft",
            "QuantFactory",
            "lmstudio-ai",
            "huggingface",
            "mlabonne",
            "NousResearch",
            "MaziyarPanahi",
        ]
        self.api_base = "https://huggingface.co/api"
        self.headers = {
            "User-Agent": "GGUF-Model-Finder/1.0"
        }
    def clean_model_name(self, model_name: str) -> str:
        """Clean and normalize a model name for better searching."""
        cleaned = model_name.strip()
        # Remove the author/organization prefix if present.
        if "/" in cleaned:
            cleaned = cleaned.split("/")[-1]
        # Strip common suffixes repeatedly, so stacked suffixes such as
        # "-instruct-v1" are fully removed regardless of their order.
        suffixes_to_remove = [
            "-GGUF", "-gguf", "-GPTQ", "-gptq", "-AWQ", "-awq",
            "-HF", "-hf", "-chat", "-instruct", "-base", "-v1",
            "-v2", "-v3", "-uncensored", "-finetune",
        ]
        stripped = True
        while stripped:
            stripped = False
            for suffix in suffixes_to_remove:
                if cleaned.lower().endswith(suffix.lower()):
                    cleaned = cleaned[:-len(suffix)]
                    stripped = True
        return cleaned.strip()
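    # For example, "meta-llama/Llama-2-7b-chat-hf" cleans to "Llama-2-7b":
    # the org prefix is dropped, then "-hf" and "-chat" are stripped in turn.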
    def search_models(self, query: str, author: Optional[str] = None, limit: int = 20) -> List[Dict]:
        """Search for models via the Hugging Face Hub API."""
        try:
            search_url = f"{self.api_base}/models"
            params = {
                "search": query,
                "filter": "gguf",
                "limit": limit,
                "sort": "downloads",
            }
            if author:
                params["author"] = author
            response = requests.get(search_url, params=params, headers=self.headers, timeout=10)
            if response.status_code == 200:
                return response.json()
            return []
        except requests.RequestException as e:
            print(f"Error searching models: {e}")
            return []
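    # The call above amounts to a plain GET request, e.g.:
    #   https://huggingface.co/api/models?search=mistral-7b&filter=gguf&limit=20&sort=downloads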
    def search_gguf_variants(self, model_name: str) -> List[Dict]:
        """Search for GGUF variants of a given model."""
        cleaned_name = self.clean_model_name(model_name)
        all_results = []
        # Try a few query variations around the cleaned name.
        search_terms = [
            cleaned_name,
            f"{cleaned_name} GGUF",
            f"{cleaned_name}-GGUF",
            f"{cleaned_name}_GGUF",
        ]
        # Search the accounts of popular GGUF quantizers first...
        for creator in self.popular_gguf_creators:
            for term in search_terms:
                results = self.search_models(term, author=creator, limit=10)
                all_results.extend(results)
                time.sleep(0.1)  # crude rate limiting
        # ...then search globally without an author filter.
        for term in search_terms:
            results = self.search_models(term, limit=15)
            all_results.extend(results)
            time.sleep(0.1)
        # De-duplicate and keep only repos whose id mentions "gguf".
        seen_ids = set()
        filtered_results = []
        for model in all_results:
            model_id = model.get('id', '')
            if model_id not in seen_ids and 'gguf' in model_id.lower():
                seen_ids.add(model_id)
                # Keep the repo only if its name plausibly matches the query.
                model_name_clean = self.clean_model_name(model_id)
                if self.is_relevant_match(cleaned_name, model_name_clean):
                    filtered_results.append(model)
        # Most-downloaded first.
        filtered_results.sort(key=lambda x: x.get('downloads', 0), reverse=True)
        return filtered_results[:20]  # top 20 results
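    # For example, "meta-llama/Llama-2-7b-chat-hf" is cleaned to "Llama-2-7b",
    # and the queries issued are "Llama-2-7b", "Llama-2-7b GGUF",
    # "Llama-2-7b-GGUF", and "Llama-2-7b_GGUF" (per creator, then globally).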
    def is_relevant_match(self, original: str, candidate: str) -> bool:
        """Check whether a candidate model is a relevant match for the original."""
        original_lower = original.lower()
        candidate_lower = candidate.lower()
        # Direct substring match in either direction.
        if original_lower in candidate_lower or candidate_lower in original_lower:
            return True
        # Otherwise fall back to word overlap.
        original_words = set(re.findall(r'\w+', original_lower))
        candidate_words = set(re.findall(r'\w+', candidate_lower))
        # If most of the original's words appear in the candidate, call it a match.
        if len(original_words) > 0:
            overlap_ratio = len(original_words.intersection(candidate_words)) / len(original_words)
            return overlap_ratio >= 0.6
        return False
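    # Worked example: "mistral-7b-instruct" vs. "mistral-7b-v0.2" is not a
    # substring match either way, but {"mistral", "7b"} of the three original
    # words {"mistral", "7b", "instruct"} overlap: 2/3 ~ 0.67 >= 0.6 -> match.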
    def get_model_details(self, model_id: str) -> Dict:
        """Get detailed information about a specific model."""
        try:
            url = f"{self.api_base}/models/{model_id}"
            response = requests.get(url, headers=self.headers, timeout=10)
            if response.status_code == 200:
                return response.json()
            return {}
        except requests.RequestException as e:
            print(f"Error getting model details: {e}")
            return {}
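    # Equivalent raw request (with an illustrative repo id):
    #   https://huggingface.co/api/models/TheBloke/Llama-2-7B-GGUF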
    def format_model_info(self, model: Dict) -> str:
        """Format model information for display."""
        model_id = model.get('id', 'Unknown')
        downloads = model.get('downloads', 0)
        likes = model.get('likes', 0)
        updated = model.get('lastModified', 'Unknown')
        # Format the ISO timestamp as a plain date.
        if updated != 'Unknown':
            try:
                date_obj = datetime.fromisoformat(updated.replace('Z', '+00:00'))
                updated = date_obj.strftime('%Y-%m-%d')
            except ValueError:
                pass
        # Summarize total GGUF file size, if the API returned file metadata.
        size_info = ""
        if 'siblings' in model:
            total_size = 0
            file_count = 0
            for sibling in model['siblings']:
                if sibling.get('rfilename', '').endswith('.gguf'):
                    file_count += 1
                    if 'size' in sibling:
                        total_size += sibling['size']
            if total_size > 0:
                size_gb = total_size / (1024**3)
                size_info = f" | Size: {size_gb:.1f}GB ({file_count} GGUF files)"
        model_url = f"https://huggingface.co/{model_id}"
        return f"""
**[{model_id}]({model_url})**
- Downloads: {downloads:,} | Likes: {likes} | Updated: {updated}{size_info}
"""
def find_gguf_models(model_name: str, progress=gr.Progress()) -> Tuple[str, str]:
    """Main entry point: search for GGUF variants and build both Markdown panes."""
    if not model_name.strip():
        return "Please enter a model name to search for.", ""
    progress(0.1, desc="Initializing search...")
    finder = GGUFModelFinder()
    progress(0.3, desc="Searching for GGUF variants...")
    results = finder.search_gguf_variants(model_name)
    progress(0.8, desc="Formatting results...")
    if not results:
        no_results = f"""
# No GGUF Models Found 😕

Could not find any GGUF variants for **{model_name}**.

## Suggestions:

1. **Check the spelling** of the model name
2. **Try a simpler name** (e.g., just "llama-2-7b" instead of "meta-llama/Llama-2-7b-chat-hf")
3. **Search manually** on [Hugging Face](https://huggingface.co/models?other=gguf) with the GGUF filter
4. **Check popular GGUF creators**:
   - [ReallyFloppyPenguin](https://huggingface.co/ReallyFloppyPenguin)
   - [TheBloke](https://huggingface.co/TheBloke)
   - [bartowski](https://huggingface.co/bartowski)
   - [mradermacher](https://huggingface.co/mradermacher)
   - [QuantFactory](https://huggingface.co/QuantFactory)

The model you're looking for might not have been converted to GGUF format yet, or might be available under a different name.
"""
        return no_results, ""
    # Build the main results pane.
    results_md = f"""
# GGUF Models Found for "{model_name}" 🎯

Found **{len(results)}** GGUF variant(s):
"""
    for i, model in enumerate(results, 1):
        results_md += f"{i}. {finder.format_model_info(model)}\n"
    # Build the secondary info pane (static reference material).
    additional_info = """
## 📖 What is GGUF?

GGUF (GPT-Generated Unified Format) is a file format for storing models for inference with GGML and llama.cpp. It's designed to be fast to load and save, and to be extensible.

## 🔧 How to Use These Models

### With llama.cpp:

```bash
./main -m model.gguf -p "Your prompt here"
```

### With Ollama:

```bash
ollama create mymodel -f Modelfile
ollama run mymodel
```
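
A minimal Modelfile for the `ollama create` step can be a single `FROM` line pointing at the downloaded GGUF file (illustrative sketch):

```
FROM ./model.gguf
```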

### With Python (llama-cpp-python):

```python
from llama_cpp import Llama

llm = Llama(model_path="model.gguf")
output = llm("Your prompt here")
```

## 💡 Tips for Choosing a Quantization

- **Q4_K_M**: Good balance of quality and size
- **Q5_K_M**: Higher quality, larger size
- **Q6_K**: Even higher quality, larger size
- **Q8_0**: Highest quality, largest size

Lower quantization numbers mean smaller files but lower quality; higher numbers mean larger files but higher quality.

## 🌟 Popular GGUF Model Creators

The results above are from trusted model quantizers who regularly convert popular models to GGUF format.
"""
    progress(1.0, desc="Complete!")
    return results_md, additional_info
# Create the Gradio interface
def create_interface():
    with gr.Blocks(
        title="GGUF Model Finder",
        theme=gr.themes.Soft(),
        css="""
        .container { max-width: 1200px; margin: auto; }
        .header { text-align: center; margin: 20px 0; }
        .search-box { margin: 20px 0; }
        """,
    ) as iface:
        gr.HTML("""
        <div class="header">
            <h1>🔍 GGUF Model Finder</h1>
            <p>Find GGUF (quantized) versions of your favorite language models for local inference</p>
        </div>
        """)
        with gr.Row():
            with gr.Column(scale=3):
                model_input = gr.Textbox(
                    label="Model Name",
                    placeholder="e.g., llama-2-7b, mistral-7b, codellama-34b, deepseek-coder-6.7b",
                    info="Enter the name of the model you want to find GGUF versions for",
                    lines=1,
                )
            with gr.Column(scale=1):
                search_btn = gr.Button("🔍 Search GGUF Models", variant="primary", size="lg")
        gr.HTML("""
        <div style="margin: 20px 0; padding: 15px; background-color: #374151; border-radius: 8px;">
            <strong>💡 Quick Examples:</strong><br>
            • <code>llama-2-7b</code> - Meta's Llama 2 7B model<br>
            • <code>mistral-7b</code> - Mistral AI's 7B model<br>
            • <code>codellama-34b</code> - Code Llama 34B model<br>
            • <code>neural-chat-7b</code> - Intel's Neural Chat model<br>
            • <code>deepseek-coder</code> - DeepSeek Coder model
        </div>
        """)
        with gr.Row():
            with gr.Column(scale=2):
                results_output = gr.Markdown(
                    label="Search Results",
                    value="Enter a model name above and click 'Search GGUF Models' to find quantized versions.",
                    height=400,
                )
            with gr.Column(scale=1):
                info_output = gr.Markdown(
                    label="Additional Information",
                    value="",
                    height=400,
                )
        # Wire up both the button click and pressing Enter in the textbox.
        search_btn.click(
            fn=find_gguf_models,
            inputs=[model_input],
            outputs=[results_output, info_output],
            show_progress="full",
        )
        model_input.submit(
            fn=find_gguf_models,
            inputs=[model_input],
            outputs=[results_output, info_output],
            show_progress="full",
        )
        gr.HTML("""
        <div style="margin-top: 30px; text-align: center; color: #666;">
            <p>Made with ❤️ using Gradio | Data from <a href="https://huggingface.co">Hugging Face</a></p>
            <p>GGUF format by the <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> team</p>
        </div>
        """)
    return iface
if __name__ == "__main__":
    # Create and launch the interface
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,  # ignored on Hugging Face Spaces; useful for local runs
    )
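# Presumed companion requirements.txt for the Space (an assumption, not part
# of this file; only these two packages are imported above):
#   gradio
#   requests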