Spaces:

Hyeonseo
/

hf-translation-reviewer-test

Sleeping

App Files Files Community

hf-translation-reviewer-test / services.py

Hyeonseo

Convert MCP server submodules to regular directories

e596ab5 6 days ago

raw

history blame contribute delete

18.4 kB

	from __future__ import annotations

	import json
	import re
	import textwrap
	from pathlib import Path
	from typing import Dict, List, Optional, Tuple

	from urllib.parse import urlparse

	import requests

	from setting import SETTINGS
	from adapters import github_request, fetch_file_from_pr, dispatch_review


	# Instruction block placed at the top of the user prompt sent to the LLM.
	# It is a ``str.format`` template: ``{pr_number}`` and ``{pr_url}`` are
	# placeholders, and the doubled braces ``{{`` / ``}}`` stand for the literal
	# braces of the JSON schema. The verdict alternatives are separated by a
	# plain ``|``; an escaped ``\|`` would be an invalid escape sequence and
	# would leak backslashes into the prompt.
	PROMPT_TEMPLATE = textwrap.dedent(
	    """
	You are a meticulous bilingual reviewer checking a translation PR.

	PR number: {pr_number}
	PR URL: {pr_url}

	Review the translated text against the original and focus on:
	1. Are there any typos or spelling mistakes?
	2. Are any sentences difficult to understand?
	3. Is the overall content hard to comprehend?

	Always respond with strict JSON using this schema:
	{{
	"verdict": "request_changes" | "comment" | "approve",
	"summary": "<High-level Markdown summary of the review findings>",
	"comments": [
	{{
	"line": <1-based line number in the translated file>,
	"issue": "<Short Markdown description of the problem>",
	"suggested_edit": "<Replacement text for the entire translated line>",
	"context": "<Exact current text of that line for grounding>"
	}},
	...
	]
	}}

	Guidelines:
	- Only include comments for issues that warrant direct feedback.
	- When a concrete rewrite is possible, populate "suggested_edit" with the full replacement line exactly as it should appear after fixing the issue.
	- Keep edits scoped to the referenced line; do not span multiple lines.
	- Always copy the current text of that line verbatim into "context".
	- Omit the "suggested_edit" field or set it to an empty string if no suggestion is available.
	- Use "request_changes" when the identified problems must be fixed before merging.
	- Use "approve" only when the translation is correct and clear with no changes needed.
	- For optional improvements or general observations, use "comment".
	- Keep suggestions tightly scoped so they can be applied as GitHub suggestions.
	- Do not output partial fragments in "suggested_edit"; always provide the entire replacement line including unchanged portions.
	- Use the line numbers from the "TRANSLATED TEXT WITH LINE NUMBERS" section.
	"""
	).strip()


	# --------------------- Core helpers ------------------


	def parse_pr_url(pr_url: str) -> Tuple[str, int]:
	    """Split a GitHub pull-request URL into ``("owner/repo", number)``.

	    The URL path must look like ``/<owner>/<repo>/pull/<number>[/...]``;
	    anything after the number is ignored.

	    Raises:
	        ValueError: if the URL is empty, the path does not have the
	            ``owner/repo/pull/number`` shape, or the number is not numeric.
	    """
	    if not pr_url:
	        raise ValueError("PR URL is required")

	    # Drop empty segments produced by leading/trailing/double slashes.
	    segments = list(filter(None, urlparse(pr_url).path.split("/")))

	    has_pr_shape = len(segments) >= 4 and segments[2] == "pull"
	    if not has_pr_shape:
	        raise ValueError(f"Not a valid GitHub PR URL: {pr_url}")

	    owner, repo, _, number_text = segments[:4]
	    if not number_text.isdigit():
	        raise ValueError(f"PR number not found in URL: {pr_url}")

	    return f"{owner}/{repo}", int(number_text)


	def add_line_numbers(text: str) -> str:
	    """Prefix every line of *text* with a zero-padded 1-based number.

	    Each output line has the form ``NNNN: <line>`` (at least four digits),
	    and lines are joined with ``\\n``. Empty input yields an empty string.
	    """
	    numbered: List[str] = []
	    for number, content in enumerate(text.splitlines(), start=1):
	        numbered.append(f"{number:04d}: {content}")
	    return "\n".join(numbered)


	def load_pr_files(
	    github_token: str,
	    pr_url: str,
	    original_path: str,
	    translated_path: str,
	) -> Tuple[str, int, str, str]:
	    """Fetch the original and translated files for a pull request.

	    Parses *pr_url*, requests the pull request from the GitHub API to get
	    its head SHA, then calls ``fetch_file_from_pr`` once per path with
	    that SHA.

	    Returns:
	        ``(repo_name, pr_number, original, translated)``.

	    Raises:
	        ValueError: propagated from ``parse_pr_url`` for a malformed URL.
	        RuntimeError: if the pull-request data carries no head SHA.
	    """
	    repo_name, pr_number = parse_pr_url(pr_url)

	    pull_endpoint = f"{SETTINGS.github_api_base}/repos/{repo_name}/pulls/{pr_number}"
	    pull_info = github_request(pull_endpoint, github_token)

	    head_sha = pull_info.get("head", {}).get("sha")
	    if not head_sha:
	        raise RuntimeError(f"Unable to determine head SHA for PR {pr_number} in {repo_name}.")

	    # Original first, then translated: same order as the returned tuple.
	    original, translated = (
	        fetch_file_from_pr(repo_name, pr_number, path, head_sha, github_token)
	        for path in (original_path, translated_path)
	    )
	    return repo_name, pr_number, original, translated


	def build_messages(
	    original: str,
	    translated: str,
	    pr_number: int,
	    pr_url: str,
	) -> Tuple[str, str]:
	    """Build the ``(system_prompt, user_prompt)`` pair for the reviewer LLM.

	    The user prompt is ``PROMPT_TEMPLATE`` rendered with the PR number and
	    URL, followed by three delimited sections: the original text, the
	    translated text, and the translated text with line numbers.

	    Args:
	        original: Source-language text.
	        translated: Translated text under review.
	        pr_number: Pull-request number substituted into the template.
	        pr_url: Pull-request URL substituted into the template.

	    Returns:
	        ``(system_prompt, user_prompt)``.
	    """
	    system_prompt = (
	        "You are an expert translation reviewer ensuring clarity, accuracy, "
	        "and readability of localized documentation."
	    )

	    # Render the template so the PR placeholders are filled in and the
	    # doubled braces of the JSON schema collapse to single braces. Only the
	    # template is formatted; braces in the file contents are left alone.
	    instructions = PROMPT_TEMPLATE.format(pr_number=pr_number, pr_url=pr_url)

	    user_prompt = (
	        f"{instructions}\n\n"
	        "----- ORIGINAL TEXT -----\n"
	        f"{original}\n\n"
	        "----- TRANSLATED TEXT -----\n"
	        f"{translated}\n\n"
	        "----- TRANSLATED TEXT WITH LINE NUMBERS -----\n"
	        f"{add_line_numbers(translated)}"
	    )
	    return system_prompt, user_prompt


	def normalize_summary_for_body(summary: str) -> str:
	    """Clean up *summary* for use as the body of a GitHub review.

	    - Empty or whitespace-only input becomes ``"LLM translation review"``.
	    - If the text looks like JSON (starts with ``{`` or ``[``) and decodes
	      to an object with a non-blank string ``"summary"``, that inner
	      summary is returned, stripped.
	    - Otherwise the stripped text is returned unchanged.
	    """
	    text = (summary or "").strip()
	    if not text:
	        return "LLM translation review"

	    if not text.startswith(("{", "[")):
	        return text

	    try:
	        decoded = json.loads(text)
	    except Exception:
	        # Not valid JSON after all; fall back to the text as given.
	        return text

	    if isinstance(decoded, dict):
	        nested = decoded.get("summary")
	        if isinstance(nested, str) and nested.strip():
	            return nested.strip()
	    return text


	# ----------------------- Parsing & GitHub glue ----------------------


	def _extract_json_candidates(raw_response: str) -> List[str]:
	candidates: List[str] = []
	for match in re.finditer(r"```(?:json)?\s(\{.?\})\s*```", raw_response, re.DOTALL):
	snippet = match.group(1).strip()
	if snippet:
	candidates.append(snippet)
	stripped = raw_response.strip()
	if stripped:
	candidates.append(stripped)
	return candidates


	def parse_review_response(raw_response: str) -> Tuple[str, str, List[Dict[str, object]]]:
	    """Parse the LLM's raw reply into ``(verdict, summary, comments)``.

	    The first candidate from ``_extract_json_candidates`` that decodes to
	    a JSON object is used. If none does, the reply is treated as free
	    text: verdict ``"comment"``, the stripped reply as summary, and no
	    comments.

	    The model's output is untrusted, so every field is validated:

	    - ``verdict`` falls back to ``"comment"`` unless it is one of
	      ``request_changes`` / ``comment`` / ``approve`` (case-insensitive).
	    - ``summary`` falls back to the stripped raw reply when missing, blank
	      or not a string.
	    - A comment is kept only if it is an object with a positive integer
	      ``line`` (booleans are rejected) and a non-blank string ``issue``.
	      Non-string ``suggested_edit`` / ``context`` values become ``""``.

	    Returns:
	        ``(verdict, summary, normalized_comments)`` where each comment is a
	        dict with keys ``line``, ``issue``, ``suggested_edit``, ``context``.
	    """

	    def _text(value: object) -> str:
	        # JSON null, numbers or nested structures count as "no text"; calling
	        # .strip() on them directly would raise AttributeError.
	        return value.strip() if isinstance(value, str) else ""

	    parsed: Optional[Dict[str, object]] = None
	    for candidate in _extract_json_candidates(raw_response):
	        try:
	            parsed_candidate = json.loads(candidate)
	        except json.JSONDecodeError:
	            continue
	        if isinstance(parsed_candidate, dict):
	            parsed = parsed_candidate
	            break
	    if parsed is None:
	        return "comment", raw_response.strip(), []

	    verdict = _text(parsed.get("verdict", "comment")).lower()
	    if verdict not in {"request_changes", "comment", "approve"}:
	        verdict = "comment"

	    summary = _text(parsed.get("summary")) or raw_response.strip()

	    comments = parsed.get("comments", [])
	    if not isinstance(comments, list):
	        comments = []

	    normalized_comments: List[Dict[str, object]] = []
	    for comment in comments:
	        if not isinstance(comment, dict):
	            continue
	        line = comment.get("line")
	        # bool is a subclass of int; JSON ``true`` must not be read as line 1.
	        if isinstance(line, bool) or not isinstance(line, int) or line <= 0:
	            continue
	        issue = _text(comment.get("issue"))
	        if not issue:
	            continue
	        normalized_comments.append(
	            {
	                "line": line,
	                "issue": issue,
	                "suggested_edit": _text(comment.get("suggested_edit")),
	                "context": _text(comment.get("context")),
	            }
	        )
	    return verdict, summary, normalized_comments


	def review_event_from_verdict(verdict: str) -> str:
	    """Map a review verdict to the matching GitHub review event name.

	    Unknown verdicts map to ``"COMMENT"``.
	    """
	    event_by_verdict = {
	        "request_changes": "REQUEST_CHANGES",
	        "comment": "COMMENT",
	        "approve": "APPROVE",
	    }
	    try:
	        return event_by_verdict[verdict]
	    except KeyError:
	        return "COMMENT"


	def build_review_comments(
	    translated_path: str,
	    comments: List[Dict[str, object]],
	) -> List[Dict[str, object]]:
	    """Convert normalized review comments into GitHub inline-comment dicts.

	    Each result has ``path`` (always *translated_path*), ``side``
	    (``"RIGHT"``), ``line`` and ``body``. The body is the issue text,
	    optionally followed by a quoted "current text" line and a
	    ```` ```suggestion ```` block, separated by blank lines.

	    Raises:
	        KeyError: if a comment lacks ``"line"`` or ``"issue"``.
	    """

	    def _render(entry: Dict[str, object]) -> Dict[str, object]:
	        line_number = int(entry["line"])
	        sections = [str(entry["issue"]).strip()]

	        proposed = entry.get("suggested_edit", "")
	        if isinstance(proposed, str):
	            suggestion = proposed.rstrip("\r\n")
	        elif proposed:
	            suggestion = str(proposed).rstrip("\r\n")
	        else:
	            suggestion = ""

	        current_text = str(entry.get("context", "")).rstrip("\n")
	        if current_text:
	            sections.append(f"> _Current text_: {current_text}")
	        if suggestion:
	            sections.append("```suggestion\n" + suggestion + "\n```")

	        return {
	            "path": translated_path,
	            "side": "RIGHT",
	            "line": line_number,
	            "body": "\n\n".join(sections).strip(),
	        }

	    return [_render(entry) for entry in comments]


	def attach_translated_line_context(
	    translated_text: str,
	    comments: List[Dict[str, object]],
	) -> None:
	    """Fill in a missing ``"context"`` on each comment, in place.

	    For every comment whose ``"line"`` is an integer inside the translated
	    file and whose ``"context"`` is missing or empty, the text of that line
	    is stored as the context. Other comments are left untouched.
	    """
	    if not comments:
	        return

	    # splitlines() already drops line terminators, so no further trimming.
	    file_lines = translated_text.splitlines()
	    total = len(file_lines)

	    for entry in comments:
	        line_number = entry.get("line")
	        if not isinstance(line_number, int):
	            continue
	        position = line_number - 1
	        if not 0 <= position < total:
	            continue
	        if entry.get("context"):
	            continue
	        entry["context"] = file_lines[position]

	def build_github_review_payload(
	    body: str,
	    event: str = "COMMENT",
	    comments: Optional[List[Dict[str, object]]] = None,
	) -> Dict[str, object]:
	    """Assemble the JSON payload for a GitHub pull-request review.

	    The payload always carries ``event`` and ``body``; ``comments`` is
	    included only when the list is non-empty.
	    """
	    if not comments:
	        return {"event": event, "body": body}
	    return {"event": event, "body": body, "comments": comments}


	def submit_pr_review(
	    repo_name: str,
	    pr_number: int,
	    github_token: str,
	    body: str,
	    event: str,
	    comments: Optional[List[Dict[str, object]]] = None,
	    allow_self_request_changes: bool = True,
	) -> Tuple[Dict, str]:
	    """Submit a review to a GitHub pull request.

	    Includes a workaround for ``REQUEST_CHANGES`` on one's own pull
	    request. If GitHub answers 422 with a message containing "own pull
	    request", the review is re-sent as a ``COMMENT`` whose body is
	    prefixed with ``[REQUEST_CHANGES (self-review)]``, unless
	    *allow_self_request_changes* is false.

	    Returns:
	        ``(response_json, final_event)``. ``final_event`` is *event*, or
	        ``"REQUEST_CHANGES_SELF"`` when the fallback comment was used.

	    Raises:
	        PermissionError: on HTTP 401.
	        RuntimeError: on any other HTTP status >= 400, on a failed
	            fallback, or when the self-review fallback is disallowed.
	    """
	    reviews_endpoint = (
	        f"{SETTINGS.github_api_base}/repos/{repo_name}/pulls/{pr_number}/reviews"
	    )
	    request_headers = {
	        "Accept": "application/vnd.github.v3+json",
	        "Authorization": f"token {github_token}",
	    }

	    def _send(review_event: str, review_body: str) -> requests.Response:
	        return requests.post(
	            reviews_endpoint,
	            headers=request_headers,
	            json=build_github_review_payload(
	                body=review_body,
	                event=review_event,
	                comments=comments,
	            ),
	            timeout=30,
	        )

	    # First attempt, exactly as requested.
	    first_response = _send(event, body)
	    status = first_response.status_code

	    if status == 401:
	        raise PermissionError(
	            "GitHub token is invalid or lacks permission to submit a review."
	        )

	    # Handle REQUEST_CHANGES on the caller's own pull request.
	    if status == 422 and event == "REQUEST_CHANGES":
	        try:
	            details = first_response.json()
	        except ValueError:
	            details = {"message": first_response.text}

	        message = str(details.get("message", ""))
	        error_items = " ".join(str(item) for item in details.get("errors", []))
	        combined_error = f"{message} {error_items}".strip()

	        if "own pull request" in combined_error.lower():
	            if not allow_self_request_changes:
	                raise RuntimeError(
	                    "GitHub does not allow REQUEST_CHANGES on your own pull request: "
	                    + combined_error
	                )

	            retry_response = _send(
	                "COMMENT",
	                "[REQUEST_CHANGES (self-review)]\n\n" + (body or "").strip(),
	            )
	            if retry_response.status_code >= 400:
	                raise RuntimeError(
	                    "Failed to submit fallback self-review comment: "
	                    f"HTTP {retry_response.status_code} - {retry_response.text}"
	                )
	            return retry_response.json(), "REQUEST_CHANGES_SELF"

	    if status >= 400:
	        raise RuntimeError(
	            "Failed to submit review: "
	            f"HTTP {first_response.status_code} - {first_response.text}"
	        )

	    return first_response.json(), event


	# --------------------- High-level domain services ------------------


	def prepare_translation_context(
	    github_token: str,
	    pr_url: str,
	    original_path: str,
	    translated_path: str,
	) -> Dict[str, object]:
	    """Fetch the PR files and build the system/user prompts for them.

	    Returns:
	        A dict with keys ``repo``, ``pr_number``, ``original``,
	        ``translated``, ``system_prompt`` and ``user_prompt``.
	    """
	    loaded = load_pr_files(
	        github_token=github_token,
	        pr_url=pr_url,
	        original_path=original_path,
	        translated_path=translated_path,
	    )
	    repo_name, pr_number, original, translated = loaded

	    prompts = build_messages(
	        original=original,
	        translated=translated,
	        pr_number=pr_number,
	        pr_url=pr_url,
	    )
	    system_prompt, user_prompt = prompts

	    context: Dict[str, object] = {}
	    context["repo"] = repo_name
	    context["pr_number"] = pr_number
	    context["original"] = original
	    context["translated"] = translated
	    context["system_prompt"] = system_prompt
	    context["user_prompt"] = user_prompt
	    return context


	def review_and_emit_payload(
	    provider: str,
	    provider_token: str,
	    model_name: str,
	    pr_url: str,
	    translated_path: str,
	    original: str,
	    translated: str,
	) -> Dict[str, object]:
	    """Run the LLM review and build the matching GitHub review payload.

	    Builds the prompts, sends them through ``dispatch_review``, parses the
	    reply, fills in missing line context from *translated*, and assembles
	    a payload ready for submission.

	    The payload body is passed through ``normalize_summary_for_body``, as
	    ``run_end_to_end`` does, so an empty or JSON-shaped summary never
	    becomes the review body. The ``summary`` entry of the result keeps the
	    parsed summary unchanged.

	    Returns:
	        A dict with keys ``verdict``, ``summary``, ``comments`` (normalized
	        review comments), ``event`` and ``payload``.

	    Raises:
	        ValueError: propagated from ``parse_pr_url`` for a malformed URL.
	    """
	    _, pr_number = parse_pr_url(pr_url)
	    system_prompt, user_prompt = build_messages(
	        original=original,
	        translated=translated,
	        pr_number=pr_number,
	        pr_url=pr_url,
	    )

	    raw = dispatch_review(
	        provider=provider,
	        token=provider_token,
	        system_prompt=system_prompt,
	        user_prompt=user_prompt,
	        model_name=model_name,
	    )
	    verdict, summary, comments = parse_review_response(raw)
	    attach_translated_line_context(translated, comments)

	    event = review_event_from_verdict(verdict)
	    github_comments = build_review_comments(translated_path, comments)
	    payload = build_github_review_payload(
	        body=normalize_summary_for_body(summary),
	        event=event,
	        comments=github_comments,
	    )

	    return {
	        "verdict": verdict,
	        "summary": summary,
	        "comments": comments,
	        "event": event,
	        "payload": payload,
	    }


	def submit_review_to_github(
	    github_token: str,
	    pr_url: str,
	    translated_path: str,
	    payload_or_review: Dict[str, object],
	    allow_self_request_changes: bool = True,
	) -> Dict[str, object]:
	    """Submit a GitHub review from either a payload dict or a review dict.

	    Two input shapes are accepted:

	    - GitHub payload: a string ``event`` plus a truthy ``body``; a list
	      under ``comments`` is forwarded as-is.
	    - Review result: ``verdict`` / ``summary`` / ``comments``, which are
	      converted to an event, a body and inline comments for
	      *translated_path*.

	    Returns:
	        ``{"final_event": ..., "response": ...}`` from ``submit_pr_review``.

	    Raises:
	        ValueError: for a malformed PR URL, or for a ``REQUEST_CHANGES``
	            review with neither body text nor comments.
	    """
	    repo, pr_number = parse_pr_url(pr_url)

	    raw_event = payload_or_review.get("event")
	    raw_body = payload_or_review.get("body")
	    raw_comments = payload_or_review.get("comments")

	    comments: Optional[List[Dict[str, object]]] = None
	    is_github_payload = isinstance(raw_event, str) and bool(raw_body)

	    if is_github_payload:
	        # Already in GitHub payload form.
	        event_str = raw_event
	        body_str = str(raw_body)
	        if isinstance(raw_comments, list):
	            comments = raw_comments
	    else:
	        # Review form (verdict/summary/comments).
	        verdict = str(payload_or_review.get("verdict", "comment")).lower()
	        summary = str(payload_or_review.get("summary", "")).strip()
	        review_comments = raw_comments if isinstance(raw_comments, list) else []

	        event_str = review_event_from_verdict(verdict)
	        body_str = summary or "LLM translation review"
	        comments = build_review_comments(translated_path, review_comments)

	    nothing_to_say = not body_str.strip() and not comments
	    if event_str == "REQUEST_CHANGES" and nothing_to_say:
	        raise ValueError(
	            "REQUEST_CHANGES를 보내려면 review 본문 또는 코멘트가 하나 이상 필요합니다."
	        )

	    response, final_event = submit_pr_review(
	        repo_name=repo,
	        pr_number=pr_number,
	        github_token=github_token,
	        body=body_str,
	        event=event_str,
	        comments=comments,
	        allow_self_request_changes=allow_self_request_changes,
	    )
	    return {"final_event": final_event, "response": response}


	def run_end_to_end(
	    provider: str,
	    provider_token: str,
	    model_name: str,
	    github_token: str,
	    pr_url: str,
	    original_path: str,
	    translated_path: str,
	    save_review: bool = False,
	    save_path: str = "review.json",
	    submit_review_flag: bool = False,
	) -> Dict[str, object]:
	    """Run the whole pipeline: fetch files, review with the LLM, save, submit.

	    Steps, in order:

	    1. Load the original and translated files from the pull request.
	    2. Build prompts and obtain the raw review via ``dispatch_review``.
	    3. Parse the review, fill in missing line context, and build the
	       GitHub payload (body normalized by ``normalize_summary_for_body``).
	    4. If *save_review* is true, write verdict/summary/comments as UTF-8
	       JSON to *save_path* (``~`` expanded).
	    5. If *submit_review_flag* is true, submit the review, allowing the
	       self-review fallback.

	    Returns:
	        A dict with keys ``repo``, ``pr_number``, ``verdict``, ``summary``,
	        ``comments``, ``payload``, ``saved_file`` (path or ``None``) and
	        ``submission`` (dict or ``None``).
	    """
	    repo, pr_number, original, translated = load_pr_files(
	        github_token=github_token,
	        pr_url=pr_url,
	        original_path=original_path,
	        translated_path=translated_path,
	    )

	    system_prompt, user_prompt = build_messages(
	        original=original,
	        translated=translated,
	        pr_number=pr_number,
	        pr_url=pr_url,
	    )

	    llm_reply = dispatch_review(
	        provider=provider,
	        token=provider_token,
	        system_prompt=system_prompt,
	        user_prompt=user_prompt,
	        model_name=model_name,
	    )

	    verdict, summary, comments = parse_review_response(llm_reply)
	    attach_translated_line_context(translated, comments)

	    review_body = normalize_summary_for_body(summary)
	    inline_comments = build_review_comments(translated_path, comments)
	    event = review_event_from_verdict(verdict)
	    payload = build_github_review_payload(
	        body=review_body,
	        event=event,
	        comments=inline_comments,
	    )

	    saved_file_path: Optional[str] = None
	    if save_review:
	        # Persist the parsed review (not the GitHub payload) for later reuse.
	        review_record = {
	            "verdict": verdict,
	            "summary": summary,
	            "comments": comments,
	        }
	        target = Path(save_path).expanduser()
	        target.write_text(
	            json.dumps(review_record, ensure_ascii=False, indent=2),
	            encoding="utf-8",
	        )
	        saved_file_path = str(target)

	    submission = None
	    if submit_review_flag:
	        api_response, final_event = submit_pr_review(
	            repo_name=repo,
	            pr_number=pr_number,
	            github_token=github_token,
	            body=review_body,
	            event=event,
	            comments=inline_comments,
	            allow_self_request_changes=True,
	        )
	        submission = {"final_event": final_event, "response": api_response}

	    return {
	        "repo": repo,
	        "pr_number": pr_number,
	        "verdict": verdict,
	        "summary": summary,
	        "comments": comments,
	        "payload": payload,
	        "saved_file": saved_file_path,
	        "submission": submission,
	    }