NanoGPT_charToken

Sleeping

App Files Files Community

NanoGPT_charToken / app.py

MadhurGarg

Update app.py

480a26b about 2 years ago

raw

history blame contribute delete

4.47 kB

	import os
	import pickle
	from contextlib import nullcontext
	import torch
	import tiktoken
	from model import GPTConfig, GPT
	import gradio as gr

	# Loaded (model, encode, decode) triples keyed by (checkpoint path, device,
	# compile flag), so the checkpoint is read from disk once per process rather
	# than on every request.
	_NANOGPT_CACHE = {}


	def _load_nanogpt(ckpt_path, device, compile_model):
	    """Load the GPT checkpoint and matching tokenizer, reusing a cached copy.

	    Args:
	        ckpt_path: Path of the checkpoint file passed to ``torch.load``.
	        device: Device string used for ``map_location`` and ``model.to``.
	        compile_model: When true, wrap the model with ``torch.compile``.

	    Returns:
	        A ``(model, encode, decode)`` tuple. ``encode`` maps a string to a list
	        of token ids and ``decode`` maps a list of token ids back to a string.
	    """
	    cache_key = (ckpt_path, device, compile_model)
	    cached = _NANOGPT_CACHE.get(cache_key)
	    if cached is not None:
	        return cached

	    # init from a model saved in a specific checkpoint file
	    checkpoint = torch.load(ckpt_path, map_location=device)
	    model = GPT(GPTConfig(**checkpoint['model_args']))
	    state_dict = checkpoint['model']
	    # Strip the '_orig_mod.' key prefix (present when the model was saved
	    # after torch.compile) so the keys match the plain module.
	    unwanted_prefix = '_orig_mod.'
	    for key in list(state_dict):
	        if key.startswith(unwanted_prefix):
	            state_dict[key[len(unwanted_prefix):]] = state_dict.pop(key)
	    model.load_state_dict(state_dict)

	    model.eval()
	    model.to(device)
	    if compile_model:
	        model = torch.compile(model)  # requires PyTorch 2.0 (optional)

	    # look for the meta pickle in case it is available in the dataset folder
	    load_meta = False
	    if 'config' in checkpoint and 'dataset' in checkpoint['config']:  # older checkpoints might not have these...
	        meta_path = os.path.join('data', checkpoint['config']['dataset'], 'meta.pkl')
	        load_meta = os.path.exists(meta_path)

	    if load_meta:
	        print(f"Loading meta from {meta_path}...")
	        # NOTE(security): pickle executes arbitrary code on load; meta.pkl must
	        # come from a trusted source (it ships alongside the checkpoint).
	        with open(meta_path, 'rb') as f:
	            meta = pickle.load(f)
	        # TODO want to make this more general to arbitrary encoder/decoder schemes
	        stoi, itos = meta['stoi'], meta['itos']

	        def encode(text):
	            # Report out-of-vocabulary characters explicitly instead of
	            # failing with a bare KeyError from the lookup table.
	            unknown = sorted({ch for ch in text if ch not in stoi})
	            if unknown:
	                raise ValueError(
	                    f"Prompt contains characters outside the model vocabulary: {unknown!r}"
	                )
	            return [stoi[ch] for ch in text]

	        def decode(ids):
	            return ''.join(itos[i] for i in ids)
	    else:
	        # ok let's assume gpt-2 encodings by default
	        print("No meta.pkl found, assuming GPT-2 encodings...")
	        enc = tiktoken.get_encoding("gpt2")

	        def encode(text):
	            # The special token must be spelled exactly "<|endoftext|>" for
	            # tiktoken to accept it inside a prompt.
	            return enc.encode(text, allowed_special={"<|endoftext|>"})

	        decode = enc.decode

	    loaded = (model, encode, decode)
	    _NANOGPT_CACHE[cache_key] = loaded
	    return loaded


	def nanogpt(start: str, max_new_tokens=500, num_samples=2):
	    """Generate text from the checkpointed GPT model, seeded with a prompt.

	    Args:
	        start: Prompt text; it is tokenized and fed to the model as context.
	        max_new_tokens: Token budget forwarded to ``model.generate``. Coerced
	            to ``int`` because UI widgets may deliver a float.
	        num_samples: Currently unused; kept so existing callers keep working.

	    Returns:
	        The decoded first sequence returned by ``model.generate``. In nanoGPT
	        this is the prompt followed by the sampled continuation -- confirm
	        against model.py.

	    Raises:
	        ValueError: If ``start`` is empty, or (with a character-level
	            vocabulary) contains characters the vocabulary does not cover.
	    """
	    # Validate before any heavy work: an empty prompt would yield a
	    # zero-length context tensor for the model.
	    if not start:
	        raise ValueError("Prompt must not be empty.")
	    max_new_tokens = int(max_new_tokens)

	    # -----------------------------------------------------------------------------
	    temperature = 0.8  # 1.0 = no change, < 1.0 = less random, > 1.0 = more random, in predictions
	    top_k = 200  # retain only the top_k most likely tokens, clamp others to have 0 probability
	    seed = 1337
	    device = 'cpu'  # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1', etc.
	    dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16'  # 'float32' or 'bfloat16' or 'float16'
	    compile_model = False  # use PyTorch 2.0 to compile the model to be faster
	    # -----------------------------------------------------------------------------

	    # Re-seeding on every call means the same prompt and settings reproduce
	    # the same sample.
	    torch.manual_seed(seed)
	    torch.cuda.manual_seed(seed)
	    torch.backends.cuda.matmul.allow_tf32 = True  # allow tf32 on matmul
	    torch.backends.cudnn.allow_tf32 = True  # allow tf32 on cudnn
	    device_type = 'cuda' if 'cuda' in device else 'cpu'  # for later use in torch.autocast
	    ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
	    ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)

	    model, encode, decode = _load_nanogpt('ckpt.pt', device, compile_model)

	    start_ids = encode(start)
	    # [None, ...] adds a leading batch dimension of size 1.
	    x = torch.tensor(start_ids, dtype=torch.long, device=device)[None, ...]

	    # run generation
	    with torch.no_grad(), ctx:
	        y = model.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
	    return decode(y[0].tolist())

	# Input widgets, listed in the same order as nanogpt's first two parameters
	# (prompt text, then the token budget).
	_prompt_box = gr.Textbox(label="Prompt", value='All that glisters is not gold.')
	_token_slider = gr.Slider(
	    minimum=300,
	    maximum=500,
	    value=300,
	    label="Maximum number of tokens to be generated",
	)
	_generated_text = gr.Text(label="Generated Text")

	_description = (
	    "NanoGPT is a transformer-based language model with only 10.65 million parameters, "
	    "trained on a small dataset of Shakespeare work (size: 1MB only). "
	    "It is trained with character level tokenization with a simple objective: "
	    "predict the next char, given all of the previous chars within a text."
	)

	# Each example row is [prompt, max tokens], matching the two inputs above.
	_examples = [
	    ['We know what we are, but know not what we may be', 300],
	    ['Sweet are the uses of adversity which, like the toad, ugly and venomous, wears yet a precious jewel in his head', 300],
	]

	# The UI is built and launched at import time; INTERFACE is bound to the
	# value returned by launch(), not to the gr.Interface object itself.
	INTERFACE = gr.Interface(
	    fn=nanogpt,
	    inputs=[_prompt_box, _token_slider],
	    outputs=_generated_text,
	    title="NanoGPT",
	    description=_description,
	    examples=_examples,
	).launch(debug=True)