Module imodelsx.iprompt.llm
Functions
def get_llm(checkpoint, seed=1, role: str = None)
def get_llm(checkpoint, seed=1, role: str = None):
    if checkpoint.startswith("text-"):
        return llm_openai(checkpoint, seed=seed)
    elif checkpoint.startswith("gpt-3") or checkpoint.startswith("gpt-4"):
        return llm_openai_chat(checkpoint, seed=seed, role=role)
    else:
        # warning: this sets torch.manual_seed(seed)
        return llm_hf(checkpoint, seed=seed)
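A minimal usage sketch of the dispatch logic; the checkpoint and prompt below are illustrative, and any other HuggingFace model id would route to llm_hf in the same way:

from imodelsx.iprompt.llm import get_llm

# "text-*" checkpoints route to the OpenAI completion wrapper,
# "gpt-3*" / "gpt-4*" to the OpenAI chat wrapper, and everything else to HuggingFace
llm = get_llm("google/flan-t5-xl", seed=1)
print(llm("Translate to French: Hello, how are you?", max_new_tokens=20))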
def llm_hf(checkpoint='google/flan-t5-xl', seed=1)
def llm_hf(checkpoint="google/flan-t5-xl", seed=1):
    LLAMA_DIR = "/home/chansingh/llama"

    class LLM_HF:
        def __init__(self, checkpoint, seed):
            # set tokenizer
            if "facebook/opt" in checkpoint:
                # opt can't use fast tokenizer
                self._tokenizer = AutoTokenizer.from_pretrained(
                    checkpoint, use_fast=False
                )
            elif "llama_" in checkpoint:
                self._tokenizer = transformers.LlamaTokenizer.from_pretrained(
                    join(LLAMA_DIR, checkpoint)
                )
            elif "PMC_LLAMA" in checkpoint:
                self._tokenizer = transformers.LlamaTokenizer.from_pretrained(
                    "chaoyi-wu/PMC_LLAMA_7B"
                )
            else:
                self._tokenizer = AutoTokenizer.from_pretrained(
                    checkpoint, use_fast=True
                )

            # set checkpoint
            if "google/flan" in checkpoint:
                self._model = T5ForConditionalGeneration.from_pretrained(
                    checkpoint, device_map="auto", torch_dtype=torch.float16
                )
            elif "llama_" in checkpoint:
                self._model = transformers.LlamaForCausalLM.from_pretrained(
                    join(LLAMA_DIR, checkpoint),
                    device_map="auto",
                    torch_dtype=torch.float16,
                )
            elif checkpoint == "gpt-xl":
                self._model = AutoModelForCausalLM.from_pretrained(checkpoint)
            else:
                self._model = AutoModelForCausalLM.from_pretrained(
                    checkpoint, device_map="auto", torch_dtype=torch.float16
                )
            self.checkpoint = checkpoint
            self.cache_dir = join(
                CACHE_DIR, "cache_hf", f'{checkpoint.replace("/", "_")}___{seed}'
            )
            self.seed = seed

        def __call__(
            self,
            prompt: str,
            stop: str = None,
            max_new_tokens=20,
            do_sample=False,
            use_cache=True,
        ) -> str:
            """Note: `stop` is only applied post-hoc to truncate the decoded output."""
            with torch.no_grad():
                # cache
                os.makedirs(self.cache_dir, exist_ok=True)
                hash_str = hashlib.sha256(prompt.encode()).hexdigest()
                cache_file = join(
                    self.cache_dir, f"{hash_str}__num_tok={max_new_tokens}.pkl"
                )
                if os.path.exists(cache_file) and use_cache:
                    return pkl.load(open(cache_file, "rb"))

                inputs = self._tokenizer(
                    prompt, return_tensors="pt", return_attention_mask=True
                ).to(self._model.device)

                if self._tokenizer.pad_token_id is None:
                    self._tokenizer.pad_token_id = self._tokenizer.eos_token_id
                torch.manual_seed(0)
                outputs = self._model.generate(
                    **inputs,
                    max_new_tokens=max_new_tokens,
                    do_sample=do_sample,
                    pad_token_id=self._tokenizer.pad_token_id,
                )
                out_str = self._tokenizer.decode(outputs[0])

                # strip special tokens and the echoed prompt
                if "facebook/opt" in self.checkpoint:
                    out_str = out_str[len("</s>") + len(prompt):]
                elif "google/flan" in self.checkpoint:
                    out_str = out_str[len("<pad>"): out_str.index("</s>")]
                elif "PMC_LLAMA" in self.checkpoint:
                    out_str = out_str[len("<unk>") + len(prompt):]
                elif "llama_" in self.checkpoint:
                    out_str = out_str[len("<s>") + len(prompt):]
                else:
                    out_str = out_str[len(prompt):]

                if stop is not None and isinstance(stop, str) and stop in out_str:
                    out_str = out_str[: out_str.index(stop)]

                pkl.dump(out_str, open(cache_file, "wb"))
                return out_str

        def _get_logit_for_target_token(
            self, prompt: str, target_token_str: str
        ) -> float:
            """Get the probability of target_token_str being the next token.

            This is ill-defined when target_token_str tokenizes to multiple tokens;
            currently only the first token is used.
            """
            # get first token id in target_token_str
            target_token_id = self._tokenizer(target_token_str)["input_ids"][0]

            # get prob of target token
            inputs = self._tokenizer(
                prompt,
                return_tensors="pt",
                return_attention_mask=True,
                padding=False,
                truncation=False,
            ).to(self._model.device)
            # shape is (batch_size, seq_len, vocab_size)
            logits = self._model(**inputs)["logits"].detach().cpu()
            # shape is (vocab_size,)
            probs_next_token = softmax(logits[0, -1, :].numpy().flatten())
            return probs_next_token[target_token_id]

        @property
        def _identifying_params(self) -> Mapping[str, Any]:
            """Get the identifying parameters."""
            return vars(self)

        @property
        def _llm_type(self) -> str:
            return "custom_hf_llm_for_langchain"

    return LLM_HF(checkpoint, seed)
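A hedged usage sketch for the returned LLM_HF wrapper; the checkpoint, prompt, and target token are illustrative, and completions are cached on disk under CACHE_DIR/cache_hf:

from imodelsx.iprompt.llm import llm_hf

llm = llm_hf(checkpoint="google/flan-t5-xl", seed=1)

# greedy decoding; the decoded text is truncated at the first occurrence of `stop`
out = llm("Q: What is the capital of France?\nA:", stop="\n", max_new_tokens=10)

# probability of " Paris" as the next token (only its first token is scored)
p = llm._get_logit_for_target_token("Q: What is the capital of France?\nA:", " Paris")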
def llm_openai(checkpoint='text-davinci-003', seed=1)
def llm_openai(checkpoint="text-davinci-003", seed=1):
    import openai

    class LLM_OpenAI:
        def __init__(self, checkpoint, seed):
            self.cache_dir = join(
                CACHE_DIR, "cache_openai", f'{checkpoint.replace("/", "_")}___{seed}'
            )
            self.checkpoint = checkpoint

        @retry(wait=wait_fixed(LLM_REPEAT_DELAY), stop=stop_after_attempt(10))
        def __call__(self, prompt: str, max_new_tokens=250, do_sample=True, stop=None):
            # cache
            os.makedirs(self.cache_dir, exist_ok=True)
            id_str = prompt + str(max_new_tokens) + (stop or "") + checkpoint
            hash_str = hashlib.sha256(id_str.encode()).hexdigest()
            cache_file = join(
                self.cache_dir, f"{hash_str}__num_tok={max_new_tokens}.pkl"
            )
            if os.path.exists(cache_file):
                return pkl.load(open(cache_file, "rb"))

            response = openai.Completion.create(
                engine=self.checkpoint,
                prompt=prompt,
                max_tokens=max_new_tokens,
                temperature=0.1,
                top_p=1,
                frequency_penalty=0.25,  # maximum is 2
                presence_penalty=0,
                stop=stop,
            )
            response_text = response["choices"][0]["text"]
            pkl.dump(response_text, open(cache_file, "wb"))
            return response_text

    return LLM_OpenAI(checkpoint, seed)
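An illustrative call against the completion endpoint; it assumes an OpenAI API key is already configured for the openai package, and the prompt is made up:

from imodelsx.iprompt.llm import llm_openai

llm = llm_openai(checkpoint="text-davinci-003", seed=1)
# responses are cached under CACHE_DIR/cache_openai, keyed on prompt + settings
out = llm("Summarize in one sentence: prompt engineering searches over natural-language instructions.",
          max_new_tokens=100, stop="\n\n")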
def llm_openai_chat(checkpoint='gpt-3.5-turbo', seed=1, role=None)
def llm_openai_chat(checkpoint="gpt-3.5-turbo", seed=1, role=None):
    class LLM_Chat:
        """Chat models take a different format:
        https://platform.openai.com/docs/guides/chat/introduction
        """

        def __init__(self, checkpoint, seed, role):
            self.cache_dir = join(
                CACHE_DIR, "cache_openai", f'{checkpoint.replace("/", "_")}___{seed}'
            )
            self.checkpoint = checkpoint
            self.role = role

        @retry(wait=wait_fixed(LLM_REPEAT_DELAY), stop=stop_after_attempt(10))
        def __call__(
            self,
            prompts_list: List[Dict[str, str]],
            max_new_tokens=250,
            stop=None,
        ):
            """
            prompts_list: List[Dict[str, str]]
                List of dicts, each with keys 'role' and 'content'. Example:
                [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": "Who won the world series in 2020?"},
                    {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
                    {"role": "user", "content": "Where was it played?"},
                ]
            prompts_list: str
                Alternatively, a single string, which gets wrapped into a basic messages list:
                [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": <prompts_list>},
                ]
            """
            if isinstance(prompts_list, str):
                role = self.role
                if role is None:
                    role = "You are a helpful assistant."
                prompts_list = [
                    {"role": "system", "content": role},
                    {"role": "user", "content": prompts_list},
                ]
            assert isinstance(prompts_list, list), prompts_list

            # cache
            os.makedirs(self.cache_dir, exist_ok=True)
            prompts_list_dict = {
                str(i): sorted(v.items()) for i, v in enumerate(prompts_list)
            }
            if not self.checkpoint == "gpt-3.5-turbo":
                prompts_list_dict["checkpoint"] = self.checkpoint
            dict_as_str = json.dumps(prompts_list_dict, sort_keys=True)
            id_str = dict_as_str + str(max_new_tokens) + (stop or "") + checkpoint
            hash_str = hashlib.sha256(id_str.encode()).hexdigest()
            cache_file = join(
                self.cache_dir,
                f"chat__{hash_str}__num_tok={max_new_tokens}.pkl",
            )
            if os.path.exists(cache_file):
                print("cached!")
                return pkl.load(open(cache_file, "rb"))
            print("not cached")

            response = openai.ChatCompletion.create(
                model=self.checkpoint,
                messages=prompts_list,
                max_tokens=max_new_tokens,
                temperature=0.1,
                top_p=1,
                frequency_penalty=0.25,  # maximum is 2
                presence_penalty=0,
                stop=stop,
            )["choices"][0]["message"]["content"]
            pkl.dump(response, open(cache_file, "wb"))
            return response

    return LLM_Chat(checkpoint, seed, role)
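A sketch of the two accepted input formats; the role and message contents are illustrative, and an OpenAI API key is assumed to be configured:

from imodelsx.iprompt.llm import llm_openai_chat

chat = llm_openai_chat(checkpoint="gpt-3.5-turbo", seed=1, role="You are a terse assistant.")

# a plain string is wrapped into a system + user message pair using `role`
print(chat("Who won the World Series in 2020?"))

# or pass an explicit messages list directly
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who won the World Series in 2020?"},
]
print(chat(messages, max_new_tokens=100))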
Classes
class T5Tokenizer (*args, **kwargs)
Methods
def call(self, *args, **kwargs)
def call(self, *args, **kwargs):
    pass
class LlamaTokenizer (*args, **kwargs)
Methods
def call(self, *args, **kwargs)
def call(self, *args, **kwargs):
    pass