Module imodelsx.augtree.llm
Expand source code
import re
from typing import List
import numpy as np
import os.path
from os.path import join
import pickle as pkl
def expand_keyword(
    keyphrase_str: str = 'bad',
    llm_prompt_context: str = '',  # e.g. ' in the context of movie reviews'
    cache_dir: str = None,
    seed: int = 0,
    verbose: bool = False,
):
    """Expand a keyphrase into a list of similar phrases by calling GPT-3 (text-davinci-003).
    """
    # check cache (one subdirectory per prompt context)
    if cache_dir is not None:
        if llm_prompt_context == '':
            cache_dir = join(cache_dir, 'base')
        else:
            cache_dir = join(cache_dir, ''.join(llm_prompt_context.split()))
        os.makedirs(cache_dir, exist_ok=True)
        keyphrase_str = keyphrase_str.replace('/', ' ')
        cache_file = join(cache_dir, f'_{keyphrase_str}___{seed}.pkl')
        cache_raw_file = join(cache_dir, f'raw_{keyphrase_str}___{seed}.pkl')
        if os.path.exists(cache_file):
            if verbose:
                print('cached!')
            return pkl.load(open(cache_file, 'rb'))

    # build the prompt
    prompt = f'Generate 100 concise phrases that are very similar to the keyphrase{llm_prompt_context}:\n'
    prompt += f'Keyphrase: "{keyphrase_str}"\n'
    prompt += '1.'

    # query the completion API
    import openai
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=1000,
        temperature=0.1,
        top_p=1,
        frequency_penalty=0.25,  # maximum is 2
        presence_penalty=0,
        stop=["101"]
    )
    response_text = response['choices'][0]['text']
    ks = convert_response_to_keywords(response_text)

    # cache the raw response and the parsed keyphrases
    if cache_dir is not None:
        pkl.dump(response_text, open(cache_raw_file, 'wb'))
        pkl.dump(ks, open(cache_file, 'wb'))
    return ks
def convert_response_to_keywords(response_text: str, remove_duplicates=True) -> List[str]:
    """Parse the raw LLM completion into a cleaned, deduplicated list of keyphrases."""
    # clean up the keyphrases
    # (split the string on any numeric character, dropping the "1.", "2.", ... markers)
    ks = [
        k.replace('.', '').replace('"', '').strip()
        for k in re.split(r'\d', response_text) if k.strip()
    ]
    # lowercase & keep only phrases longer than 2 characters
    ks = [k.lower() for k in ks if len(k) > 2]
    if remove_duplicates:
        ks = list(set(ks))
    return ks
if __name__ == '__main__':
    refined_keyphrases = expand_keyword(
        keyphrase_str='koala',
        # llm_prompt_context=' about movie reviews',
    )
    print('Refined keyphrases:', refined_keyphrases)
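For reference (derived directly from the code above), with the defaults used in the __main__ block — keyphrase 'koala' and an empty llm_prompt_context — the prompt sent to the completion endpoint is:

Generate 100 concise phrases that are very similar to the keyphrase:
Keyphrase: "koala"
1.

The stop sequence "101" is presumably there to cut the completion off once the model reaches item 101 of the numbered list.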
Functions
def convert_response_to_keywords(response_text: str, remove_duplicates=True) -> List[str]
Parse the raw LLM completion into a cleaned, deduplicated list of keyphrases.
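A minimal usage sketch; the response_text below is a made-up completion, not real model output:

response_text = ' Cuddly koala\n2. Koala bear\n3. Eucalyptus-loving marsupial\n4. Koala bear'
print(convert_response_to_keywords(response_text))
# e.g. ['cuddly koala', 'koala bear', 'eucalyptus-loving marsupial']
# (order is arbitrary because remove_duplicates=True passes the list through set())

Note that because the parser splits on any digit, numbers appearing inside a generated phrase will also split that phrase.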
def expand_keyword(keyphrase_str: str = 'bad', llm_prompt_context: str = '', cache_dir: str = None, seed: int = 0, verbose: bool = False)
Expand a keyphrase into a list of similar phrases by calling GPT-3 (text-davinci-003).
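A minimal usage sketch, assuming the openai package (pre-1.0 Completion API) is installed and OPENAI_API_KEY is set in the environment; it issues a real, billable completion request unless a cached result already exists under cache_dir (the cache_dir path here is hypothetical):

import os
import openai

openai.api_key = os.environ['OPENAI_API_KEY']  # assumed to be set in the environment
phrases = expand_keyword(
    keyphrase_str='bad',
    llm_prompt_context=' in the context of movie reviews',
    cache_dir='cache/llm',  # hypothetical directory; parsed and raw responses are pickled here
    verbose=True,
)
print(phrases[:5])

With a non-empty llm_prompt_context, results are cached under a subdirectory named by the context with whitespace removed (here cache/llm/inthecontextofmoviereviews); with an empty context they go under cache/llm/base.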