[SETTINGS]
folder_path = /path/to/your/folder
openai_api_key = your_openai_api_key
hotkey = your_preferred_hotkey
interview_mode = True or False
gpt_model = gpt-4
system_prompt = You are a knowledgeable job interview assistant that uses information from provided textual excerpts to provide impressive, but concise answers to interview questions.
temperature = 0.5
max_tokens = 1000
resume_title = resume
job_desc_title = description

#config.py 

import configparser
from colorama import init
import os

# Initialise colorama for this module's console output (autoreset enabled).
init(autoreset=True)

# Module-level cache of loaded settings; filled by the configure_* functions
# below and read through get_config().
config_data = {}

def open_config():
    """Load ``config.ini`` from the current working directory.

    If the file does not exist it is created with the default settings.
    If it exists but is missing the ``SETTINGS`` section or individual
    options (for example a hand-written or older file), the missing entries
    are filled with defaults in memory so that later ``config.get`` /
    ``getboolean`` calls do not raise ``NoSectionError``/``NoOptionError``.
    An existing file is never rewritten by this function.

    Returns:
        configparser.ConfigParser: parser holding a complete ``SETTINGS`` section.
    """
    defaults = {
        'folder_path': '',
        'openai_api_key': '',
        'hotkey': 'alt_r',
        'interview_mode': 'True',
        'gpt_model': 'gpt-4',
        'system_prompt': 'You are a knowledgeable job interview assistant that uses information from provided textual excerpts to provide impressive, but concise answers to interview questions.',
        'temperature': '0.5',
        'max_tokens': '1000',
        'resume_title': '',
        'job_desc_title': ''
    }
    config = configparser.ConfigParser()

    if os.path.exists('config.ini'):
        config.read('config.ini')
        if not config.has_section('SETTINGS'):
            config.add_section('SETTINGS')
        settings = config['SETTINGS']
        # Back-fill only what is absent; values present in the file always win.
        for option, default_value in defaults.items():
            if option not in settings:
                settings[option] = default_value
    else:
        config['SETTINGS'] = defaults
        with open('config.ini', 'w') as configfile:
            config.write(configfile)

    return config

def get_config(key):
    """Return the cached setting stored under ``key``, or ``None`` when absent."""
    if key in config_data:
        return config_data[key]
    return None
def configure_user_settings():
    """Load the user-facing settings, prompting on stdin for blank ones.

    Reads the values from the ``SETTINGS`` section, asks the user for the
    folder path and API key when they are empty, offers interview mode when
    it is off, and (in interview mode) asks for missing document titles.
    The results are stored on the in-memory parser and in ``config_data``;
    this function does not write ``config.ini`` itself.

    Returns:
        tuple: ``(folder_path, openai_api_key, hotkey, interview_mode,
        resume_title, job_desc_title)``.
    """
    config = open_config()
    section = 'SETTINGS'

    folder_path, openai_api_key, hotkey = (
        config.get(section, option)
        for option in ('folder_path', 'openai_api_key', 'hotkey')
    )
    interview_mode = config.getboolean(section, 'interview_mode')
    resume_title, job_desc_title = (
        config.get(section, option)
        for option in ('resume_title', 'job_desc_title')
    )

    # Fill in anything the configuration left blank.
    if not folder_path:
        folder_path = input("Enter directory path for .txt documents: ")
    if not openai_api_key:
        openai_api_key = input("Enter your OpenAI API key: ")

    # Interview mode is offered whenever it is currently switched off.
    if interview_mode is False:
        answer = input("Do you want to use interview mode? (y/n): ")
        interview_mode = answer.lower() == 'y'

    # The document titles are only needed in interview mode.
    if interview_mode and not resume_title:
        resume_title = input("Enter resume doc title (without the .txt): ")
    if interview_mode and not job_desc_title:
        job_desc_title = input("Enter job description doc title (without the .txt): "  + "\n")

    # Mirror the final values onto the parser (stored as strings).
    stored_values = {
        'folder_path': folder_path,
        'openai_api_key': openai_api_key,
        'hotkey': hotkey,
        'interview_mode': str(interview_mode),
        'resume_title': resume_title,
        'job_desc_title': job_desc_title,
    }
    for option, value in stored_values.items():
        config[section][option] = value

    # Publish the typed values to the module-level cache.
    config_data.update(
        folder_path=folder_path,
        openai_api_key=openai_api_key,
        hotkey=hotkey,
        resume_title=resume_title,
        job_desc_title=job_desc_title,
        interview_mode=interview_mode,
    )

    return folder_path, openai_api_key, hotkey, interview_mode, resume_title, job_desc_title


def configure_gpt_settings():
    """Load the model-related settings and publish them to ``config_data``.

    Returns:
        tuple: ``(gpt_model, system_prompt, temperature, max_tokens)`` with
        ``temperature`` as a float and ``max_tokens`` as an int.
    """
    config = open_config()

    # Each option is paired with the parser accessor that yields its type.
    readers = {
        'gpt_model': config.get,
        'system_prompt': config.get,
        'temperature': config.getfloat,
        'max_tokens': config.getint,
    }
    loaded = {option: read('SETTINGS', option) for option, read in readers.items()}

    # Write the values back onto the in-memory parser as strings.
    for option, value in loaded.items():
        config['SETTINGS'][option] = str(value)

    config_data.update(loaded)

    return tuple(loaded.values())

def configure_settings(**kwargs):
    """Load all settings, apply optional overrides and save ``config.ini``.

    The user and GPT settings are gathered first (which may prompt on
    stdin), copied onto the parser that gets written to disk, and cached in
    ``config_data``. Keyword overrides are applied afterwards so they are not
    overwritten by the freshly loaded values.

    Args:
        **kwargs: setting name -> new value. Values are stored as strings in
            the file. ``interview_mode``, ``temperature`` and ``max_tokens``
            are cached with their parsed type (bool/float/int); an
            unparsable value raises ``ValueError`` before the file is
            written.

    Returns:
        tuple: the user settings followed by the GPT settings, as returned
        by ``configure_user_settings`` and ``configure_gpt_settings``
        (i.e. the values before overrides were applied).
    """
    config = open_config()
    user_settings = configure_user_settings()
    gpt_settings = configure_gpt_settings()

    setting_names = [
        'folder_path', 'openai_api_key', 'hotkey', 'interview_mode', 'resume_title', 'job_desc_title',
        'gpt_model', 'system_prompt', 'temperature', 'max_tokens',
    ]
    current = dict(zip(setting_names, user_settings + gpt_settings))

    # The helpers above work on their own parser objects, so copy their
    # results onto the parser that is actually saved below.
    for name, value in current.items():
        config['SETTINGS'][name] = str(value)
    config_data.update(current)

    # Overrides go last so neither the file nor the cache loses them.
    typed_readers = {
        'interview_mode': config.getboolean,
        'temperature': config.getfloat,
        'max_tokens': config.getint,
    }
    for key, value in kwargs.items():
        config['SETTINGS'][key] = str(value)
        reader = typed_readers.get(key)
        config_data[key] = reader('SETTINGS', key) if reader else value

    # Save the configuration to a file
    with open('config.ini', 'w') as configfile:
        config.write(configfile)

    return user_settings + gpt_settings

#openai_util.py

import concurrent.futures
import os
import re
import time
import warnings
from contextlib import asynccontextmanager
from typing import Dict, Tuple, List

import openai
import pandas as pd
import tiktoken
from colorama import Fore, Style
from scipy import spatial
from tqdm import tqdm

from config import configure_gpt_settings, get_config

# Populate the shared settings cache at import time so the constants below
# can be read from it.
configure_gpt_settings()

# Model used for every embedding request in this module.
EMBEDDING_MODEL = "text-embedding-ada-002"
# Chat-completion parameters taken from the loaded configuration.
GPT_MODEL = get_config('gpt_model')
TEMPERATURE = get_config('temperature')
MAX_TOKENS = get_config('max_tokens')
SYSTEM_PROMPT = get_config('system_prompt')

# Maximum number of tokens per document section produced by split_text().
MAX_LENGTH = 200
# Default number of rows returned by strings_ranked_by_relatedness().
TOP_N = 3

# Tokenizer used by split_text() to cut documents into sections.
tokenizer = tiktoken.get_encoding("cl100k_base")
# NOTE(review): this silences every warning process-wide — confirm that is intended.
warnings.filterwarnings('ignore')


def transcribe(audio_filepath) -> str:
    """Transcribe an audio file with the ``whisper-1`` model.

    Args:
        audio_filepath: path of the audio file to upload.

    Returns:
        The transcript text, or ``""`` if the request or file access failed
        (the error is printed rather than raised).
    """
    try:
        # The context manager closes the file handle even when the API call fails.
        with open(audio_filepath, "rb") as audio_file:
            transcript = openai.Audio.transcribe(
                file=audio_file,
                model="whisper-1",
                prompt="This is an audio recording of a professional, personable, and fluid conversation.",
            )
        return transcript["text"]
    except openai.error.OpenAIError as api_err:
        print(Style.BRIGHT + Fore.RED + "API Error:", api_err)
    except Exception as e:
        print(Style.BRIGHT + Fore.RED + "Error:", e)
    return ""


def remove_non_ascii(text: str) -> str:
    """Return ``text`` with every character outside the ASCII range removed."""
    ascii_bytes = text.encode('ascii', errors='ignore')
    return ascii_bytes.decode('ascii')

def transcribe_and_clean(mp3_filepath) -> str:
    """Transcribe ``mp3_filepath`` and strip non-ASCII characters from the result.

    Returns a fixed failure message when the transcription comes back empty.
    """
    raw_text = transcribe(mp3_filepath)
    if not raw_text:
        return "Transcription failed. Please try again."
    return remove_non_ascii(raw_text)

def num_tokens(text: str, model: str = GPT_MODEL) -> int:
    """Count the tokens in ``text`` using the tiktoken encoding for ``model``."""
    token_ids = tiktoken.encoding_for_model(model).encode(text)
    return len(token_ids)


def preprocess_text(text):
    """Turn newlines into spaces and collapse each whitespace run to one space."""
    without_newlines = text.replace('\n', ' ')
    return re.sub(r'\s+', ' ', without_newlines)


def split_text(text, document_title):
    """Split ``text`` into sections of at most ``MAX_LENGTH`` tokens.

    The text is whitespace-normalised, tokenised with the module tokenizer
    and cut into consecutive chunks.

    Args:
        text: raw document text.
        document_title: title recorded on every section.

    Returns:
        list[dict]: one dict per section with keys ``title``, ``loc`` (the
        decoded first 10 tokens of the section, or the whole section when it
        is shorter, so a short trailing section no longer gets an empty
        locator), ``text`` and ``tokens`` (token count). Empty input yields
        an empty list.
    """
    token_ids = tokenizer.encode(preprocess_text(text))

    sections = []
    for start in range(0, len(token_ids), MAX_LENGTH):
        chunk = token_ids[start:start + MAX_LENGTH]
        sections.append({
            "title": document_title,
            # Slicing handles chunks shorter than 10 tokens without a special case.
            "loc": tokenizer.decode(chunk[:10]).strip(),
            "text": tokenizer.decode(chunk).strip(),
            "tokens": len(chunk),
        })

    return sections


def get_embedding(text: str, model: str = EMBEDDING_MODEL, retry_limit=3, retry_delay=5) -> list[float]:
    """Request the embedding vector for ``text``, retrying on rate limits.

    Args:
        text: text to embed.
        model: embedding model name.
        retry_limit: maximum number of attempts.
        retry_delay: seconds to wait between attempts after a rate-limit error.

    Returns:
        The embedding as a list of floats, or ``None`` when a non-rate-limit
        API error occurs or every attempt was rate limited.
    """
    for attempt in range(1, retry_limit + 1):
        try:
            time.sleep(0.1)  # Wait for a tiny interval of time between each call
            result = openai.Embedding.create(
                model=model,
                input=text
            )
            return result["data"][0]["embedding"]
        except openai.error.RateLimitError:
            # Rate limited: fall through to the shared back-off below.
            pass
        except openai.error.OpenAIError as e:
            print(f"Error: {e}")
            return None
        # Only announce and wait when another attempt will actually follow.
        if attempt < retry_limit:
            print(f"Retrying... (attempt {attempt})")
            time.sleep(retry_delay)
    return None


def compute_doc_embeddings(df: pd.DataFrame, batch_size=3, num_workers=6) -> Dict[Tuple[str, str], List[float]]:
    """Compute an embedding for the ``text`` of every row in ``df``.

    Rows are processed in slices of ``batch_size`` on a thread pool with
    ``num_workers`` workers, with a progress bar over the batches.

    Returns:
        dict mapping each row's index label to its embedding; rows whose
        embedding could not be computed are reported and left out.
    """

    def process_batch(batches: pd.DataFrame) -> Dict[Tuple[str, str], List[float]]:
        # Read all texts first, then embed them one at a time.
        texts = [row.text for _, row in batches.iterrows()]
        results = {}
        for label, text in zip(batches.index, texts):
            vector = get_embedding(text)
            if vector is not None:
                results[label] = vector
            else:
                print("Failed to compute embedding for document with index:", label)
        return results

    embeddings = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [
            executor.submit(process_batch, df.iloc[start:start + batch_size])
            for start in range(0, len(df), batch_size)
        ]

        # tqdm's desc parameter supplies the label shown next to the progress bar.
        progress = tqdm(concurrent.futures.as_completed(futures), total=len(futures),
                        desc='Creating Document ' 'Embeddings')
        for finished in progress:
            embeddings.update(finished.result())

    return embeddings


def embed_documents(folder_path):
    """Split every ``.txt`` file in ``folder_path`` into sections and embed them.

    Args:
        folder_path: directory containing the ``.txt`` documents.

    Returns:
        pandas.DataFrame with one row per section (the columns produced by
        ``split_text``) plus an ``embeddings`` column.

    Raises:
        ValueError: if the folder contains no ``.txt`` files.
    """
    dfs = []
    # Sorted so the row order does not depend on the directory listing order.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".txt"):
            continue
        with open(os.path.join(folder_path, filename), 'r') as f:
            original = f.read()
        original_title = os.path.splitext(filename)[0]
        dfs.append(pd.DataFrame(split_text(original, original_title)))

    if not dfs:
        # pd.concat would raise a far less helpful "No objects to concatenate".
        raise ValueError(f"No .txt documents found in folder: {folder_path}")

    # Concatenate all dataframes in dfs
    combined_df = pd.concat(dfs, ignore_index=True)

    # Compute embeddings for the combined dataframe; the explicit Series makes
    # the result align on the row index instead of relying on dict coercion.
    combined_df['embeddings'] = pd.Series(compute_doc_embeddings(combined_df), dtype=object)

    return combined_df


def strings_ranked_by_relatedness(query: str, df: pd.DataFrame,
                                  relatedness_fn=lambda x, y: 1 - spatial.distance.cosine(x, y),
                                  top_n: int = TOP_N) -> pd.DataFrame:
    """Return the ``top_n`` rows of ``df`` most related to ``query``.

    The query is embedded with ``EMBEDDING_MODEL`` and compared against each
    row's ``embeddings`` value using ``relatedness_fn``.

    Args:
        query: text to rank the rows against.
        df: frame with an ``embeddings`` column.
        relatedness_fn: callable ``(query_embedding, row_embedding) -> score``;
            higher means more related.
        top_n: number of rows to return.

    Returns:
        A new DataFrame (original index labels kept) with a ``relatedness``
        column, sorted from most to least related. The input frame is not
        modified, which also avoids writing into a filtered slice.
    """
    query_embedding_response = openai.Embedding.create(
        model=EMBEDDING_MODEL,
        input=query,
    )
    query_embedding = query_embedding_response["data"][0]["embedding"]

    scores = df['embeddings'].apply(lambda emb: relatedness_fn(query_embedding, emb))
    ranked = df.assign(relatedness=scores).sort_values(by='relatedness', ascending=False)
    return ranked.head(top_n)


def query_message(query: str, df: pd.DataFrame) -> Tuple[str, str, list]:
    """Build the prompt for ``query`` from the most relevant document sections.

    In interview mode (the ``interview_mode`` setting) the best section of
    the resume, the best section of the job description and the best
    remaining section overall are used; a title with no matching sections is
    skipped. Otherwise the three most related sections are used.

    Args:
        query: the question to answer.
        df: frame of sections with ``title``, ``loc``, ``text`` and
            ``embeddings`` columns.

    Returns:
        tuple ``(message, full_message, docs_used)`` where ``message`` is the
        introduction plus the titles used, ``full_message`` additionally
        contains the excerpts and the question, and ``docs_used`` is a list
        of ``(title, loc)`` pairs.
    """
    resume_title = get_config('resume_title')
    job_desc_title = get_config('job_desc_title')

    introduction = ('Use the textual excerpts to provide detailed, bullet point answers for the subsequent question. '
                    'If the answer cannot be found in the provided text, do your best to provide the most rational and  '
                    'comprehensive response. The response should be able to be seamlessly used to quickly answer the question.'
                    'Be as succinct as possible.')

    selected = []  # chosen section rows, in the order they are added to the prompt

    # The configuration stores this flag as 'interview_mode'.
    if get_config("interview_mode"):
        # Best section from the resume, then from the job description.
        for doc_title in (resume_title, job_desc_title):
            candidates = df[df['title'] == doc_title]
            if candidates.empty:
                continue
            selected.append(strings_ranked_by_relatedness(query, candidates).iloc[0])

        # Third section: the best of everything not already picked.
        remaining = df.drop(index=[row.name for row in selected])
        if not remaining.empty:
            selected.append(strings_ranked_by_relatedness(query, remaining).iloc[0])
    else:
        # Without interview mode, just pick the top 3 most relevant sections.
        selected.extend(row for _, row in strings_ranked_by_relatedness(query, df).head(3).iterrows())

    message = introduction
    full_message = introduction
    docs_used = []

    for row in selected:
        title, loc, section_text = row["title"], row["loc"], row["text"]
        docs_used.append((title, loc))
        doc_info = f'\n\nTitle: {title}'
        message += doc_info
        full_message += doc_info + f'\nTextual excerpt section:\n"""\n{section_text}\n"""'

    # Keep the question apart from the closing delimiter of the last excerpt.
    full_message += f'\n\nQuestion: {query}'
    return message, full_message, docs_used

@asynccontextmanager
async def async_chat_completion(*args, **kwargs):
    """Async context manager around ``openai.ChatCompletion.acreate``.

    Yields the awaited result of ``acreate`` and calls its ``aclose()`` on
    exit, whether or not the body raised.
    """
    completion_stream = await openai.ChatCompletion.acreate(*args, **kwargs)
    try:
        yield completion_stream
    finally:
        await completion_stream.aclose()

async def ask(transcription, df, interruption_event) -> str:
    """Stream a chat-completion answer for ``transcription`` to the console.

    Args:
        transcription: the transcribed question.
        df: frame of embedded document sections passed to ``query_message``.
        interruption_event: event object; when set, streaming stops early.

    Returns:
        The response text received so far: the full answer normally, a
        partial answer when interrupted mid-stream, ``""`` when interrupted
        before the request was made.
    """
    if interruption_event.is_set():
        return ""

    print(Fore.CYAN + "\n──────────────────────────────────────────────────────────────────────────")
    print(Style.BRIGHT + Fore.BLUE + "Question:" + "\n" + Style.NORMAL + Fore.RESET + f"{transcription}")
    print(Style.BRIGHT + Fore.MAGENTA + "\n" + "AI Response:")
    temperature = TEMPERATURE
    model = GPT_MODEL

    _, full_message, _ = query_message(transcription, df)

    # Same budget formula as before, clamped so a long prompt can never
    # produce a zero or negative max_tokens value.
    max_tokens = max(1, MAX_TOKENS - num_tokens(transcription + full_message, model=model))
    messages = [
        {"role": "system",
         "content": SYSTEM_PROMPT},
        {"role": "user", "content": full_message},
    ]

    response_content = ""
    async with async_chat_completion(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=True,
    ) as chat_completion:
        try:
            async for chunk in chat_completion:
                # Check for interruption after each response chunk
                if interruption_event.is_set():
                    return response_content

                content = chunk["choices"][0].get("delta", {}).get("content")
                # A delta may carry no content (missing or None); skip those
                # chunks instead of concatenating None onto the response.
                if content:
                    print(content, end='')
                    response_content += content

        except RuntimeError as e:
            if 'asynchronous generator is already running' in str(e):
                # This is the error we expect when interrupted.
                print("Generator was interrupted.")
            else:
                raise

    print(Fore.CYAN + "\n──────────────────────────────────────────────────────────────────────────")
    print(Fore.LIGHTGREEN_EX + "\nPress and hold the hotkey again to record another segment.")
    return response_content

#gui_util.py

import os
from colorama import Fore, Style
from art import *
from config import configure_settings,get_config
from openai_util import embed_documents

def clear_screen():
    """Clear the terminal using ``cls`` on Windows and ``clear`` elsewhere."""
    command = 'clear'
    if os.name == 'nt':
        command = 'cls'
    os.system(command)

def display_intro():
    """Clear the screen, print the coloured banner, load settings and show the folder path."""
    clear_screen()

    # ASCII-art banner rendered with the 'slant' font.
    banner = text2art("Hinterview", "slant")

    # Switch the terminal to bright cyan before the banner is printed.
    print(Style.BRIGHT + Fore.CYAN, end="")

    # Draw '/' and '_' in green, returning to cyan after each one.
    for glyph in ("/", "_"):
        banner = banner.replace(glyph, Fore.GREEN + glyph + Fore.CYAN)

    print(banner)
    print(Fore.CYAN + "──────────────────────────────────────────────────────────────────────────")
    configure_settings()
    folder_path = get_config('folder_path')
    print("\nCurrent directory path:" + Fore.LIGHTGREEN_EX + Style.BRIGHT + f"{folder_path}\n")



def display_initial_menu():
    """Print the two start-up options and return the user's raw input."""
    for option in ("1. Continue to Program", "2. Open Settings Menu"):
        print(Fore.YELLOW + option)
    return input(Fore.GREEN + "Please select an option (1-2): ")

def display_settings_menu():
    """Clear the screen, print the settings menu and return the user's raw input."""
    separator = Fore.CYAN + "──────────────────────────────────────────────────────────────────────────"
    entries = (
        "1. Folder Path",
        "2. OpenAI API Key",
        "3. Hotkey",
        "4. Interview Mode",
        "5. GPT Model",
        "6. System Prompt",
        "7. Temperature",
        "8. Max Tokens",
        "9. Resume Title",
        "10. Job Description Title",
    )

    clear_screen()
    print(separator)
    print(Style.BRIGHT + Fore.GREEN + "                          SETTINGS")
    for entry in entries:
        print(Fore.YELLOW + entry)
    print(separator)
    print(Fore.GREEN + "0. Return to Main Menu")
    return input(Fore.LIGHTGREEN_EX + "Please select an option (0-10): ")

def handle_settings_menu():
    """Run the settings menu loop until the user chooses to return.

    Each valid choice prompts for a new value and passes it to
    ``configure_settings`` under the setting's configuration key. Choosing
    ``0`` redisplays the intro screen and leaves the loop.
    """
    # Menu choice -> (prompt, configuration key). The keys must match the
    # names used in the SETTINGS section, otherwise the value is stored
    # under a name nothing reads.
    settings_options = {
        '1': ('Enter the new folder path: ', 'folder_path'),
        '2': ('Enter the new OpenAI API Key: ', 'openai_api_key'),
        '3': ('Enter the new hotkey: ', 'hotkey'),
        '4': ('Enter the new interview mode value (True/False): ', 'interview_mode'),
        '5': ('Enter the new GPT model: ', 'gpt_model'),
        '6': ('Enter the new system prompt: ', 'system_prompt'),
        '7': ('Enter the new temperature value: ', 'temperature'),
        '8': ('Enter the new max tokens value: ', 'max_tokens'),
        '9': ('Enter the new resume title: ', 'resume_title'),
        '10': ('Enter the new job description title: ', 'job_desc_title'),
    }

    while True:
        choice = display_settings_menu()
        if choice == '0':
            display_intro()
            break
        if choice not in settings_options:
            print(Fore.RED + "Invalid choice. Please try again.")
            continue

        prompt, setting_name = settings_options[choice]
        new_value = input(Fore.GREEN + prompt)
        configure_settings(**{setting_name: new_value})
        print(Fore.GREEN + "Setting updated successfully!")
        clear_screen()

def display_instructions():
    """Print the two-line usage hint for the recording hotkey."""
    instructions = (
        "\nPress and hold the hotkey (default: Option) to record a segment of your interview.",
        "Release the key to stop recording and get insights.",
    )
    for line in instructions:
        print(line)


def display_recording():
    """Print a separator line followed by the 'Recording' status line."""
    separator = Fore.CYAN + "\n──────────────────────────────────────────────────────────────────────────"
    status = Fore.YELLOW + "\n[STATUS] Recording..."
    for line in (separator, status):
        print(line)


def display_transcribing():
    """Print the 'Transcribing' status line in blue."""
    status = Fore.BLUE + "[STATUS] Transcribing..."
    print(status)


def display_processing():
    """Print the 'Fetching AI Response' status line in magenta."""
    status = Fore.MAGENTA + "[STATUS] Fetching AI Response..."
    print(status)


def display_error(error_message):
    """Print a separator line followed by ``error_message`` with a red 'Error:' label."""
    separator = Fore.CYAN + "\n──────────────────────────────────────────────────────────────────────────"
    label = Fore.RED + "\nError:"
    print(separator)
    print(label, error_message)

def primary_gui():
    """Show the intro and start menu, then embed the configured documents.

    Loops on the start menu until the user picks option 1; option 2 opens
    the settings menu and anything else prints an error.

    Returns:
        The DataFrame produced by ``embed_documents`` for the configured folder.
    """
    display_intro()

    choice = display_initial_menu()
    while choice != '1':
        if choice == '2':
            handle_settings_menu()
        else:
            print(Fore.RED + "Invalid choice. Please try again.")
        choice = display_initial_menu()

    print(Fore.GREEN + "Continuing to the Program...\n")

    documents_folder = get_config("folder_path")
    embedded_documents = embed_documents(documents_folder)

    display_instructions()

    return embedded_documents

from IPython.display import Image, display, Markdown

# Notebook-only cell: (image URL, caption) pairs for screenshots of the program.
images_and_explanations = [
    ("https://res.cloudinary.com/dn9bcrimg/image/upload/v1692908956/intro_r9agpc.png", "Intro Header & Options"),
    ("https://res.cloudinary.com/dn9bcrimg/image/upload/v1692908956/settings_xw6jyi.png", "Settings Page"),
    ("https://res.cloudinary.com/dn9bcrimg/image/upload/v1692908956/continue_gzewmt.png", "Continuing to Program"),
    ("https://res.cloudinary.com/dn9bcrimg/image/upload/v1692908956/response_bk1qcp.png", "Use of Program"),
]

# Render each screenshot followed by its caption in bold Markdown.
for url, explanation in images_and_explanations:
    display(Image(url=url, width=600)) # Adjust the width as needed
    display(Markdown(f"**{explanation}**"))

#helper.py

import threading
import asyncio
from config import get_config, configure_user_settings
from gui_util import display_recording, display_transcribing, display_processing, \
    clear_screen, primary_gui
import pyaudio
from pydub import AudioSegment
from pynput import keyboard
from colorama import init, Fore
from openai_util import transcribe_and_clean, ask

# Initialise colorama for this module's console output (autoreset enabled).
init(autoreset=True)

# Runs the interactive start-up flow at import time and keeps the embedded
# document sections it returns for later calls to ask().
df = primary_gui()

# Settings read once from the shared configuration cache.
HOTKEY = get_config("hotkey")
FOLDER_PATH = get_config("folder_path")

# Audio capture parameters passed to PyAudio in record_audio().
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
CHUNK = 1024
# Recordings are exported to this file before being transcribed.
MP3_OUTPUT_FILENAME = "temp.mp3"
DEVICE_INDEX = 1  # Index for BlackHole 2ch
audio = pyaudio.PyAudio()

# recording_event: set while the hotkey is held; record_audio() reads until it clears.
# interruption_event: set by on_press() and checked before/during ask().
recording_event = threading.Event()
interruption_event = threading.Event()

def record_audio():
    """Record while ``recording_event`` is set, then transcribe and answer.

    Audio is read from the configured input device, exported to
    ``MP3_OUTPUT_FILENAME`` and transcribed. If transcription succeeded and
    no interruption was signalled, the transcript is passed to ``ask``.
    Exceptions are not caught here; they propagate to the calling thread.
    """
    frames = []
    display_recording()

    try:
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True, input_device_index=DEVICE_INDEX,
                            frames_per_buffer=CHUNK)
        try:
            while recording_event.is_set():
                frames.append(stream.read(CHUNK))
        finally:
            # Release the input stream even if a read fails part-way through.
            stream.stop_stream()
            stream.close()

        audio_segment = AudioSegment(
            data=b''.join(frames),
            sample_width=audio.get_sample_size(FORMAT),
            frame_rate=RATE,
            channels=CHANNELS
        )
        audio_segment.export(MP3_OUTPUT_FILENAME, format="mp3", bitrate="64k")

        display_transcribing()
        transcription_result = transcribe_and_clean(MP3_OUTPUT_FILENAME)

        if transcription_result != "Transcription failed. Please try again.":
            if not interruption_event.is_set():  # Only process if not interrupted
                display_processing()
                asyncio.run(ask(transcription_result, df, interruption_event))
        else:
            print(Fore.RED + transcription_result)
    finally:
        # Drop the captured audio buffers whether or not processing succeeded.
        frames.clear()

def on_press(key):
    """Keyboard-listener callback for key presses.

    Pressing the hotkey while idle clears the screen and starts a recording
    thread. Every key press, hotkey or not, sets ``interruption_event``.
    """
    is_hotkey = key == getattr(keyboard.Key, HOTKEY)
    if is_hotkey and not recording_event.is_set():
        clear_screen()
        recording_event.set()
        recorder = threading.Thread(target=record_audio)
        recorder.start()
    interruption_event.set()

def on_release(key):
    """Keyboard-listener callback for key releases.

    Releasing the hotkey during a recording clears both the recording and
    the interruption events; other releases are ignored.
    """
    if key != getattr(keyboard.Key, HOTKEY):
        return
    if not recording_event.is_set():
        return
    recording_event.clear()
    interruption_event.clear()

#main.py

from helper import *

def main():
    """Start the keyboard listener and block until it stops."""
    hotkey_listener = keyboard.Listener(on_press=on_press, on_release=on_release)
    with hotkey_listener:
        hotkey_listener.join()

# Start the listener only when this file is executed as a script.
if __name__ == "__main__":
    main()

Hinterview - AI Interview Assistant

Features

Config

OpenAI Utilities

Various Additional Helper Functions

Main