# config.yaml

# Input/output locations and prompt folders.
PATH:
  INPUT: '/kaggle/input/'
  OUTPUT: './output'
  # Baseline prompt folder that Augmentoolkit falls back to if it can't find
  # a step in the PROMPTS path.
  DEFAULT_PROMPTS: './prompts'
  # Where Augmentoolkit first looks for prompts.
  PROMPTS: './prompts'
API:
  # Add the API key for your favorite provider here.
  API_KEY: 'DUMMY'
  # Base URL for a provider, or a local server. Some possible values:
  #   http://127.0.0.1:5000/      <- local models
  #   https://api.together.xyz    <- together.ai, which is real cheap, real
  #                                  flexible, and real high-quality, if a
  #                                  tad unreliable
  #   https://api.openai.com/v1/  <- OpenAI. Will bankrupt you very fast.
  # Anything else that accepts OAI-style requests also works, so basically
  # any API out there (openrouter, fireworks, etc.).
  BASE_URL: 'http://127.0.0.1:11434/v1/'
  # Model used for everything except conversation generation at the very end.
  LOGICAL_MODEL: 'codegemma'
  # Model used for question generation and conversation generation at the
  # very end. A pretty tough task, if ASSISTANT_MODE isn't on.
  LARGE_LOGICAL_MODEL: 'codegemma'
  # Only use if Aphrodite mode is on.
  QUANTIZATION_SMALL: 'gptq'
  # Only use if Aphrodite mode is on.
  QUANTIZATION_LARGE: 'gptq'
# NOTE(review): each flag presumably skips the named pipeline step when set
# to true -- confirm against the code that reads this section.
SKIP:
  QUESTION_CHECK: false
  # Turn on if using the negative question prompt override.
  ANSWER_RELEVANCY_CHECK: false
  FILTER_CHUNKS: false
SYSTEM:
  CHUNK_SIZE: 1000
  # Give the AI context from the filenames provided to it. Useful if the
  # filenames are meaningful, otherwise turn them off.
  USE_FILENAMES: false
  # How many times to check a question and answer pair during each validation
  # step. Majority vote decides if it passes that step. There are three
  # steps, so a pair is checked up to 3 * DOUBLE_CHECK_COUNTER times in total
  # (fewer if the early checks for a step already pass). The often-quoted
  # figure of "around 9 times" presumably assumes a counter of 3, not the 1
  # set here.
  DOUBLE_CHECK_COUNTER: 1
  # Presumably the number of chunks kept when USE_SUBSET is true -- confirm
  # against the consumer.
  SUBSET_SIZE: 10
  # Whether to take only the first SUBSET_SIZE chunks from a text during the
  # run. Useful for experimenting and iterating and seeing all the steps
  # without costing too much money or time.
  USE_SUBSET: false
  # Hard limit of how many calls can be run at the same time, useful for API
  # mode (aphrodite automatically manages this and queues things, as far as
  # I know).
  CONCURRENCY_LIMIT: 50
  # Set to false to use chat (instruct) mode; this requires .json files in
  # your chosen prompts directory, in the OpenAI API format. Not all APIs
  # support completion mode.
  COMPLETION_MODE: false
  # Can be one of "api" | "aphrodite".
  MODE: "api"
  # true = use stop tokens, false = do not use stop tokens. OpenAI's API
  # restricts you to four stop tokens and all steps have way more than four
  # stop tokens, so you'll need to turn this to false if you're using OAI's
  # API. Also NOTE that if you turn this OFF while using COMPLETION MODE,
  # EVERYTHING WILL BREAK and it will cost you money in the process. Don't
  # do that.
  STOP: true
  # Folded scalar: the two lines below are joined with a single space and
  # carry no trailing newline.
  CONVERSATION_INSTRUCTIONS: >-
    For this conversation, you are generating a chat between a generalist,
    generic AI assistant, and a human.
  FINAL_ASSISTANT_PROMPT_NO_RAG: |
    You are a helpful AI assistant.
  # {data} is a literal placeholder inside the prompt text; keep it verbatim.
  FINAL_ASSISTANT_PROMPT_RAG: |
    You are a helpful AI assistant.

    Context information is below:

    ----------------------
    {data}
PHASE:
  WORK_IN_PHASES: false
  # Index of the phase we are currently on:
  #   0 = filtering out chunks with no relevant context
  #   1 = question generation
  #   2 = question validation
  #   3 = context revision and conversation generation (the final phase)
  PHASE_INDEX: 3
# NOTE(review): presumably controls uploading the generated dataset to the
# Hugging Face Hub -- confirm against the code that reads this section.
HUGGINGFACE:
  # Hub repository path in "<owner>/<name>" form.
  HUB_PATH: 'Heralax/test-atk-dataset-do-not-use-3'
  PRIVATE: false
  PUSH_TO_HUB: false


# May be much more efficient to rent H100s for large-scale inference than A100s