## ----setup, include=FALSE-----------------------------------------------------
# Document-wide chunk defaults: collapse source and output into one block,
# prefix printed output with "#>", and centre figures.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.align = "center")
library(pairwiseLLM)
library(dplyr)
library(readr)
library(tidyr)
library(stringr)
library(knitr)

## -----------------------------------------------------------------------------
# Look up the description object for the "overall_quality" trait; the
# commented-out example further down reads its `name` and `description`
# fields when building a prompt.
td <- trait_description("overall_quality")
# Auto-print the object so its contents appear in the rendered output
td

## -----------------------------------------------------------------------------
# Load the bundled example dataset into the workspace
data("example_writing_samples", package = "pairwiseLLM")

# Compact, column-by-column overview of the dataset
example_writing_samples |> glimpse()

# Render every sample (full text included) as a captioned table
kable(
  example_writing_samples,
  caption = "20 example writing samples included with pairwiseLLM."
)

## -----------------------------------------------------------------------------
# Identifiers of the five prompt templates examined below: "test1" .. "test5"
template_ids <- sprintf("test%d", 1:5)
template_ids

## -----------------------------------------------------------------------------
# Show only the first 500 characters of the "test1" template, followed by an
# ellipsis marker, to keep the preview short.
get_prompt_template("test1") |>
  substr(1, 500) |>
  cat("...\n")

## ----eval = FALSE-------------------------------------------------------------
# # Retrieve another template
# tmpl_test3 <- get_prompt_template("test3")
# 
# # Use it to build a concrete prompt for a single comparison
# pairs <- example_writing_samples |>
#   make_pairs() |>
#   head(1)
# 
# prompt_text <- build_prompt(
#   template   = tmpl_test3,
#   trait_name = td$name,
#   trait_desc = td$description,
#   text1      = pairs$text1[1],
#   text2      = pairs$text2[1]
# )
# 
# cat(prompt_text)

## -----------------------------------------------------------------------------
# Start from the pairs that make_pairs() derives from the example samples
pairs_all <- make_pairs(example_writing_samples)

# "Forward" presentation order of those pairs
pairs_forward <- alternate_pair_order(pairs_all)

# Reverse-order counterpart: reverse_pct = 1.0 requests 100% of the forward
# pairs, and the seed is fixed so the result is reproducible.
pairs_reverse <- pairs_forward |>
  sample_reverse_pairs(reverse_pct = 1.0, seed = 2002)

# Compare the ID columns of the first three rows in each ordering
pairs_forward[seq_len(3), c("ID1", "ID2")]
pairs_reverse[seq_len(3), c("ID1", "ID2")]

## -----------------------------------------------------------------------------
# Locate the summary CSV shipped in the installed package's extdata folder
summary_path <- system.file(
  "extdata", "template_test_summary_all.csv",
  package = "pairwiseLLM"
)

# system.file() returns "" when nothing matches, so fail loudly in that case
if (identical(summary_path, "")) {
  stop("Data file not found in installed package.")
}

# Read the summary (column-type message suppressed) and preview its first rows
summary_tbl <- summary_path |>
  readr::read_csv(show_col_types = FALSE)
summary_tbl |> head()

## -----------------------------------------------------------------------------
cat(get_prompt_template("test1"))

## -----------------------------------------------------------------------------
# Build the summary table for a single prompt template.
#
# Arguments:
#   summary_data  Data frame with the columns template_id, backend, model,
#                 thinking, prop_consistent, prop_pos1 and p_sample1_overall.
#   template      Single template identifier to keep, e.g. "test1".
#
# Returns the knitr table produced by kable(): one row per backend / model /
# thinking combination, sorted in that order, with both proportions rounded
# to three decimals and the p-value formatted with three fixed decimals.
template_bias_table <- function(summary_data, template) {
  summary_data |>
    # .env$ forces the function argument to be used even if the data ever
    # gains a column that happens to be called `template`.
    filter(template_id == .env$template) |>
    arrange(backend, model, thinking) |>
    mutate(
      Prop_Consistent = round(prop_consistent, 3),
      Prop_SAMPLE_1   = round(prop_pos1, 3),
      Binomial_Test_p = formatC(p_sample1_overall, format = "f", digits = 3)
    ) |>
    select(
      Backend = backend,
      Model = model,
      Thinking = thinking,
      Prop_Consistent,
      Prop_SAMPLE_1,
      Binomial_Test_p
    ) |>
    kable(
      align = c("l", "l", "l", "r", "r", "r")
    )
}

template_bias_table(summary_tbl, "test1")

## -----------------------------------------------------------------------------
cat(get_prompt_template("test2"))

## -----------------------------------------------------------------------------
template_bias_table(summary_tbl, "test2")

## -----------------------------------------------------------------------------
cat(get_prompt_template("test3"))

## -----------------------------------------------------------------------------
template_bias_table(summary_tbl, "test3")

## -----------------------------------------------------------------------------
cat(get_prompt_template("test4"))

## -----------------------------------------------------------------------------
template_bias_table(summary_tbl, "test4")

## -----------------------------------------------------------------------------
cat(get_prompt_template("test5"))

## -----------------------------------------------------------------------------
template_bias_table(summary_tbl, "test5")

## -----------------------------------------------------------------------------
# Build the summary table for a single backend across all templates.
#
# Arguments:
#   summary_data   Data frame with the columns template_id, backend, model,
#                  thinking, prop_consistent, prop_pos1 and p_sample1_overall.
#   backend_id     Value of the `backend` column to keep, e.g. "anthropic".
#   backend_label  Display name placed at the start of the table caption.
#
# Returns the knitr table produced by kable(): one row per template / model /
# thinking combination, sorted in that order, with both proportions rounded
# to three decimals and the p-value formatted with three fixed decimals.
backend_bias_table <- function(summary_data, backend_id, backend_label) {
  caption_text <- paste0(
    backend_label,
    ": Positional-bias summary by template, model, and thinking configuration."
  )

  summary_data |>
    # .env$ forces the function argument to be used even if the data ever
    # gains a column that happens to be called `backend_id`.
    filter(backend == .env$backend_id) |>
    arrange(template_id, model, thinking) |>
    mutate(
      Prop_Consistent = round(prop_consistent, 3),
      Prop_SAMPLE_1   = round(prop_pos1, 3),
      Binomial_Test_p = formatC(p_sample1_overall, format = "f", digits = 3)
    ) |>
    select(
      Template = template_id,
      Model    = model,
      Thinking = thinking,
      Prop_Consistent,
      Prop_SAMPLE_1,
      Binomial_Test_p
    ) |>
    kable(
      caption = caption_text,
      align = c("l", "l", "l", "r", "r", "r")
    )
}

backend_bias_table(summary_tbl, "anthropic", "Anthropic")

## -----------------------------------------------------------------------------
backend_bias_table(summary_tbl, "gemini", "Gemini")

## -----------------------------------------------------------------------------
backend_bias_table(summary_tbl, "openai", "OpenAI")

## -----------------------------------------------------------------------------
backend_bias_table(summary_tbl, "together", "TogetherAI")

