####Set Environment####
    # Packages the app depends on, listed in the order they are attached
    # (later packages mask earlier ones, e.g. DT's table helpers over shiny's).
    required_packages <- c("tidyverse", "Rtsne", "shiny",
                           "shinydashboard", "lsa", "DT")

    # Install anything that is missing, then attach it. Each package is checked
    # under its own name, so a missing DT or lsa is installed rather than
    # failing later at library().
    for (pkg in required_packages) {
      if (!requireNamespace(pkg, quietly = TRUE)) {
        install.packages(pkg)
      }
      library(pkg, character.only = TRUE)
    }
    
    # Switch to the folder holding this script so the relative CSV paths used
    # when reading the data resolve, whichever GUI launched it.
    script_dir <- character(0)
    if (.Platform$GUI == "RStudio") {
      script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
    } else if (.Platform$GUI == "Rgui") {
      # getSrcDirectory() needs a function that carries source references; a
      # throwaway closure defined in this file reports the file's directory
      # when the script is source()d.
      script_dir <- getSrcDirectory(function() NULL)
    }
    # Only change directory when a usable path was found (the path is empty
    # when the code is run from a console or an unsaved document).
    if (length(script_dir) > 0 && !is.na(script_dir[1]) && nzchar(script_dir[1])) {
      setwd(script_dir[1])
    }
    
    # Row positions of the student-authored content in the class data; the
    # remaining rows (64-99) are what subset_data() returns as "Resources".
    student_indecies <- c(1:63, 100:106)
    
####Functions####
    # Return the rows of the global simp_dat selected by the "Subset" control.
    #   user_input: "Students" drops rows 64-99, "Resources" drops rows 1-63
    #               and 100-106, and any other value returns every row.
    subset_data <- function(user_input) {
      if (user_input == "Students") {
        return(simp_dat[-(64:99), ])
      }
      if (user_input == "Resources") {
        return(simp_dat[-c(1:63, 100:106), ])
      }
      simp_dat
    }
    
    # Score each corpus document by its mean similarity to a set of search
    # documents.
    #   cosine_matrix:   matrix of pairwise similarities; rows and columns are
    #                    indexed by the same document positions.
    #   corpus_indecies: row index selecting the documents to score.
    #   search_indecies: column index selecting the documents to compare
    #                    against; defaults to every position not in the corpus.
    # Returns one mean per selected row, named by the matrix row names when
    # they exist.
    rank_readings<-function(cosine_matrix
                            ,corpus_indecies,
                            search_indecies=-corpus_indecies){
      # drop = FALSE keeps the selection a matrix even when a single row or
      # column is chosen; without it the result collapses to a plain vector
      # and apply() cannot take row-wise means.
      corpus_to_search_cosines = cosine_matrix[corpus_indecies,
                                               search_indecies,
                                               drop = FALSE]
      reading_importance = apply(corpus_to_search_cosines,MARGIN = 1, mean)
      return(reading_importance)
    }

####Read in Data and run TSNE####

    # Descriptive columns for every piece of class content.
    class_data <- read.csv("Class_Content.csv")

    # The embeddings file was written by numpy (row-major) while R is
    # column-major, so the table is transposed on import.
    openai_embeddings <- as.data.frame(
      t(read.csv("openai_embeddings.csv", header = FALSE))
    )
    rownames(openai_embeddings) <- rownames(class_data)

    # Project the embeddings to two t-SNE coordinates and attach them to the
    # descriptive columns.
    tSNE_vars <- Rtsne(openai_embeddings)
    full_data <- cbind.data.frame(class_data, tSNE_vars$Y)
    names(full_data) <- c(
      "Description", "Type", "User",
      "Content", "TSNE1", "TSNE2"
    )
    full_data <- cbind.data.frame(full_data, openai_embeddings)

    # Plot-ready view: the six named columns without the raw embedding values.
    simp_dat <- full_data[, 1:6]
    
####Cosine Similarity Matrix####
    # Document-by-document cosine similarity. The embeddings are transposed so
    # that each document becomes a column before calling cosine()
    # (presumably cosine() compares columns -- confirm against the lsa docs).
    doc_similarity <- cosine(t(openai_embeddings))
    # Label each row with "<User> <Description>" so scores read naturally.
    rownames(doc_similarity) <- paste(simp_dat$User, simp_dat$Description)

####Calculate Influential Documents (average)####
    # Mean similarity of every non-student document to all student documents.
    class_readings <- rank_readings(
      cosine_matrix = doc_similarity,
      corpus_indecies = -student_indecies,
      search_indecies = student_indecies
    )
    
####Calculate Influential Documents (by student)####
    # One entry per user who has more than one document: the non-student
    # documents scored against that user's documents only.
    student_readings <- list()
    for (this_user in unique(simp_dat$User)) {
      this_indecies <- which(simp_dat$User == this_user)
      # Users with a single document (or none) are skipped.
      if (length(this_indecies) <= 1) {
        next
      }
      slot <- length(student_readings) + 1
      student_readings[[slot]] <- rank_readings(
        doc_similarity,
        corpus_indecies = -student_indecies,
        search_indecies = this_indecies
      )
      names(student_readings)[slot] <- this_user
    }
    

    
####UI####
###Dashboard Overview Page###
    # Dashboard layout: a sidebar with two tabs ("Dashboard" and "Source Code")
    # and, on the dashboard tab, an intro box, the graph controls next to the
    # t-SNE plot, a raw-data panel for clicked points, and two relevance tables.
    # Input ids read by the server: subset, text, color, textsize, plot1_click.
    # Output ids filled by the server: data, click_info, readings_table,
    # readings_table_details.
    # NOTE(review): the display copy below contains spelling mistakes
    # (e.g. "cirricular", "relevent"); the text is left exactly as written here.
    ui <- 
    dashboardPage(
      dashboardHeader(title="P633 DLA"),
      dashboardSidebar(
        sidebarMenu(
          menuItem("Dashboard",tabName = "dashboard",icon = icon("dashboard")),
          menuItem("Source Code",tabName = "code", icon = icon("code"))
        )
      ),
      dashboardBody(
        tabItems(
          tabItem(tabName ="dashboard",
                    # Introductory explanation of the map
                    fluidRow(
                      box(title = "The Topography of Your Class",solidHeader = TRUE, collapsible = TRUE, status = "primary",
                          shiny::h3("The graph below is a 'map' of the content of your class, including student artifacts and cirricular material. By reading all of the content of your course, our AI was able to organize
                                   the content so that similar items are closer together. You can think of this graph as a 'overhead map' of your course. 
                                   The exact orientiaion (e.g North/South, East/West) is not important, just the distance between content."),width=12)
                    ),
                    # Controls (left) and the plot itself (right)
                    fluidRow(
                      box(
                          title="Graph Controls", solidHeader = TRUE, status = "warning", width=3,
                          selectInput("subset",
                                      "Subset:",
                                      c("All","Students","Resources"),
                                      selected="All"),
                          # Label and colour choices are limited to the first three columns of simp_dat
                          varSelectInput("text", 
                                         "Text:", 
                                         simp_dat[,1:3],
                                         selected="User"),
                          varSelectInput("color","Color:", 
                                         simp_dat[,1:3],
                                         selected="Type"),
                          sliderInput("textsize",
                                      "Text Size:",
                                      min=.5,
                                      max=6,
                                      step=.1,
                                      value=3.5),
                          shiny::h3("What Insights Could I Gain From this Graph?"),
                          shiny::h4("We're glad you asked! The exact intepretation is going to depend on your context, but you could start by asking yourself a few question. 
                                   For example, you may notice that some cirricular material is near related assessment material (e.g., an exam is next to the textbook chapter that talks about it, 
                                   or student essays are near the book they are writing about). Is there any cirricular material that isn't connected to assessment material? If so, how is it connected to the rest of the course?
                                   What purpose does it serve?"),
                          shiny::h4("We'll give you one more example. If students wrote essays in your course, you may see that certain students essays cluster together. What is the shared experience of these students? Do you see multiple clusters?
                                   If so, do the experiences of the students in these two clusters differ? Are you suporting both clusters of students?")
                          ),
                      box(
                          title="Class Content",solidHeader = TRUE, status="primary",width=9,collapsible = TRUE,
                          plotOutput("data",
                                 height="800px",
                                 click = "plot1_click")
                          )
                    ),
                    # Details for the point(s) nearest the last click on the plot
                    fluidRow(
                      box(title="Raw Data", solidHeader = TRUE, status = "danger",collapsible = TRUE,
                          shiny::h4("Click on a data point on the graph to view the raw data for that point."),
                        verbatimTextOutput("click_info"),width=12)
                    ),
                    # Class-wide relevance table (no trailing comma after the box, so
                    # fluidRow() is not handed an empty argument)
                    fluidRow(
                      box(title = "What material are my students using?",solidHeader = TRUE, status = "primary", width = 12, collapsible = TRUE,
                        shiny::h4("It may be helpful to look at specific content in more detail. This table gives each piece of class content a score based on how relevent 
                                 it was to students (on average) when they completed assignment. Scores can range anywhere from 10 to -10, but it's pretty common for all of the material 
                                 to fit in a range between 9 and 6, with 9 suggesting material was very important, and 6 suggesting it wasn't as important."),
                        DT::dataTableOutput("readings_table"))
                    ),
                    # Per-student relevance table; the argument is spelled solidHeader so
                    # box() receives it as its own parameter rather than through `...`
                    fluidRow(
                      box(title = "Drill down by student",solidHeader=TRUE, status = "primary", width = 12, collapsible = TRUE, background = "light-blue",
                        shiny::h4("This table below breaks down the information in the above table even more, scoring content based on it's relevence to each student."),
                        DT::dataTableOutput("readings_table_details")
                      )
                    )),
          tabItem(tabName = "code", fluidRow(h3("Raw Code")))
        )
      )
    )
    

####Server####
    # Server logic for the dashboard.
    #   input:  reads subset, text, color, textsize and plot1_click.
    #   output: fills data (plot), click_info (text), readings_table and
    #           readings_table_details (tables).
    # Relies on the globals simp_dat (via subset_data), class_readings and
    # student_readings built earlier in this script.
    server <- function(input,output){

      # Rows of simp_dat matching the current "Subset" choice; shared by the
      # plot and the click lookup so both see the same data.
      df_subset <- reactive({
        subset_data(input$subset)
      })

      # t-SNE scatter plot, labelled and coloured by the selected columns.
      output$data <- renderPlot({
        ggplot(df_subset(), aes(x = TSNE1, y = TSNE2, label = !!input$text)) +
          # size is a fixed setting, so it belongs outside aes(); inside aes()
          # it was mapped as data and produced a meaningless "size" legend.
          geom_point(aes(color = !!input$color), size = 2) +
          geom_text(nudge_x = .25, nudge_y = .2, size = input$textsize)
      })

      # Rows near the last click on the plot, with their distance added.
      output$click_info <- renderPrint({
        nearPoints(df_subset(), input$plot1_click, addDist = TRUE)
      })

      # Class-wide scores: mean similarity rounded to 3 decimals, then times 10.
      output$readings_table <- DT::renderDataTable({
        readings_table <- cbind.data.frame(names(class_readings),
                                           round(as.vector(class_readings), 3) * 10)
        names(readings_table) <- c("Readings", "Relevence to Students (out of ten)")
        readings_table
      })

      # The same scoring broken out with one column per user.
      output$readings_table_details <- DT::renderDataTable({
        round(as.data.frame(student_readings), 3) * 10
      })
    }

####Run App####
    # Build the Shiny app object from the UI definition and the server function.
    shinyApp(ui, server)