diff --git a/DESCRIPTION b/DESCRIPTION index 96ab584..b54aa72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,11 +2,11 @@ Package: ThreadNet Type: Package Title: ThreadNet Version: 0.03 -Author: Brian T. Pentland, Michigan State University -Maintainer: Brian T. Pentland -Description: ThreadNet is a tool for exploratory sequential data analysis. It converts threads into networks and provides some simple visualizations. +Author: Brian T. Pentland, Michigan State University +Maintainer: Brian T. Pentland +Description: ThreadNet is a Shiny App for exploratory analysis and visualization of sequential data. It converts threads into networks and provides some simple visualizations. License: GPL (>= 2) Encoding: UTF-8 LazyData: true RoxygenNote: 6.0.1 -Imports: shiny, plotly, tidyverse, ngram, stringr,stringdist,networkD3, colorspace, igraph, RColorBrewer, xesreadR, DT, lubridate, knitr +Imports: shiny, shinyjs, plotly, tidyverse, ngram, stringr,stringdist, networkD3, visNetwork, colorspace, igraph, RColorBrewer, xesreadR, DT, lubridate, knitr diff --git a/Inst/ThreadNet/global.R b/Inst/ThreadNet/global.R index 01f157a..221d5fd 100644 --- a/Inst/ThreadNet/global.R +++ b/Inst/ThreadNet/global.R @@ -8,23 +8,30 @@ # 15 October Point of view code has solidified somewhat # 20 October Starting on NetworkD3 +# June 6 finishing up Package -library(shiny) -library(plotly) -library(tidyverse) -library(ngram) -library(stringr) -library(stringdist) -library(ggplot2) -library(networkD3) -library(xesreadR) -library(colorspace) -library(igraph) -library(DT) -library(RColorBrewer) -library(lubridate) -library(knitr) +# add the dependent packages +suppressPackageStartupMessages({ + library(shiny) + library(shinyjs) + library(plotly) + library(ggplot2) + library(tidyverse) + library(ngram) + library(stringr) + library(stringdist) + library(igraph) + library(networkD3) + library(visNetwork) + library(xesreadR) + library(colorspace) + library(DT) + library(RColorBrewer) + library(lubridate) 
+ library(knitr) + library(ThreadNet) +}) # visualization types for UI dropdowns visualizations <- c( diff --git a/Inst/ThreadNet/server.R b/Inst/ThreadNet/server.R index 5df2925..2951e70 100644 --- a/Inst/ThreadNet/server.R +++ b/Inst/ThreadNet/server.R @@ -320,6 +320,26 @@ server <- shinyServer(function(input, output, session) { showNotification(paste(input$ManageEventMapInputID, " exported as .csv file"), type='message', duration=10) }) + CurrentNetwork <- reactiveValues() + observe({ + if(!is.null(viz_net())) + isolate( + CurrentNetwork <<- viz_net() + ) + }) + + output$downloadNetwork <- downloadHandler( + filename = paste("CurrentNetwork_POV.Rdata"), + # content = function(file) { save( eval(assign('CurrentNetwork',viz_net())) , file = file) } + content = function(file) { save( CurrentNetwork , file = file) } + ) + + # This is on visualize tab, but logically it fits better here + observeEvent(input$save_edge_list_button,{ + export_network(input$VisualizeEventMapInputID, viz_net() ) + showNotification("Exported to CurrentNetwork_POV.Rdata", type='message', duration=10) + }) + # Another opportunity to make subsets... 
observeEvent(input$SelectSubsetButton, if (check_POV_name(input$SelectSubsetMapName)){ diff --git a/Inst/ThreadNet/server/readData.R b/Inst/ThreadNet/server/readData.R index 245f283..af6bf3c 100644 --- a/Inst/ThreadNet/server/readData.R +++ b/Inst/ThreadNet/server/readData.R @@ -4,7 +4,7 @@ ############################## # limit what files to accept on input -fileTypes <- c("text/csv","text/comma-separated-values,text/plain",".csv") +fileTypes <- c("text/csv","text/comma-separated-values,text/plain",".csv",".xes") ########################## # Tab Output Definitions # @@ -14,7 +14,7 @@ fileTypes <- c("text/csv","text/comma-separated-values,text/plain",".csv") output$fileSelector <- renderUI({ tags$div( align = "center", - fileInput("inputFile","Please select a .csv file",accept=fileTypes) + fileInput("inputFile","Please select a .csv or .xes file",accept=fileTypes) ) }) @@ -46,10 +46,28 @@ output$dataFilter <- DT::renderDataTable( # return dataframe of occurences parseInputData <- function(inputFile){ - withProgress(message = "Cleaning Data", value = 0,{ + withProgress(message = "Reading and cleaning Data", value = 0,{ - # read in the table of occurrences - fileRows <- read.csv(inputFile$datapath) + # Check if this is an xes file + fileType= tools::file_ext(inputFile$datapath) + if (fileType=='xes') + { + # read in the table of occurrences + fileRows=as.data.frame(read_xes(inputFile$datapath)) + + if (any(match(colnames(fileRows),"timestamp"))) { + + # rename column as tStamp + colnames(fileRows)[colnames(fileRows)=="timestamp"] <- "tStamp" + + # move tStamp to the first column + fileRows=fileRows[c('tStamp', setdiff(names(fileRows), 'tStamp'))] + } + else {return(NULL)} + } + else + { # read in the table of occurrences + fileRows <- read.csv(inputFile$datapath) } incProgress(1/3) diff --git a/Inst/ThreadNet/server/visualize.R b/Inst/ThreadNet/server/visualize.R index e11c0d7..0a012ed 100644 --- a/Inst/ThreadNet/server/visualize.R +++ 
b/Inst/ThreadNet/server/visualize.R @@ -3,6 +3,7 @@ #### Main Tab Output Functions #### # Controls for the whole set of tabs + output$Visualize_Tab_Controls_1 <- renderUI({ selectizeInput( "VisualizeEventMapInputID", @@ -81,6 +82,7 @@ output$WholeSequenceThreadMap_RelativeTime <- renderPlotly({ threadMap(threadedE # use this to select how to color the nodes in force layout output$Circle_Network_Tab_Controls <- renderUI({ tags$div( + downloadButton('downloadNetwork', 'Export this Network', class="dlButton"), sliderInput("circleEdgeTheshold","Display edges above", 0,1,0,step = 0.01,ticks = FALSE ), radioButtons( "Label_or_Zoom_1", @@ -88,24 +90,55 @@ output$Circle_Network_Tab_Controls <- renderUI({ choices = c('Labels','Zooming'), selected = 'Labels', inline = TRUE) + ) }) +output$Circle_Network_Path_Estimate <- renderText({ + paste0('Estimated paths = ', + round(estimate_network_complexity(viz_net()) ),1) +}) + +output$Network_Nodes_Edges <- renderText({ + paste0( print_network_nodes_edges(viz_net()) ) +}) -output$circleVisNetwork <- renderVisNetwork({ +# Create the network to be exported and also displayed +viz_net <<- reactive({ req(input$circleEdgeTheshold) - # first convert the threads to the network + # first convert the threads to the network if (input$Label_or_Zoom_1 == 'Labels') { n <- threads_to_network_original(threadedEventsViz(), "threadNum", 'label') } else { n <- threads_to_network_original(threadedEventsViz(), "threadNum", get_Zoom_VIZ()) } # filter out the edges if desired - n <- filter_network_edges(n,input$circleEdgeTheshold) - circleVisNetwork(n, 'directed', TRUE) + n <- filter_network_edges(n,input$circleEdgeTheshold) + n }) + +output$circleVisNetwork <- renderVisNetwork({ + req(input$circleEdgeTheshold) + + circleVisNetwork(viz_net(), 'directed', TRUE) +}) + +# output$circleVisNetwork <- renderVisNetwork({ +# req(input$circleEdgeTheshold) +# +# # first convert the threads to the network +# if (input$Label_or_Zoom_1 == 'Labels') +# { n <- 
threads_to_network_original(threadedEventsViz(), "threadNum", 'label') } +# else +# { n <- threads_to_network_original(threadedEventsViz(), "threadNum", get_Zoom_VIZ()) } +# +# # filter out the edges if desired +# n <- filter_network_edges(n,input$circleEdgeTheshold) +# circleVisNetwork(n, 'directed', TRUE) +# }) + #### Other Networks sub-tab #### # use this to select how to color the nodes in force layout diff --git a/Inst/ThreadNet/ui.R b/Inst/ThreadNet/ui.R index e70f0eb..63c4d67 100644 --- a/Inst/ThreadNet/ui.R +++ b/Inst/ThreadNet/ui.R @@ -10,10 +10,10 @@ # May 2, 2018 Separation of tab definitions into files # pdf(NULL) # prevent plotly errors -library(shiny) -library(shinyjs) -library(networkD3) -library(visNetwork) +# library(shiny) +# library(shinyjs) +# library(networkD3) +# library(visNetwork) ui <- fluidPage( diff --git a/Inst/ThreadNet/ui/acknowledgements.R b/Inst/ThreadNet/ui/acknowledgements.R index 42411c9..72f151e 100644 --- a/Inst/ThreadNet/ui/acknowledgements.R +++ b/Inst/ThreadNet/ui/acknowledgements.R @@ -1,14 +1,27 @@ tabPanel( "Acknowledgements", - tags$h4("Support"), - tags$a(href="https://www.nsf.gov/awardsearch/showAward?AWD_ID=1734237","NSF SES-1734237"), + tags$h4("Author"), + tags$p("Brian Pentland, Michigan State University"), + + tags$h4("Contact"), + tags$a(href="mailto:ThreadNetHelp@gmail.com","ThreadNetHelp@gmail.com"), + + tags$h4("Financial Support"), + tags$a(href="https://www.nsf.gov/awardsearch/showAward?AWD_ID=1734237","NSF SES-1734237",target="_blank"), tags$p("Antecedents of Complexity in Healthcare Routines"), + tags$h4("Code Gurus"), - tags$p("Yu Lucy Han, Ezra Brooks, Patrick Bills, Danielle Barnes, Morgan Patterson, Douglas Krum"), + tags$p("Danielle Barnes, Patrick Bills, Ezra Brooks, Yu Lucy Han, Morgan Patterson, Douglas Krum, Mahmood Shafeie Zargar"), + tags$h4("Collaborators"), tags$p("Katharina Dittrich, Martha Feldman, Ken Frank, Thorvald Haerem, Inkyu Kim, Waldemar Kremser, Christian Mahringer, Alice Pentland, 
Jan Recker, Sudhanshu Srivastava, Julie Ryan Wolf, George Wyner "), - tags$h4("Related Publications"), - tags$a(href="http://routines.broad.msu.edu/resources/","http://routines.broad.msu.edu/resources/" ), + + tags$h4("Documentation and Related Publications"), + tags$a(href="http://routines.broad.msu.edu/resources/","http://routines.broad.msu.edu/resources/",target="_blank" ), + tags$h4("ThreadNet 2 (MatLab version)"), - tags$a(href="http://routines.broad.msu.edu/ThreadNet/","http://routines.broad.msu.edu/ThreadNet/" ) + tags$a(href="http://routines.broad.msu.edu/ThreadNet/","http://routines.broad.msu.edu/ThreadNet/",target="_blank" ), + + tags$h4("Official Song"), + tags$a(href="https://www.youtube.com/watch?v=tSnoHWBYA8U", "Doctor Decade: Tell Us a Story", target="_blank" ) ) diff --git a/Inst/ThreadNet/ui/visualize.R b/Inst/ThreadNet/ui/visualize.R index a2adf11..ad0c4e3 100644 --- a/Inst/ThreadNet/ui/visualize.R +++ b/Inst/ThreadNet/ui/visualize.R @@ -40,6 +40,8 @@ tabPanel(value = "visualize", tabPanel( "Event network (circle)", uiOutput("Circle_Network_Tab_Controls"), + textOutput("Circle_Network_Path_Estimate"), + textOutput("Network_Nodes_Edges"), visNetworkOutput("circleVisNetwork", width = "100%", height = "1200px") ), tabPanel( diff --git a/NAMESPACE b/NAMESPACE index 7be0832..5ac8de6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,7 @@ # Generated by roxygen2: do not edit by hand -export(ACHR_batch_V1) export(CF_multi_pie) +export(CF_multi_pie_event) export(Comparison_Plots) export(OccToEvents3) export(OccToEvents_By_Chunk) @@ -17,14 +17,17 @@ export(compute_entropy) export(convert_TN_to_TramineR) export(count_ngrams) export(delete_POV) +export(diff_handoffs) +export(diff_tStamp) +export(dual_window_correlation) export(dualmovingWindowCorrelation) export(estimate_network_complexity) export(estimate_task_complexity_index) -export(eventNetwork) export(export_POV) export(export_POV_csv) export(filter_network_edges) export(forceNetworkD3) 
+export(frequent_ngrams) export(get_CF_levels) export(get_POV) export(get_POV_COMPARISON_CF) @@ -33,15 +36,29 @@ export(get_POV_THREAD_CF) export(get_POV_name_list) export(get_moving_window) export(get_threadList) +export(make_example_DF) +export(make_nice_event_DT) +export(make_subsets) export(movingWindowCorrelation) +export(newColName) export(ng_bar_chart) export(normalNetwork) export(numThreads) +export(plot_entropy) export(role_map) export(routineness_metric) export(store_POV) +export(support_level) export(threadMap) export(threadSizeTable) export(threadTrajectory) +export(thread_text_vector) export(threads_to_network_original) +export(threshold_slider_max) +export(threshold_slider_min) +export(threshold_slider_selected) +export(window_correlation) +export(zoomColumn) export(zoom_upper_limit) +importFrom(grDevices,colorRampPalette) +importFrom(graphics,plot) diff --git a/R/Event_Mappings.R b/R/Event_Mappings.R index 8f90e74..1a5b0f8 100644 --- a/R/Event_Mappings.R +++ b/R/Event_Mappings.R @@ -1,15 +1,13 @@ ##################################################### # Global_POV is a global variable -#' Checks the name attempting to be create against the list of +#' @title check_POV_name +#' @description Checks the name attempting to be create against the list of #' map names in memory and forces the creation of a new name. 
-#' -#' @family Event_mappings -#' +#' @name check_POV_name #' @param mapname name of map attempting to be created #' @return TRUE or FALSE -#' -#' @export check_POV_name +#' @export check_POV_name <- function(mapname){ if (mapname %in% get_POV_name_list()){ @@ -20,9 +18,10 @@ check_POV_name <- function(mapname){ return(existingMap) } -#' @family Event_mappings +#' @title get_POV_name_list +#' @description Get list of POV names for all of the dropdown boxes on the UI +#' @name get_POV_name_list #' @return List of POV names -#' #' @export get_POV_name_list get_POV_name_list <- function(){ @@ -32,18 +31,17 @@ get_POV_name_list <- function(){ return(n) } -#' @family Event_mappings +#' @title store_POV +#' @description Stores the POV and context factors +#' @name store_POV #' @param EventMapName name of map attempting to be created #' @param e data frame with POV to be stored #' @param thread_CF List of CFs to be stored #' @param event_CF List of CFs to be stored -#' #' @return None, updates global variables -#' -#' @export store_POV +#' @export store_POV <- function(EventMapName, e, thread_CF, event_CF){ - # print(paste('in store_POV, EventMapName=',EventMapName)) # print(paste('in store_POV, thread_CF=',thread_CF)) # print(paste('in store_POV, event_CF=',event_CF)) @@ -61,11 +59,12 @@ store_POV <- function(EventMapName, e, thread_CF, event_CF){ } } -#' @family Event_mappings +#' @title get_POV +#' @description Gets the data frame for the POV +#' @name get_POV #' @param mapname name of POV map #' @return data frame with POV -#' -#' @export get_POV +#' @export get_POV <- function(mapname){ idx <- which(mapname==get_POV_name_list() ) @@ -78,11 +77,12 @@ get_POV <- function(mapname){ } } -#' @family Event_mappings +#' @title get_POV_THREAD_CF +#' @description Gets the CFs that define threads in this POV +#' @name get_POV_THREAD_CF #' @param mapname name of POV map #' @return thread CFs for that POV -#' -#' @export get_POV_THREAD_CF +#' @export get_POV_THREAD_CF <- 
function(mapname){ idx <- which(mapname==get_POV_name_list() ) @@ -95,11 +95,12 @@ get_POV_THREAD_CF <- function(mapname){ } } -#' @family Event_mappings +#' @title get_POV_EVENT_CF +#' @description Gets the CFs that define events in this POV +#' @name get_POV_EVENT_CF #' @param mapname name of POV map #' @return event CFs for that POV -#' -#' @export get_POV_EVENT_CF +#' @export get_POV_EVENT_CF <- function(mapname){ idx <- which(mapname==get_POV_name_list() ) @@ -113,12 +114,13 @@ get_POV_EVENT_CF <- function(mapname){ } # this one compute the list based on the other two -#' @family Event_mappings +#' @title get_POV_COMPARISON_CF +#' @description Gets the CFs that can be used for comparisons in this POV +#' @name get_POV_COMPARISON_CF #' @param mapname name of POV map #' @param CF_list list of other column names #' @return comparison CFs for that POV -#' -#' @export get_POV_COMPARISON_CF +#' @export get_POV_COMPARISON_CF <- function(mapname, CF_list){ idx <- which(mapname==get_POV_name_list() ) @@ -131,11 +133,12 @@ get_POV_COMPARISON_CF <- function(mapname, CF_list){ } } -#' @family Event_mappings +#' @title delete_POV +#' @description Deletes all the data assocated with this POV +#' @name delete_POV #' @param mapname name of POV map #' @return None, updates global variables -#' -#' @export delete_POV +#' @export delete_POV <- function(mapname){ # get the index for the mapname @@ -151,11 +154,12 @@ delete_POV <- function(mapname){ } -#' @family Event_mappings +#' @title export_POV +#' @description Exports the data assocated with this POV as Rdata +#' @name export_POV #' @param mapname name of POV map #' @return (writes Rdata file) -#' -#' @export export_POV +#' @export export_POV <- function(mapname){ # get the nice variable names @@ -172,11 +176,12 @@ export_POV <- function(mapname){ } -#' @family Event_mappings +#' @title export_POV_csv +#' @description Exports the data assocated with this POV as CSV +#' @name export_POV_csv #' @param mapname name of POV map 
#' @return (writes CSV file) -#' -#' @export export_POV_csv +#' @export export_POV_csv <- function(mapname){ output = as.data.frame( get_POV(mapname) ) @@ -186,3 +191,27 @@ export_POV_csv <- function(mapname){ write.csv(output, file=file.choose(), quote = TRUE, row.names = FALSE) } + +#' @title export_network +#' @description Exports the edge list for the graph that is displayed +#' @name export_network +#' @param mapname name of POV map to use as name of the file +#' @param n +#' @return (saves network into file) +#' @export +export_network <- function(mapname, CurrentNetwork ){ + + # get the nice variable names + nicename = paste0("CurrentNetwork_from_",mapname) + nicename = "CurrentNetwork_POV" + + # get the edge list for the network + # edge_list <- n.edgeDF + + print('saving network: CurrentNetwork_POV.Rdata') + # save the data + save( CurrentNetwork , file = paste0(nicename,".Rdata")) + print(' network saved') + +} + diff --git a/R/ThreadNet.R b/R/ThreadNet.R index d05e54f..3fe3546 100644 --- a/R/ThreadNet.R +++ b/R/ThreadNet.R @@ -1,4 +1,5 @@ -#' @family ThreadNet_Core -#' +#' @title ThreadNet_Core +#' @description This function launches the Shiny App called ThreadNet +#' @name ThreadNet_Core #' @export ThreadNet ThreadNet <- function() { shiny::runApp(system.file('ThreadNet', package='ThreadNet')) } diff --git a/R/ThreadNet_Batch.R b/R/ThreadNet_Batch.R deleted file mode 100644 index 863cb5c..0000000 --- a/R/ThreadNet_Batch.R +++ /dev/null @@ -1,138 +0,0 @@ -########################################################################################################## -# THREADNET: Batch processing for larger data sets - -# (c) 2017 Michigan State University. This software may be used according to the terms provided in the -# GNU General Public License (GPL-3.0) https://opensource.org/licenses/GPL-3.0? -# Absolutely no warranty! 
-########################################################################################################## - - -# Take a large number of patient visits and create a data frame where each row contains -# a set of variables that describes a set of one or more visits. - -# This is just a test -#' Batch processing for larger numbers of threads -#' -#' ACHR stands for Antecedents of Complexity in Healthcare Routines. This is function is set up to compute process parameters on thousands of patient visits. -#' -#' @param inFileName name of file (CSV format) containing the raw thread data. -#' -#' @return data frame ready for further analysis -#' -#' @export -ACHR_batch_V1 <- function(inFileName){ - - -# first read in the csv -rawOcc = fread(inFileName) - - -# HARD-CODED COLUMNS!! Should probably pass in as a parameters -TN = "threadID" -CFs = c("role","workstn","action") -DV= newColName(CFs) - - -# clean up the ocurrences, add week and month columns -occ = cleanOcc(rawOcc,CFs) - -# make threads - this will also make a new column that combines the CFs -threadedOcc <- ThreadOccByPOV(occ,TN,CFs) - -# may want to make threads with and without different CFs to define events, as well - -# pick subsets -- typically just one thread at a time, but could be more -# write a function for this -criteria <-"threadID" -bucket_list <- make_buckets(threadedOcc, criteria) - -# get the size (number of buckets) -N = length(bucket_list) - -# pre-allocate the data.table. Tables are supposed to be faster. -ACHR = data.table(bucket=integer(N), - NEvents = integer(N), - NDiagnoses = integer(N), - NProcedures = integer(N), - visitStartTime = numeric(N), # might need special data type for time - VisitDuration=numeric(N), # might need special data type for time - NetComplexity=double(N), - CompressRatio = double(N), - Clinic = character(N), - PrimaryDiagnosis = character(N), - PayerType = character(N), - Provider = character(N) # might not be available - ) - -# Now add columns for the IVs. 
There will be three for each IV - -# Add the IV columns -for (cf in CFs){ - - ACHR[, paste0(cf,"_count"):= double(N)] - ACHR[, paste0(cf,"_compression"):= double(N)] - ACHR[, paste0(cf,"_entropy"):= double(N)] - -} - -# loop through the buckets. Result will be data frame with one row per bucket -for (i in 1:N){ - - b = i # as.integer(bucket_list[i]) - - # select the threads that go in this bucket - df = threadedOcc[threadedOcc[[TN]] ==bucket_list[i],] - - # bucket number - ACHR[b,bucket := b] - - # length of the thread (number of rows) - ACHR[b,NEvents := nrow(df)] - - # only do the computations if there are more than two occurrences - if (nrow(df) > 2) { - - # compressibility of DV - ACHR[b,CompressRatio := compression_index(df,DV)] - - # NetComplexity of DV - # First get the network - n = threads_to_network(df,TN, DV) - ACHR[b,NetComplexity := estimate_network_complexity( n )] - - # compute stuff on each context factor - for (cf in CFs){ - - # Count the unique elements in each cf - ACHR[b, paste0(cf,"_count") := length(unique(df[[cf]])) ] - - # get the compression - ACHR[b, paste0(cf,"_compression") := compression_index(df,cf) ] - - # get the entropy - ACHR[b, paste0(cf,"_entropy") := compute_entropy(table(df[[cf]])[table(df[[cf]])>0]) ] - - } -} # kf nrows > 2 - -} # loop thru buckets - -# return the table -return(ACHR) -} - - -# Each bucket is a list of thread numbers that can be used to subset the list of occurrences -make_buckets <- function(o, criteria){ - - return( levels(o[[criteria]]) ) - -} - - -make_box_plots <- function(){ -ggboxplot(ACHR_test[NEvents>100 & Clinic=='DRH'], x = "VisitMonth", y = "NetComplexity", - color = "VisitDay", - ylab = "Complexity", xlab = "Month (DRH)") -} - diff --git a/R/ThreadNet_Core.R b/R/ThreadNet_Core.R index ddb3304..6fc381e 100644 --- a/R/ThreadNet_Core.R +++ b/R/ThreadNet_Core.R @@ -6,21 +6,18 @@ # Absolutely no warranty! 
########################################################################################################## + # These are the basic functions that convert threads to networks, etc. -#' Converts threads to network -#' -#' Converts a sequentially ordered streams of ;events (threads) and creates a unimodal, unidimensional network. +#' @title Converts threads to network +#' @description Converts a sequentially ordered streams of ;events (threads) and creates a unimodal, unidimensional network. #' Sequentially adjacent pairs of events become edges in the resulting network. -#' @family ThreadNet_Core -#' +#' @name threads_to_network_original #' @param et dataframe containing threads #' @param TN name of column in dataframe that contains a unique thread number for each thread #' @param CF name of the column in dataframe that contains the events that will form the nodes of the network #' @param grp grouping variable for coloring the nodes -#' #' @return a list containing two dataframes, one for the nodes (nodeDF) and one for the edges (edgeDF) -#' #' @export threads_to_network_original # here is a version without all the position stuff, which should be separated out, if possible. 
# Added in the "group" for the network graphics - default group is 'threadNum' because it will always be there @@ -78,13 +75,16 @@ threads_to_network_original <- function(et,TN,CF,grp='threadNum'){ to[i] = match(to_labels[i], nodes$label) } + # Stopped filtering out selfies July 20, 2019 for Kerstin Sailer bug report edges = data.frame( from, to, label = ngdf$freq, - Value =ngdf$freq) %>% filter(!from==to) + Value =ngdf$freq) # %>% filter(!from==to) - # print(paste("Edges:",edges)) + # print(paste("T2N nodes:",nodes)) + # print(paste("ngdf = :",ngdf)) + # print(paste("edges= :",edges)) return(list(nodeDF = nodes, edgeDF = edges)) } @@ -150,29 +150,28 @@ threads_to_network_original <- function(et,TN,CF,grp='threadNum'){ # return(list(nodeDF = nodes, edgeDF = edges)) # } -# Counting ngrams is essential to several ThreadNet functions -#' Counts ngrams in a set of threads -#' -#' This function counts n-grams within threads where the length of the thread is greater than n. -#' @family ThreadNet_Core -#' + +#' @title Counts ngrams in a set of threads +#' @description Counting ngrams is essential to several ThreadNet functions. This function counts n-grams within threads where the length of the thread is greater than n. 
+#' @name count_ngrams #' @param o dataframe containing threads #' @param TN name of column in dataframe that contains a unique thread number for each thread #' @param CF name of the column in dataframe that contains the events that will form the nodes of the network #' @param n length of ngrams to count -#' #' @return a dataframe with ngram, frequency and proportion in descending order -#' #' @export count_ngrams <- function(o,TN,CF,n){ # Need a vector of strings, one for each thread, delimited by spaces # the function long_enough filters out the threads that are shorter than n # use space for the delimiter here - text_vector = long_enough( thread_text_vector(o,TN,CF,' '), n, ' ') + text_vector = long_enough( thread_text_vector(o,TN,CF,' '), n, ' ') + # text_vector = thread_text_vector(o,TN,CF,' ') + + + # print(paste0("thread=", o[1,TN] ,", text_vector")) + # print(text_vector) - # print("text_vector") - # print(text_vector) ng = get.phrasetable(ngram(text_vector,n)) @@ -184,20 +183,15 @@ count_ngrams <- function(o,TN,CF,n){ ################################################################# -# -#' Make new threads from a new POV -#' -#' Take the raw occurrences from the input file and sort them by time stamp within +#' @title Converts occurrences into events, make threads from a new POV +#' @description Take the raw occurrences from the input file and sort them by time stamp within #' a set of contextual factors that remain constant for each thread. 
-#' @family ThreadNet_Core -#' +#' @name ThreadOccByPOV #' @param o is the dataframe of cleaned ocurrences #' @param THREAD_CF is a list of 1 or more context factors that define the threads (and stay constant during each thread) #' @param EVENT_CF is a list of 1 or more context factors that define events (and change during threads) -#' #' @return dataframe containing the same occurrences sorted from a different point of view -#' -#'@export +#' @export ThreadOccByPOV <- function(o,THREAD_CF,EVENT_CF){ timescale = get_timeScale() @@ -320,9 +314,10 @@ ThreadOccByPOV <- function(o,THREAD_CF,EVENT_CF){ incProgress(4/n) - # this will store the event map in the GlobalEventMappings and return events with network cluster added for zooming... - # e=clusterEvents(occ, 'OneToOne', 'Network Proximity', THREAD_CF, EVENT_CF,'threads') - e=clusterEvents(occ, '', 'Network Proximity', THREAD_CF, EVENT_CF,'threads') + # return events with network cluster added for zooming... + # print('assign label to ZM_1') + # e$ZM_1 = e$label + e=clusterEvents(occ, '', 'Network Proximity', THREAD_CF, EVENT_CF,'threads') # sort them by threadnum and seqnum e = e[order(e[['threadNum']],e[['seqNum']]),] @@ -344,12 +339,10 @@ print('done converting occurrences...') ############################################################################################################## -#' Maps occurrences into events -#' -#' Thus function provides a place to map occurrences into events, so is is not necessary to interpret individual -#' occurrences in isolation. There are many ways to accomplish this mapping. -#' @family ThreadNet_Core -#' +#' @title Maps occurrences into events by chunks. +#' @description Thus function provides a way to map occurrences into events, so is is not necessary to interpret individual +#' occurrences in isolation. Provides three ways to accomplish this mapping. 
+#' @name OccToEvents_By_Chunk #' @param o a dataframe of occurrences #' @param m = method parameter = one of c('Variable chunks','Uniform chunks') #' @param EventMapName = used to store this mapping for visualization and comparison @@ -360,9 +353,7 @@ print('done converting occurrences...') #' @param thread_CF - context factors used to delineate threads #' @param event_CF - context factors used to define events #' @param compare_CF = context factors used for comparison -- need to be copied over here when the thread is created. -#' #' @return event data frame, with occurrences aggregated into events. -#' #' @export OccToEvents_By_Chunk <- function(o, m, EventMapName, uniform_chunk_size, tThreshold, timescale='mins', chunk_CF, thread_CF, event_CF, compare_CF){ @@ -503,14 +494,12 @@ OccToEvents_By_Chunk <- function(o, m, EventMapName, uniform_chunk_size, tThresh } -# this one creates events based on frequent ngrams or regular expressions -#' @family ThreadNet_Core +################################################################################ +#' @title OccToEvents3 +#' @description Creates events based on frequent ngrams or regular expressions +#' @name OccToEvents3 #' @param o a dataframe of occurrences #' @param EventMapName = used to store this mapping for visualization and comparison -#' @param uniform_chunk_size = used to identify breakpoints -- from input slider -#' @param tThreshold = used to identify breakpoints -- from input slider -#' @param timescale hours, min or sec -#' @param chunk_CF - context factors used to delineate chunks #' @param THREAD_CF - context factors used to delineate threads #' @param EVENT_CF - context factors used to define events #' @param compare_CF = context factors used for comparison -- need to be copied over here when the thread is created. 
@@ -518,9 +507,7 @@ OccToEvents_By_Chunk <- function(o, m, EventMapName, uniform_chunk_size, tThresh #' @param CF context factor #' @param rx list of patterns #' @param KeepIrregularEvents = keep or drop events that don't fit patterns -#' #' @return event data frame, with occurrences aggregated into events. -#' #' @export OccToEvents3 <- function(o, EventMapName, THREAD_CF, EVENT_CF, compare_CF,TN, CF, rx, KeepIrregularEvents){ @@ -670,26 +657,20 @@ OccToEvents3 <- function(o, EventMapName, THREAD_CF, EVENT_CF, compare_CF,TN, CF } -# new function for new tab -# e is the event list -# EventMapName is an input selected from the list of available mappings -# cluster_method is either "Sequential similarity" or "Contextual Similarity" or "Network Structure" -#' @family ThreadNet_Core +###################################################################################### +#' @title Clusters occurrences or eents +#' @description cluster_method is either "Sequential similarity" or "Contextual Similarity" or "Network Structure" +#' @name clusterEvents #' @param e a dataframe of events or occurrences #' @param NewMapName = used to store this mapping for visualization and comparison #' @param cluster_method = method for clustering #' @param thread_CF - context factors used to delineate threads #' @param event_CF - context factors used to define events -#' @param compare_CF = context factors used for comparison -- need to be copied over here when the thread is created. -#' @param TN ThreadNum #' @param what_to_return POV or Cluster solution - #' @return event data frame with occurrences aggregated into events or cluster solution -#' #' @export clusterEvents <- function(e, NewMapName, cluster_method, thread_CF, event_CF,what_to_return='POV'){ - # make sure to cluster on the correct column (one that exists...) 
if (cluster_method=="Sequential similarity") { dd = dist_matrix_seq(e) } @@ -708,6 +689,10 @@ clusterEvents <- function(e, NewMapName, cluster_method, thread_CF, event_CF,wha # print(paste('in cluster_events, then, focalCol=',focalCol)) dd = dist_matrix_network(e,focalCol) } + # if there are NA or NaN values, replace then with numbers 10x as big as the largest + dd[is.na(dd)] = max(dd[!is.na(dd)])*10 + dd[is.infinite(dd)] = max(dd[!is.infinite(dd)])*10 + ### cluster the elements clust = hclust( dd, method="ward.D2" ) @@ -752,6 +737,42 @@ clusterEvents <- function(e, NewMapName, cluster_method, thread_CF, event_CF,wha {return(newmap)} } + +# this is used for the regex pages to show the threads. +# similar code is used in count_ngrams and to make networks, but with different delimiters +# and with a minimum sequence length (ngram size), but this can be filtered after this function3 +#' @title thread_text_vector +#' @description Create a vector of threads +#' @name thread_text_vector +#' @param o a dataframe of events or occurrences +#' @param TN = threadNum +#' @param CF = CF or columm to include +#' @param delimiter usually comma or blank +#' @return vector of threads as delimited character strings +#' @export + thread_text_vector <- function(o, TN, CF, delimiter){ + + # Initialize text vector + tv = vector(mode="character") + + # Loop through the unique thread numbers + j=0 + for (i in unique(o[[TN]])){ + txt =o[o[[TN]]==i,CF] + + j=j+1 + tv[j] = str_replace_all(concatenate(o[o[[TN]]==i,CF] ),' ',delimiter) + } + return(tv) + +} + +############################################################################# +############################################################################# +## LOCAL FUNCTIONS from here on down +############################################################################# +############################################################################# + # this function pulls computes their similarity of chunks based on sequence # these 
functions are only used locally dist_matrix_seq <- function(e){ @@ -783,7 +804,10 @@ dist_matrix_network <- function(e,CF){ # first get the nodes and edges n=threads_to_network_original(e,'threadNum',CF) - # print(paste('in dist_matrix_network, n=', n)) + # print(paste('in dist_matrix_network, n=', n)) + # print(n$nodeDF[['label']]) + # print(n$edgeDF) + # now get the shortest paths between all nodes in the graph d=distances(graph_from_data_frame(n$edgeDF), @@ -942,25 +966,7 @@ one_vcf_matrix <- function(e, vcf){ } -# this is used for the regex pages to show the threads. -# similar code is used in count_ngrams and to make networks, but with different delimiters -# and with a minimum sequence length (ngram size), but this can be filtered after this function3 -thread_text_vector <- function(o, TN, CF, delimiter){ - - # Initialize text vector - tv = vector(mode="character") - - # Loop through the unique thread numbers - j=0 - for (i in unique(o[[TN]])){ - txt =o[o[[TN]]==i,CF] - - j=j+1 - tv[j] = str_replace_all(concatenate(o[o[[TN]]==i,CF] ),' ',delimiter) - } - return(tv) -} # use this to replace patterns for regex and ngrams # tv is the text vector for the set of threads @@ -993,6 +999,17 @@ replace_regex_list <- function(tv, rx ){ # combined set of frequent ngrams # add parameter to make maximal a choice +#' @title frequent_ngrams +#' @description combined set of frequent ngrams within a range o lengths +#' @name frequent_ngrams +#' @param e event data +#' @param TN threadNum +#' @param CF context factor (column) to look at +#' @param minN miniumum ngram length +#' @param maxN maximum ngram length +#' @param onlyMaximal Filters out ngrams that are included in longer ngrams. Default is true. 
+#' @return dataframe of ngrams +#' @export frequent_ngrams <- function(e, TN, CF, minN, maxN, onlyMaximal=TRUE){ # initialize the output @@ -1034,6 +1051,13 @@ maximal_ngrams <- function(ng){ # tv = text vectors for the threads # ng = frequent ngrams data frame # returns ng data frame with support level added +#' @title support_level +#' @description Counts what fraction of the threads a particular ngram appears in +#' @name support_level +#' @param tv text vector of threads +#' @param ng ngram to be located in the threads +#' @return percentage of threads containing the ngram +#' @export support_level <- function(tv, ng) { # change the commas back to spaces @@ -1055,7 +1079,7 @@ support_level <- function(tv, ng) { return(ng) } -# compute the generativity = in-degree and out-degree + generativity_level<- function(tv, ng){ # for each ngram, look at the next longer size diff --git a/R/ThreadNet_Graphics.R b/R/ThreadNet_Graphics.R index 71c30ec..adfdbd1 100644 --- a/R/ThreadNet_Graphics.R +++ b/R/ThreadNet_Graphics.R @@ -8,23 +8,21 @@ # graphic functions used in Shiny App. 
# some plotly, but some from other packages -library(plotly) +# explicitly add packages/functions to the NAMESPACE +#' @importFrom grDevices colorRampPalette +#' @importFrom graphics plot + ###### Pie charts for context factors #### # It would be nice to display some other helpful information, perhaps (like the % of possible combinations that occur) -#' Creates pie charts for one or more contextual factors -#' -#' When selecting contextual factors that define threads, events and comparisons, this function provide visual feedback about the number of factors levels +#' @title Creates pie charts for one or more contextual factors +#' @description When selecting contextual factors that define threads, events and comparisons, this function provide visual feedback about the number of factors levels #' and also the number of levels when the factors are combined -#' -#' @family ThreadNet_Graphics -#' +#' @name CF_multi_pie #' @param oc data frame of occurrences #' @param CF list of contextual factors (columns) to include in the display -#' #' @return plotyly pie charts (one or more) -#' #' @export CF_multi_pie <- function(oc,CF){ @@ -32,10 +30,10 @@ CF_multi_pie <- function(oc,CF){ # print(paste('in CF_multiPie, CF=',CF)) # avoid unpleasant error messages - if (length(CF)==0) {return(plotly_empty())} + if (length(CF)==0) {return(plotly_empty(type='scatter',mode='markers'))} # make sure the necessary columns are present - if (!all(CF %in% colnames(oc))) {return(plotly_empty())} + if (!all(CF %in% colnames(oc))) {return(plotly_empty(type='scatter',mode='markers'))} # first add the combined column if there is more than one if (length(CF) >1){ @@ -59,7 +57,7 @@ CF_multi_pie <- function(oc,CF){ # Now loop for each CF, computing entropy and adding on the next "trace" to the plot - # start with blank plot object + # start with blank plot object. Assign type=scatter to suppress warnings. 
pies = plot_ly() max_combos = 1 for (i in 1:nPlots) { @@ -91,7 +89,7 @@ CF_multi_pie <- function(oc,CF){ } pies = pies %>% - layout(showlegend=FALSE, + plotly::layout(showlegend=FALSE, xaxis = list(showgrid = FALSE,zeroline = FALSE, showticklabels = FALSE), yaxis = list(showgrid = FALSE, zeroline = FALSE,showticklabels = FALSE) # , @@ -107,11 +105,6 @@ CF_multi_pie <- function(oc,CF){ # CF = list of context factor names used to define events # r is the row name for the event being examined # -# KNOWN ISSUES: -# * Need to pass in the factor levels as labels for the pie slices -# * Need to compute the values differently for a node in the dendrogram or in a zoomed graph -# * Probably need to pass in the vector of values -# # Call this for one CF at a time # o is the raw occurrences. This is where we get the labels. # e is the events. This is where we get the frequencies @@ -133,12 +126,22 @@ make_df_for_one_pie <- function(o,e,cf,r,zm){ # zoom level as an integer (so you can grab it from the slider) # r = row number or cluster number. Should be the number on the event # z = integer for zoom column +#' @title CF_multi_pie_event +#' @description Make multi-pie for click event from force network layout. 
Generate a small plot of context factor pie charts when you click on a node in the graph +#' @name CF_multi_pie_event +#' @param o data frame of raw occurrences (for the names) +#' @param e data frame with events +#' @param CF list of contextual factors (columns) to include in the display +#' @param r row number of cluster (the number of the event node) +#' @param zm integer for zoom column +#' @return plotyly pie charts (one or more) +#' @export CF_multi_pie_event <- function(o, e,CF,r, zm){ # avoid unpleasant error messages - if (length(CF)==0) {return(plotly_empty())} + if (length(CF)==0) {return(plotly_empty(type='scatter',mode='markers'))} # print(paste('in CF_multi_pie_event, r=',r)) - if (is.na(as.numeric(r))) {return(plotly_empty())} + if (is.na(as.numeric(r))) {return(plotly_empty(type='scatter',mode='markers'))} # get number of plots nPlots = length(CF) @@ -161,8 +164,10 @@ CF_multi_pie_event <- function(o, e,CF,r, zm){ n=length # Now loop for each CF, computing entropy and adding on the next "trace" to the plot - # start with blank plot object + # start with blank plot object. Assign type = scatter to suppress warnings. +# pies = plot_ly(type='scatter') pies = plot_ly() + max_combos = 1 for (i in 1:nPlots) { @@ -197,7 +202,7 @@ CF_multi_pie_event <- function(o, e,CF,r, zm){ } pies = pies %>% - layout(showlegend=FALSE, + plotly::layout(showlegend=FALSE, xaxis = list(showgrid = FALSE,zeroline = FALSE, showticklabels = FALSE), yaxis = list(showgrid = FALSE, zeroline = FALSE,showticklabels = FALSE) # , @@ -209,21 +214,16 @@ CF_multi_pie_event <- function(o, e,CF,r, zm){ ###################################################################### # ThreadMap shows the threads in a horizongal layout -#' Shows threads in a horizontal layout -#' -#' Creates a plotly chart of threads in either clock time or event time, depending on the timescale parameter. 
-#' -#' @family ThreadNet_Graphics -#' +#' @title threadMap shows threads in a horizontal layout +#' @description Creates a plotly chart of threads in either clock time or event time, depending on the timescale parameter. +#' @name threadMap #' @param or Dataframe of threads #' @param TN name of column with thread number #' @param timescale name of column that will be used to plot x-axis of events. It can be the can be the time stamp (for clock time) or the sequence number (for event time) #' @param CF name of contextual factor that will determine the colors #' @param shape shape code for the markers on the threadmap -#' #' @return plotly object #' @export -#' threadMap <- function(or, TN, timescale, CF, shape){ @@ -250,7 +250,7 @@ threadMap <- function(or, TN, timescale, CF, shape){ hoverinfo = "text+x+y", symbol= "line-ew", symbols=shape, showlegend=FALSE) %>% - layout( + plotly::layout( xaxis = list(title = xaxis), yaxis = list(title = knitr::combine_words(get_THREAD_CF(), sep = ", ")) ) @@ -259,18 +259,14 @@ threadMap <- function(or, TN, timescale, CF, shape){ ################################################ -#' Create an ngram bar chart -#' -#' Shows the n-grams within a set of threads (but not splitting across threads). This provides a visual indication of how repetitive the threads are. -#' -#' @family ThreadNet_Graphics -#' +#' @title Create an ngram bar chart +#' @description Shows the n-grams within a set of threads (but not splitting across threads). This provides a visual indication of how repetitive the threads are. 
+#' @name ng_bar_chart #' @param o a dataframe of occurrences or events #' @param TN the column that contains the threadNum #' @param CF the contextual factor within which to count the n-grams #' @param n the length of the ngram #' @param mincount the minimum count to display -#' #' @return plotly object #' @export ng_bar_chart <- function(o,TN, CF, n, mincount){ @@ -291,7 +287,7 @@ ng_bar_chart <- function(o,TN, CF, n, mincount){ ngBars = ngdf[ngdf$freq>=mincount,] ngp <- plot_ly( ngBars, x = ~ngrams, y = ~freq, type = "bar",showlegend=FALSE) %>% - layout(xaxis= list(showticklabels = FALSE, title=paste0(n,"-grams of ",CF, " that occur > ",mincount," times"))) + plotly::layout(xaxis= list(showticklabels = FALSE, title=paste0(n,"-grams of ",CF, " that occur > ",mincount," times"))) return(ngp) } @@ -303,92 +299,88 @@ ng_bar_chart_freq <- function(ngdf){ ngdf$ngrams = factor(ngdf$ngrams, levels =unique(ngdf$ngrams)[order(ngdf$freq, decreasing = FALSE)]) ngp <- plot_ly( ngdf, y = ~ngrams, x = ~freq, type = "bar",showlegend=FALSE) %>% - layout(xaxis= list(showticklabels = TRUE, title='Frequency')) %>% - layout(yaxis= list(showticklabels = FALSE, title='')) + plotly::layout(xaxis= list(showticklabels = TRUE, title='Frequency')) %>% + plotly::layout(yaxis= list(showticklabels = FALSE, title='')) return(ngp) } ############################################################################# -#' Circular network layout for event network (USES visnetwork) -#' -#' Should be replaced with a more expressive layout in plotly -#' -#' @family ThreadNet_Graphics -#' -#' @param et dataframe with the threads to be graphed -#' @param TN the column with the threadNumber -#' @param CF is the contetual factors (column) -#' @param timesplit time measure -#' -#' @return plotly object -#' @export - -eventNetwork <- function(et, TN, CF, timesplit){ - - n <- threads_to_network(et, TN, CF, timesplit) - - - title_phrase = paste("Estimated complexity index =",estimate_network_complexity(n)) - - 
edge_shapes <- list() - for(i in 1:length(n$edgeDF$from)) { - E <- n$edgeDF[i,] - - edge_shape = list( - type = "line", - line = list(color = "#030303", width = 0), - x0 = E[['from_x']], - x1 = E[['to_x']], - y0 = E[['from_y']], - y1 = E[['to_y']], - xref = "x", - yref = "y" - ) - - edge_shapes[[i]] <- edge_shape - } - - x <- list( - title = 'Average Time' - ) - - y <- list( - title = 'Frequency' - ) - color_pal = colorRampPalette(brewer.pal(11,'Spectral')) - size_pal = (n$nodeDF$y_pos-min(n$nodeDF$y_pos))/(max(n$nodeDF$y_pos)-min(n$nodeDF$y_pos))*15+10 - network <- plot_ly(x = ~n$nodeDF$x_pos, y = ~n$nodeDF$y_pos, - width = 0, - mode = "markers", - marker = list(size= size_pal, - color=color_pal(100)[as.numeric(cut(n$nodeDF$x_pos, breaks=100))] - - ), - text = n$nodeDF$label, key = n$nodeDF$label, hoverinfo = "text", source = 'A') - - p <- layout( - network, - title = title_phrase, - shapes = edge_shapes, - xaxis = x, - yaxis = y - ) - return(p) +# @title Circular network layout for event network (USES visnetwork) +# +# Should be replaced with a more expressive layout in plotly +# +# @name eventNetwork +# @param et dataframe with the threads to be graphed +# @param TN the column with the threadNumber +# @param CF is the contetual factors (column) +# @param timesplit time measure +# @return plotly object +# @export +# eventNetwork <- function(et, TN, CF, timesplit){ +# +# n <- threads_to_network(et, TN, CF, timesplit) +# +# +# title_phrase = paste("Estimated complexity index =",estimate_network_complexity(n)) +# +# edge_shapes <- list() +# for(i in 1:length(n$edgeDF$from)) { +# E <- n$edgeDF[i,] +# +# edge_shape = list( +# type = "line", +# line = list(color = "#030303", width = 0), +# x0 = E[['from_x']], +# x1 = E[['to_x']], +# y0 = E[['from_y']], +# y1 = E[['to_y']], +# xref = "x", +# yref = "y" +# ) +# +# edge_shapes[[i]] <- edge_shape +# } +# +# x <- list( +# title = 'Average Time' +# ) +# +# y <- list( +# title = 'Frequency' +# ) +# color_pal = 
colorRampPalette(brewer.pal(11,'Spectral')) +# size_pal = (n$nodeDF$y_pos-min(n$nodeDF$y_pos))/(max(n$nodeDF$y_pos)-min(n$nodeDF$y_pos))*15+10 +# network <- plot_ly(x = ~n$nodeDF$x_pos, y = ~n$nodeDF$y_pos, +# width = 0, +# mode = "markers", +# marker = list(size= size_pal, +# color=color_pal(100)[as.numeric(cut(n$nodeDF$x_pos, breaks=100))] +# +# ), +# text = n$nodeDF$label, key = n$nodeDF$label, hoverinfo = "text", source = 'A') +# +# p <- plotly::layout( +# network, +# title = title_phrase, +# shapes = edge_shapes, +# xaxis = x, +# yaxis = y +# ) +# return(p) +# +# } -} ################################################################ ## Here is the networkD3 version of the same thing. # it has a bunch of extra code because of the groups... # needs to be re-written to separate computation of the network from the layout... -#' NetworkD3 layout for event network -#' -#' @family ThreadNet_Graphics -#' +#' @title forceNetworkD3 is an Interactive layout for event network +#' @description This produces a force layout network using networkD3 +#' @name forceNetworkD3 #' @param n = list with data frames for nodes and edges -#' #' @return networkD3 object #' @export forceNetworkD3 <- function(n){ @@ -398,7 +390,6 @@ forceNetworkD3 <- function(n){ n$edgeDF['from'] = n$edgeDF['from']-1 n$edgeDF['to'] = n$edgeDF['to']-1 - return( forceNetwork(Links = n$edgeDF, Nodes = n$nodeDF, Source = "from", Target = "to", Value = "Value", NodeID = "label", Group = "Group", opacity = 1, zoom = T,arrows=TRUE, bounded = FALSE, @@ -407,13 +398,10 @@ forceNetworkD3 <- function(n){ ###################################################################################### -#' Comparison plots -#' -#' Produce a set set of comparison sub-plots in an array. Ideally, we should be able to use any of the plots. So far it is only bar charts. +#' @title Comparison plots +#' @description Produce a set set of comparison sub-plots in an array. Ideally, we should be able to use any of the plots. 
So far it is only bar charts. #' This is a prototype that could use rather extensive redesign... -#' -#' @family ThreadNet_Graphics -#' +#' @name Comparison_Plots #' @param e dataframe with threads to be plotted #' @param o dataframe with the original data #' @param CF contextul factors @@ -421,7 +409,6 @@ forceNetworkD3 <- function(n){ #' @param nTimePeriods how many time periods to divide the data? #' @param plot_type a type of plotly plot with a function written #' @param role_map_cfs context factors for the role map plot -#' #' @return plotly object, including subplots #' @export Comparison_Plots <- function(e, o, CF, CF_levels, nTimePeriods=1, plot_type,role_map_cfs){ @@ -468,7 +455,7 @@ Comparison_Plots <- function(e, o, CF, CF_levels, nTimePeriods=1, plot_type,rol else if (plot_type=='Threads (event time)') {plot_list[[plotName]] = threadMap(dfp, "threadNum", "seqNum", 1, 15 ) } else - {plot_list[[plotName]] = plotly_empty()} + {plot_list[[plotName]] = plotly_empty(type='scatter',mode='marker')} } }} @@ -494,12 +481,12 @@ Comparison_Plots <- function(e, o, CF, CF_levels, nTimePeriods=1, plot_type,rol # Basic Network layout # accepts the data stucture with nodeDF and edgeDF created by threads_to_network and normalNetwork -#' @family ThreadNet_Graphics -#' +#' @title circleVisNetwork +#' @description Produces a circle network layout using visNetwork +#' @name circleVisNetwork #' @param n list with nodeDF and edgeDF dataframes #' @param directed type of network = directed or not #' @param showTitle - show the title or not -#' #' @return visnetwork object #' @export circleVisNetwork <- function( n,directed='directed', showTitle=FALSE ){ @@ -539,12 +526,12 @@ if (directed =='directed') # e is any set of events # vcf is the context factor to graph as network for that set of events # l is the set of labels = factor levels of original data for that VCF -#' @family ThreadNet_Graphics -#' +#' @title normalNetwork +#' @description Produced a network of co-occurrences 
for any given CF and displays it in a visNetwork circle layout +#' @name normalNetwork #' @param e event data frame #' @param o occurrence data frame #' @param cf context factor for the graph -#' #' @return visnetwork object #' @export normalNetwork <- function(e, o, cf){ @@ -573,8 +560,6 @@ normalNetwork <- function(e, o, cf){ diag(a) = 0 a=a/max(a) - - # print(a) g=graph_from_adjacency_matrix(a, mode='undirected', weighted=TRUE) @@ -587,8 +572,9 @@ normalNetwork <- function(e, o, cf){ return(list(nodeDF = nodes, edgeDF = as.data.frame(edges) )) } -#' @family ThreadNet_Graphics -#' +#' @title filter_network_edges +#' @description Filters out network edges with weight below the threshold +#' @name filter_network_edges #' @param n network list of nodeDF and edgeDF #' @param threshold numeric threshold for filtering edges. #' @return n network list of nodeDF and edgeDF @@ -604,20 +590,18 @@ filter_network_edges <- function(n, threshold){ return(n) } -# role map will show "who does what" for any set of events -# cfs contains a list of two contextual factors. -#' @family ThreadNet_Graphics -#' +#' @title role_map +#' @description A role map (like a heat map) that will show "who does what" for any set of events +#' @name role_map #' @param e event data frame #' @param o occurrence data frame -#' @param cf context factor for the graph -#' +#' @param cfs context factors for the graph #' @return plotly heat map #' @export role_map <- function(e, o, cfs){ if (!length(cfs)==2) - return(plot_ly()) + return(plot_ly(type='scatter')) # Get the context factors vcf_1 = paste0('V_',cfs[1]) @@ -640,10 +624,11 @@ role_map <- function(e, o, cfs){ } -# this shows relative time versus sequential time -# Inspired by Gergen and Danner-Schroeder -#' @family ThreadNet_Graphics -#' + +#' @title threadTrajectory +#' @description Create a plotly diagram showing relative time versus sequential time. 
+#' Inspired by Gergen and Danner-Schroeder +#' @name threadTrajectory #' @param or event data frame #' @return plotly scatter plot #' @export @@ -661,12 +646,16 @@ threadTrajectory <- function(or){ hoverinfo = "text", showlegend=FALSE) %>% - layout( + plotly::layout( xaxis = list(title='Relative time'), yaxis = list(title='Sequence') )) } +#' @title movingWindowCorrelation +#' @description Creates plotly diagram showing correlation of moving windows across time +#' In this version, the moving window can overlap with itself as it slides +#' @name movingWindowCorrelation #' @param trace list of (x,y) coordinates #' @return plotly scatter plot #' @export @@ -679,7 +668,7 @@ movingWindowCorrelation <- function( trace ){ symbol= "line-ew", symbols=15, showlegend=FALSE ) %>% - layout( + plotly::layout( xaxis = list(title='Window number'), yaxis = list(title='Correlation', range = c(0, 1), @@ -692,6 +681,11 @@ movingWindowCorrelation <- function( trace ){ showticklabels = TRUE)) ) } + +#' @title dualmovingWindowCorrelation +#' @description Creates plotly diagram showing correlation of moving windows across time +#' This version computes the correlation of TWO adjacent windows that never overlap +#' @name dualmovingWindowCorrelation #' @param trace list of (x,y) coordinates #' @return plotly scatter plot #' @export @@ -704,7 +698,7 @@ dualmovingWindowCorrelation <- function( trace ){ symbol= "line-ew", symbols=15, showlegend=FALSE #,height = 200 ) %>% - layout( + plotly::layout( xaxis = list(title='Window number'), yaxis = list(title='Correlation', range = c(0, 1), diff --git a/R/ThreadNet_Metrics.R b/R/ThreadNet_Metrics.R index b067184..1f5b1c6 100644 --- a/R/ThreadNet_Metrics.R +++ b/R/ThreadNet_Metrics.R @@ -1,4 +1,4 @@ -########################################################################################################## + ########################################################################################################## # THREADNET: Metrics # This software 
may be used according to the terms provided in the @@ -8,30 +8,30 @@ # Functions for metrics: entropy, complexity, routine-ness, etc. -# takes the output from the function that maps threads to networks -#' Estimates the number of paths in a directed graph -#' -#' This function takes a network descripts (nodes and edges, as generaged by the functino threads_to_network, and estimates the number of paths. +#' @title Estimates the number of paths in a directed graph +#' @description This function takes a network descripts (nodes and edges, as generaged by the functino threads_to_network, and estimates the number of paths. #' as described in Haerem, Pentland and Miller (2015). The estimate correlates with the McCabe's (1975) cyclometric complexity. -#' -#' @family ThreadNet_Metrics -#' +#' @name estimate_network_complexity #' @param net Object with dataframe for nodes and edges -#' #' @return number #' @export estimate_network_complexity <- function(net){ return(estimate_task_complexity_index( nrow(net$nodeDF), nrow(net$edgeDF)) ) } -# this version takes vertices and edges -#' Estimates the number of paths in a directed graph -#' -#' Same as estimate_network_complexity, but takes different parameters -#' -#' @family ThreadNet_Metrics -#' +# returns a string with the number of nodes and edges in the network +#' @title returns a string with the number of nodes and edges in the network +#' @description returns a string with the number of nodes and edges +#' @name print_network_nodes_edges +#' @param net Object with dataframe for nodes and edges +#' @return string +#' @export +print_network_nodes_edges <- function(net){ return(paste0('Number of nodes = ', nrow(net$nodeDF),' Number of edges = ', nrow(net$edgeDF) ) ) } + + +#' @title Estimates the number of paths in a directed graph +#' @description Same as estimate_network_complexity, but takes this version takes vertices and edges as parameters +#' @name estimate_task_complexity_index #' @param v number of vertices (or 
nodes) #' @param e number of edges -#' #' @return number #' @export estimate_task_complexity_index <- function(v,e){ @@ -41,36 +41,31 @@ estimate_task_complexity_index <- function(v,e){ # v = number of vertices # tested for range of 10 < v < 100 # e = number of edges - print("edges") - print(e) - print("vertices") - print(v) + # print("edges") + # print(e) + # print("vertices") + # print(v) # # OUTPUT ARG: # cidx correlates with Log10(simple paths) with r>= 0.8 # from ORM paper analysis, constant is 0.12. # For boundary condition of 2 nodes and 1 edge, complexity index=0, constant = 0.08 - return( 0.08 + 0.08*e - 0.08*v ) + return( 10^( 0.08 + 0.08*e - 0.08*v) ) } ################################################################# -#' Computes a metric of routineness based on frequency of ngrams -#' -#' Computes the fraction of observed behavior that conforms to an observed pattern. +#' @title Computes a metric of routineness based on frequency of ngrams +#' @description Computes the fraction of observed behavior that conforms to an observed pattern. #' Current version uses ngrams, but it would be good to use spmf pattern mining to avoid including duplicate patterns (e.g., a-b-c and b-c-d) -#' -#' @family ThreadNet_Metrics -#' +#' @name routineness_metric #' @param o data frame with occurresnces or events #' @param TN name of column with threadNumbers #' @param CF name of column with contextual factor #' @param n size of ngram #' @param m how many of the most frequent ngrams to include. When m > 1, there is a risk of duplication. -#' #' @return number, index of routineness. -#' #' @export routineness_metric <- function(o,TN,CF,n,m){ @@ -85,16 +80,12 @@ routineness_metric <- function(o,TN,CF,n,m){ ############################################################################# -#' Computes the compressibility of the data in one column of a data frame -#' -#' Compressibility is an index of complexity -- more compressible means less complex. 
This function computes the ratio of compressed data +#' @title Computes the compressibility of the data in one column of a data frame +#' @description Compressibility is an index of complexity -- more compressible means less complex. This function computes the ratio of compressed data #' to the original data. Should be between zero and one. Uses built-in functions for in=memory compression -#' -#' @family ThreadNet_Metrics -#' +#' @name compression_index #' @param df a data frame containing occurrences or events #' @param CF a column or contextual factor in that data frame -#' #' @return number containing compressibility index, 0 < i < 1 #' @export compression_index <- function(df,CF){ return( @@ -102,18 +93,13 @@ compression_index <- function(df,CF){ return( length(paste0(as.character(df[[CF]]))) ) } -####################################################################### +#######################################################################es #compute entropy for a set of observations in a column from a data frame -# freq is typically going to the $freq column from ngram table, or -# the frequency of each level in the CFs, as counted by table() -#' Compute the entropy of a contextual factor -#' -#' Each column in the raw data represents a contextual factor. This function computes the entropy of each factor that is selected for use in the +#' @title Compute the entropy of a contextual factor +#' @description Each column in the raw data represents a contextual factor. This function computes the entropy of each factor that is selected for use in the #' analysis. -#' @family ThreadNet_Metrics -#' +#' @name compute_entropy #' @param freq is the frequency distribution of the levels in the factor -#' #' @return number #' @export compute_entropy <- function(freq){ @@ -124,7 +110,12 @@ compute_entropy <- function(freq){ } # code to plot entropy as a function of zoom_level -# need to get the zoom levels -- grep out the 'Z_' column names... 
+#' @title plot entropy as a function of zoom_level +#' @description Gets the zoom levels, grep out the 'Z_' column names... +#' @name plot_entropy +#' @param e data from of events with zoom levels +#' @return regular R plot +#' @export plot_entropy <- function(e){ plot(unlist(lapply(grep('ZM_',colnames(e)),function(i){compute_entropy(table(e[[i]]))}))) } diff --git a/R/ThreadNet_Misc.R b/R/ThreadNet_Misc.R index be906a2..6eb7ed5 100644 --- a/R/ThreadNet_Misc.R +++ b/R/ThreadNet_Misc.R @@ -7,7 +7,11 @@ ########################################################################################################## -## Make an example data frame for display... +#' @title make_example_DF +#' @description Make an example data frame for display... +#' @name make_example_DF +#' @return DF with some data +#' @export make_example_DF = function(){ correct_occ = read.table(text="tStamp actor action object location '2017-4-7 17:52:04' jimmy tosses ball playground @@ -19,14 +23,12 @@ make_example_DF = function(){ -#' numThreads counts how many threads in the data set -#' -#' Threads must have unique thred numbers for this function to work -#' -#' @family ThreadNet_Misc + +#' @title numThreads counts how many threads in the data set +#' @description Threads must have unique thred numbers for this function to work +#' @name numThreads #' @param o data frame with occurrences or events #' @param TN column with thread number -#' #' @return number of threads #' @export numThreads = function(o,TN) {length(unique(o[[TN]]))} @@ -47,12 +49,24 @@ timeRangePhrase = function(tr){ # this function is used to split up the threads into n ~equal buckets +#' @title make_subsets +#' @description this function is used to split up the threads into n ~equal buckets +#' @name make_subsets +#' @param d data frame with occurrences or events +#' @param n number of buckets +#' @return list of smaller data frames +#' @export make_subsets <- function(d,n){ return(split(d, ceiling(seq_along(d)/(length(d)/n)))) } 
-# This function takes a slider value and returns a valid column name for zooming -# if the argument is null, then use ZM_1 +#' @title zoomColumn +#' @description This function takes a slider value and returns a valid column name for zooming +#' if the argument is null, then use ZM_1 +#' @name zoomColumn +#' @param z integer for the zoom level +#' @return column name for that zoom level ('ZM_n') +#' @export zoomColumn <- function(z){ # print(paste("In zoomColumn z=",z)) @@ -67,49 +81,38 @@ zoomColumn <- function(z){ } ######### Functions that return column names ####### - -# names of the columns for contextual factors -# grab all of the columns except the first, which has the time stamp -# tStamp in the first column -#' cfnames provides names of all the contextual factors (except the time stamp) -#' @family ThreadNet_Misc +#' @title cfnames provides names of all the contextual factors (except the time stamp) +#' @description grab all of the columns except the first, which has the time stamp +#' tStamp in the first column +#' @name cfnames #' @param o data frame with threads -#' #' @return list of column names #' @export -#' cfnames <- function(o){ colnames(o)[2:length(colnames(o))]} -## this is used to populate the UI for comparison of categories within a CF -#' get_CF_levels returns the levels of a contextual factor -#' @family ThreadNet_Misc +#' @title get_CF_levels returns the levels of a contextual factor +#' @description this is used to populate the UI for comparison of categories within a CF +#' @name get_CF_levels #' @param o data frame with threads #' @param cf a contextual factors (column) -#' #' @return list of unique factor levels #' @export get_CF_levels <- function(o,cf){ - return(levels(o[,cf])) } ########################################################################################################## -# this function adds a new column to the occurrenes table based on a combination of context factors CF) -#' Creates a new column that combines some
set of other columns -#' -#' For example, actor+action -#' -#' @family ThreadNet_Misc +#' @title combineContextFactors creates a new column that combines some set of other columns +#' @description this function adds a new column to the occurrenes table based on a combination of context factors CF). For example, actor+action +#' @name combineContextFactors #' @param o data frame with threads #' @param CF contextual factors to be combined. #' @param newCol name of the new combined conextual factor -#' #' @return data frame with the new column #' @export combineContextFactors <- function(o,CF,newCol){ - # Use the old column if there is one if (!(newCol %in% names(o))) { @@ -128,20 +131,44 @@ combineContextFactors <- function(o,CF,newCol){ return(o) } -# just keep this simple + +#' @title newColName +#' @description returns new combined column name from a list of CFs +#' @name newColName +#' @param CF_list list of context factors to define events +#' @return column name +#' @export newColName <- function(CF_list){ return(paste0(CF_list,collapse="_")) } # These were used on the occ-to-event tab to configure the slider +#' @title threshold_slider_min +#' @description These were used on the occ-to-event tab to configure the slider +#' @name threshold_slider_min +#' @param o dataframe of occurrences +#' @return slider min +#' @export threshold_slider_min <- function(o){ return(floor(min(o$timeGap))) } +#' @title threshold_slider_max +#' @description These were used on the occ-to-event tab to configure the slider +#' @name threshold_slider_max +#' @param o dataframe of occurrences +#' @return slider max +#' @export threshold_slider_max <- function(o){ return(ceiling(max(o$timeGap))) } +#' @title threshold_slider_selected +#' @description These were used on the occ-to-event tab to configure the slider +#' @name threshold_slider_selected +#' @param o dataframe of occurrences +#' @return selected value = min +#' @export threshold_slider_selected <- function(o){ 
return(min(o$timeGap)) } @@ -150,6 +177,12 @@ threshold_slider_selected <- function(o){ #### count the handoffs, but reverse coded -- zero = all different +#' @title diff_handoffs +#' @description count the handoffs, but reverse coded -- zero = all different +#' @name diff_handoffs +#' @param o dataframe of occurrences +#' @return o dataframe of occurrences with handoff count filled in +#' @export diff_handoffs <- function(o){ # initialize the previous row @@ -172,6 +205,12 @@ row_diff_handoff <- function(this_row){ #### Time gaps -- just pass in the column of time stamps +#' @title diff_tStamp +#' @description Time gaps -- just pass in the column of time stamps - uses auto units +#' @name diff_tStamp +#' @param ts column of time stamps +#' @return Column of differences between timestamps +#' @export diff_tStamp <- function(ts){ # initialize the first row @@ -187,7 +226,7 @@ row_diff_tStamp <- function(this_row){ # just add up the differences. - d <-max(0,difftime(this_row, previous_row, units="secs")) + d <-max(0,difftime(this_row, previous_row, units='auto')) # store the previous row previous_row <<-this_row @@ -199,16 +238,12 @@ row_diff_tStamp <- function(this_row){ -#' threadSizeTable provides a distribution of the length of threads -#' -#' This function should work on either ocurrences or events. +#' @title threadSizeTable provides a distribution of the length of threads +#' @description This function should work on either ocurrences or events. #' it returns length and duration of each thread.It requires tStamp field to compute duration. 
-#' -#' @family ThreadNet_Misc -#' +#' @name threadSizeTable #' @param o data frame with threads #' @param TN column comtaining the threadNumber -#' #' @return data frame with table of thread lengths #' @export threadSizeTable <- function(o,TN){ @@ -238,14 +273,11 @@ threadSizeTable <- function(o,TN){ ######################################################### -#' convert_TN_to_TramineR -#' -#' converts the csv format used in ThreadNet to the format used by TraMiner. Should provide a way to save this, as well. -#' -#' @family ThreadNet_Misc +#' @title convert_TN_to_TramineR +#' @description converts the csv format used in ThreadNet to the format used by TraMiner. Should provide a way to save this, as well. +#' @name convert_TN_to_TramineR #' @param df threads (occurrences or events) #' @param CF Contextual factor that will be used to define the state sequences in TraMineR -#' #' @return Dataframe in TraMineR format (state sequeces in horizontal rows) #' @export convert_TN_to_TramineR <- function(df, CF){ @@ -278,14 +310,12 @@ convert_TN_to_TramineR <- function(df, CF){ } # these functions support the moving window -#' get_threadList returns a list of all thread numbers -#' -#' @family ThreadNet_Misc -#' +#' @title get_threadList +#' @description get_threadList returns a list of all thread numbers +#' @name get_threadList #' @param e data frame with threaded events #' @param TN Column with threadNumber #' @param SN Column with sequence numbers -#' #' @return list of thread numbers #' @export get_threadList <- function(e,TN,SN){ @@ -294,14 +324,12 @@ get_threadList <- function(e,TN,SN){ return(e[e[[SN]]==1,TN]) } -#' get_moving_window returns a set of threads for a moving window -#' -#' @family ThreadNet_Misc -#' +#' @title get_moving_window +#' @description get_moving_window returns a set of threads for a moving window +#' @name get_moving_window #' @param e data frame with threads (needs to have threadNum and seqNum) #' @param s size of window #' @param l location of 
window -#' #' @return data from with just the threads in the window #' @export get_moving_window <- function(e, s, l ){ @@ -318,10 +346,15 @@ get_moving_window <- function(e, s, l ){ } -# e is the data -# w = window size -# s = step (how far to move the window in each step) -# n is the ngram size + +#' @title window_correlation +#' @description Correlation between moving windows +#' @name window_correlation +#' @param e data frame for POV +#' @param w width of moving window +#' @param s step - how far to move window in each increment (default is 1) +#' @param n number of windows (default is 2) +#' @export window_correlation <- function(e,w,s=1,n=2){ # make data frame @@ -394,11 +427,14 @@ window_correlation <- function(e,w,s=1,n=2){ } -# e is the data -# w = window size -# s = step (how far to move the window in each step) -# n is the ngram size -# similar as above, except one window on each side of a focal thread. +#' @title dual_window_correlation +#' @description similar to moving window, except one window on each side of a focal thread. +#' @name dual_window_correlation +#' @param e data frame for POV +#' @param w width of moving window +#' @param s step - how far to move window in each increment (default is 1) +#' @param n number of windows (default is 2) +#' @export dual_window_correlation <- function(e,w,s=1,n=2){ # make data frame @@ -475,7 +511,12 @@ dual_window_correlation <- function(e,w,s=1,n=2){ } # Make a nice dataframe to display -# Issue is that DT::renderdatatable cannot display lists correctly. +#' @title make_nice_event_DT +#' @description Issue is that DT::renderdatatable cannot display lists correctly, so cut them out.
+#' @name make_nice_event_DT +#' @description Removes columns that do not need to be displayed +#' @param e data frame with POV +#' @export make_nice_event_DT <- function(e){ # Add new column for the occurrences as a character string for display @@ -495,9 +536,11 @@ make_nice_event_DT <- function(e){ return(e) } -# find the biggest column with ZM_, and then get the number that goes with that. -# It will not be the same as the column number. -#' @param event data frame +#' @title zoom_upper_limit +#' @description find the biggest column with ZM_, and then get the number that goes with that. +#' @name zoom_upper_limit +#' @description Used to set upper limit on sliders for zooming +#' @param e data frame #' @return biggest zoom level #' @export zoom_upper_limit <- function(e){ @@ -514,25 +557,22 @@ zoom_upper_limit <- function(e){ ###################################################### # Just putting this code here to play with for now. # this function finds the common events in two subsets of thread data -common_events <- function(ss1, ss2, TN, CF, n){ - - # get the list of ngrams for each subset of threads - e1 = count_ngrams(ss1, TN, CF, n)[1] - e2 = count_ngrams(ss2, TN, CF, n)[1] - - # return the intersection - return(intersect(as.matrix(e1), as.matrix(e2))) - -} - -rr_grams <- function(o,TN, CF, N, R) { - # N - max length of ngram - # R = threshold for repetition - - - - -} +# common_events <- function(ss1, ss2, TN, CF, n){ +# +# # get the list of ngrams for each subset of threads +# e1 = count_ngrams(ss1, TN, CF, n)[1] +# e2 = count_ngrams(ss2, TN, CF, n)[1] +# +# # return the intersection +# return(intersect(as.matrix(e1), as.matrix(e2))) +# +# } +# +# rr_grams <- function(o,TN, CF, N, R) { +# # N - max length of ngram +# # R = threshold for repetition +# +# } # Ideas for regex work # https://stackoverflow.com/questions/35704369/identify-repetitive-pattern-in-numeric-vector-in-r-with-fuzzy-search diff --git a/R/runExample.R b/R/runExample.R new file mode 
100644 index 0000000..5220c1d --- /dev/null +++ b/R/runExample.R @@ -0,0 +1,9 @@ +#' @export +runExample <- function() { + appDir <- system.file("ThreadNet", package = "ThreadNet") + if (appDir == "") { + stop("Could not find shiny app directory. Try re-installing `ThreadNet`.", call. = FALSE) + } + + shiny::runApp(appDir, display.mode = "normal") +} diff --git a/README.md b/README.md index 9b56927..2978178 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,11 @@ ## Overview ThreadNet is a tool for visualization of repetitive sequences, such as organizational routines. It emphasizes the role of sequential and temporal context. It is being created for NSF (SES-1734237) Antecedents of Complexity in Healthcare Routine, a collaborative project between Michigan State University and the University of Rochester Medical Center. Co-PIs: Brian Pentland and Kenneth Frank (MSU), Julie Ryan Wolf and Alice Pentland (URMC). The original version of ThreadNet was implemented in MatLab. +## Documentation and sample data +You can access installation instructions, documentation and sample data here: http://routines.broad.msu.edu/ThreadNet + ## File format -ThreadNet reads data in simple .CSV format. We will soon support the IEEE standard .XES format for process event log data. +ThreadNet reads data in simple .CSV format and .XES format (IEEE standard for process event log data). ### First column must be either "tStamp" or "sequence" When using timestamped data, the first column must be called "tStamp". 
The timestamps should be in default R format: "yyyy-mm-dd hh:mm:ss" diff --git a/ThreadNet.Rproj b/ThreadNet.Rproj index 497f8bf..270314b 100644 --- a/ThreadNet.Rproj +++ b/ThreadNet.Rproj @@ -18,3 +18,4 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/man/ACHR_batch_V1.Rd b/man/ACHR_batch_V1.Rd deleted file mode 100644 index 6a358ae..0000000 --- a/man/ACHR_batch_V1.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ThreadNet_Batch.R -\name{ACHR_batch_V1} -\alias{ACHR_batch_V1} -\title{Batch processing for larger numbers of threads} -\usage{ -ACHR_batch_V1(inFileName) -} -\arguments{ -\item{inFileName}{name of file (CSV format) containing the raw thread data.} -} -\value{ -data frame ready for further analysis -} -\description{ -ACHR stands for Antecedents of Complexity in Healthcare Routines. This is function is set up to compute process parameters on thousands of patient visits. 
-} diff --git a/man/CF_multi_pie.Rd b/man/CF_multi_pie.Rd index 014e461..ba96e2b 100644 --- a/man/CF_multi_pie.Rd +++ b/man/CF_multi_pie.Rd @@ -18,12 +18,3 @@ plotyly pie charts (one or more) When selecting contextual factors that define threads, events and comparisons, this function provide visual feedback about the number of factors levels and also the number of levels when the factors are combined } -\seealso{ -Other ThreadNet_Graphics: \code{\link{Comparison_Plots}}, - \code{\link{circleVisNetwork}}, - \code{\link{eventNetwork}}, - \code{\link{filter_network_edges}}, - \code{\link{forceNetworkD3}}, \code{\link{ng_bar_chart}}, - \code{\link{normalNetwork}}, \code{\link{role_map}}, - \code{\link{threadMap}}, \code{\link{threadTrajectory}} -} diff --git a/man/CF_multi_pie_event.Rd b/man/CF_multi_pie_event.Rd new file mode 100644 index 0000000..ba802b6 --- /dev/null +++ b/man/CF_multi_pie_event.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Graphics.R +\name{CF_multi_pie_event} +\alias{CF_multi_pie_event} +\title{CF_multi_pie_event} +\usage{ +CF_multi_pie_event(o, e, CF, r, zm) +} +\arguments{ +\item{o}{data frame of raw occurrences (for the names)} + +\item{e}{data frame with events} + +\item{CF}{list of contextual factors (columns) to include in the display} + +\item{r}{row number of cluster (the number of the event node)} + +\item{zm}{integer for zoom column} +} +\value{ +plotyly pie charts (one or more) +} +\description{ +Make multi-pie for click event from force network layout. Generate a small plot of context factor pie charts when you click on a node in the graph +} diff --git a/man/Comparison_Plots.Rd b/man/Comparison_Plots.Rd index 9b74b57..ed5e4bc 100644 --- a/man/Comparison_Plots.Rd +++ b/man/Comparison_Plots.Rd @@ -29,12 +29,3 @@ plotly object, including subplots Produce a set set of comparison sub-plots in an array. Ideally, we should be able to use any of the plots. 
So far it is only bar charts. This is a prototype that could use rather extensive redesign... } -\seealso{ -Other ThreadNet_Graphics: \code{\link{CF_multi_pie}}, - \code{\link{circleVisNetwork}}, - \code{\link{eventNetwork}}, - \code{\link{filter_network_edges}}, - \code{\link{forceNetworkD3}}, \code{\link{ng_bar_chart}}, - \code{\link{normalNetwork}}, \code{\link{role_map}}, - \code{\link{threadMap}}, \code{\link{threadTrajectory}} -} diff --git a/man/OccToEvents3.Rd b/man/OccToEvents3.Rd new file mode 100644 index 0000000..1a2d16a --- /dev/null +++ b/man/OccToEvents3.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Core.R +\name{OccToEvents3} +\alias{OccToEvents3} +\title{OccToEvents3} +\usage{ +OccToEvents3(o, EventMapName, THREAD_CF, EVENT_CF, compare_CF, TN, CF, rx, + KeepIrregularEvents) +} +\arguments{ +\item{o}{a dataframe of occurrences} + +\item{EventMapName}{= used to store this mapping for visualization and comparison} + +\item{THREAD_CF}{- context factors used to delineate threads} + +\item{EVENT_CF}{- context factors used to define events} + +\item{compare_CF}{= context factors used for comparison -- need to be copied over here when the thread is created.} + +\item{TN}{ThreadNum} + +\item{CF}{context factor} + +\item{rx}{list of patterns} + +\item{KeepIrregularEvents}{= keep or drop events that don't fit patterns} +} +\value{ +event data frame, with occurrences aggregated into events. 
+} +\description{ +Creates events based on frequent ngrams or regular expressions +} diff --git a/man/OccToEvents_By_Chunk.Rd b/man/OccToEvents_By_Chunk.Rd index 64e251a..5a436ee 100644 --- a/man/OccToEvents_By_Chunk.Rd +++ b/man/OccToEvents_By_Chunk.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ThreadNet_Core.R \name{OccToEvents_By_Chunk} \alias{OccToEvents_By_Chunk} -\title{Maps occurrences into events} +\title{Maps occurrences into events by chunks.} \usage{ OccToEvents_By_Chunk(o, m, EventMapName, uniform_chunk_size, tThreshold, timescale = "mins", chunk_CF, thread_CF, event_CF, compare_CF) @@ -32,12 +32,6 @@ OccToEvents_By_Chunk(o, m, EventMapName, uniform_chunk_size, tThreshold, event data frame, with occurrences aggregated into events. } \description{ -Thus function provides a place to map occurrences into events, so is is not necessary to interpret individual -occurrences in isolation. There are many ways to accomplish this mapping. -} -\seealso{ -Other ThreadNet_Core: \code{\link{OccToEvents3}}, - \code{\link{ThreadOccByPOV}}, - \code{\link{clusterEvents}}, \code{\link{count_ngrams}}, - \code{\link{threads_to_network_original}} +Thus function provides a way to map occurrences into events, so is is not necessary to interpret individual +occurrences in isolation. Provides three ways to accomplish this mapping. 
} diff --git a/man/ThreadNet_Core.Rd b/man/ThreadNet_Core.Rd new file mode 100644 index 0000000..3f3cab9 --- /dev/null +++ b/man/ThreadNet_Core.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet.R +\name{ThreadNet_Core} +\alias{ThreadNet_Core} +\alias{ThreadNet} +\title{ThreadNet_Core} +\usage{ +ThreadNet() +} +\description{ +This function launches the Shiny App called ThreadNet +} diff --git a/man/ThreadOccByPOV.Rd b/man/ThreadOccByPOV.Rd index e92040c..ac67972 100644 --- a/man/ThreadOccByPOV.Rd +++ b/man/ThreadOccByPOV.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ThreadNet_Core.R \name{ThreadOccByPOV} \alias{ThreadOccByPOV} -\title{Make new threads from a new POV} +\title{Converts occurrences into events, make threads from a new POV} \usage{ ThreadOccByPOV(o, THREAD_CF, EVENT_CF) } @@ -20,9 +20,3 @@ dataframe containing the same occurrences sorted from a different point of view Take the raw occurrences from the input file and sort them by time stamp within a set of contextual factors that remain constant for each thread. 
} -\seealso{ -Other ThreadNet_Core: \code{\link{OccToEvents3}}, - \code{\link{OccToEvents_By_Chunk}}, - \code{\link{clusterEvents}}, \code{\link{count_ngrams}}, - \code{\link{threads_to_network_original}} -} diff --git a/man/cfnames.Rd b/man/cfnames.Rd index 51492d1..2b8e5ee 100644 --- a/man/cfnames.Rd +++ b/man/cfnames.Rd @@ -13,13 +13,6 @@ cfnames(o) list of column names } \description{ -cfnames provides names of all the contextual factors (except the time stamp) -} -\seealso{ -Other ThreadNet_Misc: \code{\link{combineContextFactors}}, - \code{\link{convert_TN_to_TramineR}}, - \code{\link{get_CF_levels}}, - \code{\link{get_moving_window}}, - \code{\link{get_threadList}}, \code{\link{numThreads}}, - \code{\link{threadSizeTable}} +grab all of the columns except the first, which has the time stamp +tStamp in the first column } diff --git a/man/check_POV_name.Rd b/man/check_POV_name.Rd index b9b71bf..011f232 100644 --- a/man/check_POV_name.Rd +++ b/man/check_POV_name.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/Event_Mappings.R \name{check_POV_name} \alias{check_POV_name} -\title{Checks the name attempting to be create against the list of -map names in memory and forces the creation of a new name.} +\title{check_POV_name} \usage{ check_POV_name(mapname) } @@ -17,12 +16,3 @@ TRUE or FALSE Checks the name attempting to be create against the list of map names in memory and forces the creation of a new name.
} -\seealso{ -Other Event_mappings: \code{\link{delete_POV}}, - \code{\link{export_POV_csv}}, \code{\link{export_POV}}, - \code{\link{get_POV_COMPARISON_CF}}, - \code{\link{get_POV_EVENT_CF}}, - \code{\link{get_POV_THREAD_CF}}, - \code{\link{get_POV_name_list}}, \code{\link{get_POV}}, - \code{\link{store_POV}} -} diff --git a/man/circleVisNetwork.Rd b/man/circleVisNetwork.Rd new file mode 100644 index 0000000..32cce84 --- /dev/null +++ b/man/circleVisNetwork.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Graphics.R +\name{circleVisNetwork} +\alias{circleVisNetwork} +\title{circleVisNetwork} +\usage{ +circleVisNetwork(n, directed = "directed", showTitle = FALSE) +} +\arguments{ +\item{n}{list with nodeDF and edgeDF dataframes} + +\item{directed}{type of network = directed or not} + +\item{showTitle}{- show the title or not} +} +\value{ +visnetwork object +} +\description{ +Produces a circle network layout using visNetwork +} diff --git a/man/clusterEvents.Rd b/man/clusterEvents.Rd new file mode 100644 index 0000000..6d6e780 --- /dev/null +++ b/man/clusterEvents.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Core.R +\name{clusterEvents} +\alias{clusterEvents} +\title{Clusters occurrences or eents} +\usage{ +clusterEvents(e, NewMapName, cluster_method, thread_CF, event_CF, + what_to_return = "POV") +} +\arguments{ +\item{e}{a dataframe of events or occurrences} + +\item{NewMapName}{= used to store this mapping for visualization and comparison} + +\item{cluster_method}{= method for clustering} + +\item{thread_CF}{- context factors used to delineate threads} + +\item{event_CF}{- context factors used to define events} + +\item{what_to_return}{POV or Cluster solution} +} +\value{ +event data frame with occurrences aggregated into events or cluster solution +} +\description{ +cluster_method is either "Sequential similarity" or "Contextual 
Similarity" or "Network Structure" +} diff --git a/man/combineContextFactors.Rd b/man/combineContextFactors.Rd index e6ddfa7..df5b433 100644 --- a/man/combineContextFactors.Rd +++ b/man/combineContextFactors.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ThreadNet_Misc.R \name{combineContextFactors} \alias{combineContextFactors} -\title{Creates a new column that combines some set of other columns} +\title{combineContextFactors creates a new column that combines some set of other columns} \usage{ combineContextFactors(o, CF, newCol) } @@ -17,13 +17,5 @@ combineContextFactors(o, CF, newCol) data frame with the new column } \description{ -For example, actor+action -} -\seealso{ -Other ThreadNet_Misc: \code{\link{cfnames}}, - \code{\link{convert_TN_to_TramineR}}, - \code{\link{get_CF_levels}}, - \code{\link{get_moving_window}}, - \code{\link{get_threadList}}, \code{\link{numThreads}}, - \code{\link{threadSizeTable}} +this function adds a new column to the occurrenes table based on a combination of context factors CF). For example, actor+action } diff --git a/man/compression_index.Rd b/man/compression_index.Rd index f137121..9e7b4dd 100644 --- a/man/compression_index.Rd +++ b/man/compression_index.Rd @@ -18,9 +18,3 @@ number containing compressibility index, 0 < i < 1 Compressibility is an index of complexity -- more compressible means less complex. This function computes the ratio of compressed data to the original data. Should be between zero and one. Uses built-in functions for in=memory compression } -\seealso{ -Other ThreadNet_Metrics: \code{\link{compute_entropy}}, - \code{\link{estimate_network_complexity}}, - \code{\link{estimate_task_complexity_index}}, - \code{\link{routineness_metric}} -} diff --git a/man/compute_entropy.Rd b/man/compute_entropy.Rd index e02ed31..34cf9af 100644 --- a/man/compute_entropy.Rd +++ b/man/compute_entropy.Rd @@ -16,9 +16,3 @@ number Each column in the raw data represents a contextual factor. 
This function computes the entropy of each factor that is selected for use in the analysis. } -\seealso{ -Other ThreadNet_Metrics: \code{\link{compression_index}}, - \code{\link{estimate_network_complexity}}, - \code{\link{estimate_task_complexity_index}}, - \code{\link{routineness_metric}} -} diff --git a/man/convert_TN_to_TramineR.Rd b/man/convert_TN_to_TramineR.Rd index 2f5a1b7..13caf01 100644 --- a/man/convert_TN_to_TramineR.Rd +++ b/man/convert_TN_to_TramineR.Rd @@ -17,11 +17,3 @@ Dataframe in TraMineR format (state sequeces in horizontal rows) \description{ converts the csv format used in ThreadNet to the format used by TraMiner. Should provide a way to save this, as well. } -\seealso{ -Other ThreadNet_Misc: \code{\link{cfnames}}, - \code{\link{combineContextFactors}}, - \code{\link{get_CF_levels}}, - \code{\link{get_moving_window}}, - \code{\link{get_threadList}}, \code{\link{numThreads}}, - \code{\link{threadSizeTable}} -} diff --git a/man/count_ngrams.Rd b/man/count_ngrams.Rd index 9cd7e01..2a33182 100644 --- a/man/count_ngrams.Rd +++ b/man/count_ngrams.Rd @@ -19,12 +19,5 @@ count_ngrams(o, TN, CF, n) a dataframe with ngram, frequency and proportion in descending order } \description{ -This function counts n-grams within threads where the length of the thread is greater than n. -} -\seealso{ -Other ThreadNet_Core: \code{\link{OccToEvents3}}, - \code{\link{OccToEvents_By_Chunk}}, - \code{\link{ThreadOccByPOV}}, - \code{\link{clusterEvents}}, - \code{\link{threads_to_network_original}} +Counting ngrams is essential to several ThreadNet functions. This function counts n-grams within threads where the length of the thread is greater than n. 
} diff --git a/man/delete_POV.Rd b/man/delete_POV.Rd new file mode 100644 index 0000000..c5f0d26 --- /dev/null +++ b/man/delete_POV.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Event_Mappings.R +\name{delete_POV} +\alias{delete_POV} +\title{delete_POV} +\usage{ +delete_POV(mapname) +} +\arguments{ +\item{mapname}{name of POV map} +} +\value{ +None, updates global variables +} +\description{ +Deletes all the data assocated with this POV +} diff --git a/man/diff_handoffs.Rd b/man/diff_handoffs.Rd new file mode 100644 index 0000000..8d6a088 --- /dev/null +++ b/man/diff_handoffs.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{diff_handoffs} +\alias{diff_handoffs} +\title{diff_handoffs} +\usage{ +diff_handoffs(o) +} +\arguments{ +\item{o}{dataframe of occurrences} +} +\value{ +o dataframe of occurrences with handoff count filled in +} +\description{ +count the handoffs, but reverse coded -- zero = all different +} diff --git a/man/diff_tStamp.Rd b/man/diff_tStamp.Rd new file mode 100644 index 0000000..6c6b41b --- /dev/null +++ b/man/diff_tStamp.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{diff_tStamp} +\alias{diff_tStamp} +\title{diff_tStamp} +\usage{ +diff_tStamp(ts) +} +\arguments{ +\item{ts}{column of time stamps} +} +\value{ +Column of differences between timestamps +} +\description{ +Time gaps -- just pass in the column of time stamps - uses auto units +} diff --git a/man/dual_window_correlation.Rd b/man/dual_window_correlation.Rd new file mode 100644 index 0000000..e2e4cc2 --- /dev/null +++ b/man/dual_window_correlation.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{dual_window_correlation} +\alias{dual_window_correlation} +\title{dual_window_correlation} +\usage{ 
+dual_window_correlation(e, w, s = 1, n = 2) +} +\arguments{ +\item{e}{data frame for POV} + +\item{w}{width of moving window} + +\item{s}{step - how far to move window in each increment (default is 1)} + +\item{n}{number of windows (default is 2)} +} +\description{ +similar to moving window, except one window on each side of a focal thread. +} diff --git a/man/dualmovingWindowCorrelation.Rd b/man/dualmovingWindowCorrelation.Rd new file mode 100644 index 0000000..6a29709 --- /dev/null +++ b/man/dualmovingWindowCorrelation.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Graphics.R +\name{dualmovingWindowCorrelation} +\alias{dualmovingWindowCorrelation} +\title{dualmovingWindowCorrelation} +\usage{ +dualmovingWindowCorrelation(trace) +} +\arguments{ +\item{trace}{list of (x,y) coordinates} +} +\value{ +plotly scatter plot +} +\description{ +Creates plotly diagram showing correlation of moving windows across time +This version computes the correlation of TWO adjacent windows that never overlap +} diff --git a/man/estimate_network_complexity.Rd b/man/estimate_network_complexity.Rd index 5c98af7..729f315 100644 --- a/man/estimate_network_complexity.Rd +++ b/man/estimate_network_complexity.Rd @@ -16,9 +16,3 @@ number This function takes a network descripts (nodes and edges, as generaged by the functino threads_to_network, and estimates the number of paths. as described in Haerem, Pentland and Miller (2015). The estimate correlates with the McCabe's (1975) cyclometric complexity.
} -\seealso{ -Other ThreadNet_Metrics: \code{\link{compression_index}}, - \code{\link{compute_entropy}}, - \code{\link{estimate_task_complexity_index}}, - \code{\link{routineness_metric}} -} diff --git a/man/estimate_task_complexity_index.Rd b/man/estimate_task_complexity_index.Rd index f3f90ba..f2dc14e 100644 --- a/man/estimate_task_complexity_index.Rd +++ b/man/estimate_task_complexity_index.Rd @@ -15,11 +15,5 @@ estimate_task_complexity_index(v, e) number } \description{ -Same as estimate_network_complexity, but takes different parameters -} -\seealso{ -Other ThreadNet_Metrics: \code{\link{compression_index}}, - \code{\link{compute_entropy}}, - \code{\link{estimate_network_complexity}}, - \code{\link{routineness_metric}} +Same as estimate_network_complexity, but takes this version takes vertices and edges as parameters } diff --git a/man/eventNetwork.Rd b/man/eventNetwork.Rd deleted file mode 100644 index 244e73f..0000000 --- a/man/eventNetwork.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ThreadNet_Graphics.R -\name{eventNetwork} -\alias{eventNetwork} -\title{Circular network layout for event network (USES visnetwork)} -\usage{ -eventNetwork(et, TN, CF, timesplit) -} -\arguments{ -\item{et}{dataframe with the threads to be graphed} - -\item{TN}{the column with the threadNumber} - -\item{CF}{is the contetual factors (column)} - -\item{timesplit}{time measure} -} -\value{ -plotly object -} -\description{ -Should be replaced with a more expressive layout in plotly -} -\seealso{ -Other ThreadNet_Graphics: \code{\link{CF_multi_pie}}, - \code{\link{Comparison_Plots}}, - \code{\link{circleVisNetwork}}, - \code{\link{filter_network_edges}}, - \code{\link{forceNetworkD3}}, \code{\link{ng_bar_chart}}, - \code{\link{normalNetwork}}, \code{\link{role_map}}, - \code{\link{threadMap}}, \code{\link{threadTrajectory}} -} diff --git a/man/export_POV.Rd b/man/export_POV.Rd new file mode 100644 index 
0000000..cf1acf0 --- /dev/null +++ b/man/export_POV.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Event_Mappings.R +\name{export_POV} +\alias{export_POV} +\title{export_POV} +\usage{ +export_POV(mapname) +} +\arguments{ +\item{mapname}{name of POV map} +} +\value{ +(writes Rdata file) +} +\description{ +Exports the data associated with this POV as Rdata +} diff --git a/man/export_POV_csv.Rd b/man/export_POV_csv.Rd new file mode 100644 index 0000000..df71ee5 --- /dev/null +++ b/man/export_POV_csv.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Event_Mappings.R +\name{export_POV_csv} +\alias{export_POV_csv} +\title{export_POV_csv} +\usage{ +export_POV_csv(mapname) +} +\arguments{ +\item{mapname}{name of POV map} +} +\value{ +(writes CSV file) +} +\description{ +Exports the data associated with this POV as CSV +} diff --git a/man/filter_network_edges.Rd b/man/filter_network_edges.Rd new file mode 100644 index 0000000..006def7 --- /dev/null +++ b/man/filter_network_edges.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Graphics.R +\name{filter_network_edges} +\alias{filter_network_edges} +\title{filter_network_edges} +\usage{ +filter_network_edges(n, threshold) +} +\arguments{ +\item{n}{network list of nodeDF and edgeDF} + +\item{threshold}{numeric threshold for filtering edges.} +} +\value{ +n network list of nodeDF and edgeDF +} +\description{ +Filters out network edges with weight below the threshold +} diff --git a/man/forceNetworkD3.Rd b/man/forceNetworkD3.Rd index b2e7227..7bb0bea 100644 --- a/man/forceNetworkD3.Rd +++ b/man/forceNetworkD3.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ThreadNet_Graphics.R \name{forceNetworkD3} \alias{forceNetworkD3} -\title{NetworkD3 layout for event network} +\title{forceNetworkD3 is an Interactive layout for event network} \usage{ forceNetworkD3(n) } @@ -13,15 
+13,5 @@ forceNetworkD3(n) networkD3 object } \description{ -NetworkD3 layout for event network -} -\seealso{ -Other ThreadNet_Graphics: \code{\link{CF_multi_pie}}, - \code{\link{Comparison_Plots}}, - \code{\link{circleVisNetwork}}, - \code{\link{eventNetwork}}, - \code{\link{filter_network_edges}}, - \code{\link{ng_bar_chart}}, \code{\link{normalNetwork}}, - \code{\link{role_map}}, \code{\link{threadMap}}, - \code{\link{threadTrajectory}} +This produces a force layout network using networkD3 } diff --git a/man/frequent_ngrams.Rd b/man/frequent_ngrams.Rd new file mode 100644 index 0000000..4529dd6 --- /dev/null +++ b/man/frequent_ngrams.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Core.R +\name{frequent_ngrams} +\alias{frequent_ngrams} +\title{frequent_ngrams} +\usage{ +frequent_ngrams(e, TN, CF, minN, maxN, onlyMaximal = TRUE) +} +\arguments{ +\item{e}{event data} + +\item{TN}{threadNum} + +\item{CF}{context factor (column) to look at} + +\item{minN}{minimum ngram length} + +\item{maxN}{maximum ngram length} + +\item{onlyMaximal}{Filters out ngrams that are included in longer ngrams. 
Default is true.} +} +\value{ +dataframe of ngrams +} +\description{ +combined set of frequent ngrams within a range of lengths +} diff --git a/man/get_CF_levels.Rd b/man/get_CF_levels.Rd index 28c5aec..9555f96 100644 --- a/man/get_CF_levels.Rd +++ b/man/get_CF_levels.Rd @@ -15,13 +15,5 @@ get_CF_levels(o, cf) list of unique factor levels } \description{ -get_CF_levels returns the levels of a contextual factor -} -\seealso{ -Other ThreadNet_Misc: \code{\link{cfnames}}, - \code{\link{combineContextFactors}}, - \code{\link{convert_TN_to_TramineR}}, - \code{\link{get_moving_window}}, - \code{\link{get_threadList}}, \code{\link{numThreads}}, - \code{\link{threadSizeTable}} +this is used to populate the UI for comparison of categories within a CF } diff --git a/man/get_POV.Rd b/man/get_POV.Rd new file mode 100644 index 0000000..0de1bcf --- /dev/null +++ b/man/get_POV.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Event_Mappings.R +\name{get_POV} +\alias{get_POV} +\title{get_POV} +\usage{ +get_POV(mapname) +} +\arguments{ +\item{mapname}{name of POV map} +} +\value{ +data frame with POV +} +\description{ +Gets the data frame for the POV +} diff --git a/man/get_POV_COMPARISON_CF.Rd b/man/get_POV_COMPARISON_CF.Rd new file mode 100644 index 0000000..35c875f --- /dev/null +++ b/man/get_POV_COMPARISON_CF.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Event_Mappings.R +\name{get_POV_COMPARISON_CF} +\alias{get_POV_COMPARISON_CF} +\title{get_POV_COMPARISON_CF} +\usage{ +get_POV_COMPARISON_CF(mapname, CF_list) +} +\arguments{ +\item{mapname}{name of POV map} + +\item{CF_list}{list of other column names} +} +\value{ +comparison CFs for that POV +} +\description{ +Gets the CFs that can be used for comparisons in this POV +} diff --git a/man/get_POV_EVENT_CF.Rd b/man/get_POV_EVENT_CF.Rd new file mode 100644 index 0000000..3466873 --- /dev/null +++ b/man/get_POV_EVENT_CF.Rd @@ 
-0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Event_Mappings.R +\name{get_POV_EVENT_CF} +\alias{get_POV_EVENT_CF} +\title{get_POV_EVENT_CF} +\usage{ +get_POV_EVENT_CF(mapname) +} +\arguments{ +\item{mapname}{name of POV map} +} +\value{ +event CFs for that POV +} +\description{ +Gets the CFs that define events in this POV +} diff --git a/man/get_POV_THREAD_CF.Rd b/man/get_POV_THREAD_CF.Rd new file mode 100644 index 0000000..998416b --- /dev/null +++ b/man/get_POV_THREAD_CF.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Event_Mappings.R +\name{get_POV_THREAD_CF} +\alias{get_POV_THREAD_CF} +\title{get_POV_THREAD_CF} +\usage{ +get_POV_THREAD_CF(mapname) +} +\arguments{ +\item{mapname}{name of POV map} +} +\value{ +thread CFs for that POV +} +\description{ +Gets the CFs that define threads in this POV +} diff --git a/man/get_POV_name_list.Rd b/man/get_POV_name_list.Rd new file mode 100644 index 0000000..63591a1 --- /dev/null +++ b/man/get_POV_name_list.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Event_Mappings.R +\name{get_POV_name_list} +\alias{get_POV_name_list} +\title{get_POV_name_list} +\usage{ +get_POV_name_list() +} +\value{ +List of POV names +} +\description{ +Get list of POV names for all of the dropdown boxes on the UI +} diff --git a/man/get_moving_window.Rd b/man/get_moving_window.Rd index fe45396..b86683b 100644 --- a/man/get_moving_window.Rd +++ b/man/get_moving_window.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ThreadNet_Misc.R \name{get_moving_window} \alias{get_moving_window} -\title{get_moving_window returns a set of threads for a moving window} +\title{get_moving_window} \usage{ get_moving_window(e, s, l) } @@ -19,11 +19,3 @@ data from with just the threads in the window \description{ get_moving_window returns a set of threads for a moving window } -\seealso{ -Other ThreadNet_Misc: 
\code{\link{cfnames}}, - \code{\link{combineContextFactors}}, - \code{\link{convert_TN_to_TramineR}}, - \code{\link{get_CF_levels}}, - \code{\link{get_threadList}}, \code{\link{numThreads}}, - \code{\link{threadSizeTable}} -} diff --git a/man/get_threadList.Rd b/man/get_threadList.Rd index 7c49f30..a3c7bc7 100644 --- a/man/get_threadList.Rd +++ b/man/get_threadList.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ThreadNet_Misc.R \name{get_threadList} \alias{get_threadList} -\title{get_threadList returns a list of all thread numbers} +\title{get_threadList} \usage{ get_threadList(e, TN, SN) } @@ -19,11 +19,3 @@ list of thread numbers \description{ get_threadList returns a list of all thread numbers } -\seealso{ -Other ThreadNet_Misc: \code{\link{cfnames}}, - \code{\link{combineContextFactors}}, - \code{\link{convert_TN_to_TramineR}}, - \code{\link{get_CF_levels}}, - \code{\link{get_moving_window}}, - \code{\link{numThreads}}, \code{\link{threadSizeTable}} -} diff --git a/man/make_example_DF.Rd b/man/make_example_DF.Rd new file mode 100644 index 0000000..abbc7e9 --- /dev/null +++ b/man/make_example_DF.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{make_example_DF} +\alias{make_example_DF} +\title{make_example_DF} +\usage{ +make_example_DF() +} +\value{ +DF with some data +} +\description{ +Make an example data frame for display... +} diff --git a/man/make_nice_event_DT.Rd b/man/make_nice_event_DT.Rd new file mode 100644 index 0000000..b459551 --- /dev/null +++ b/man/make_nice_event_DT.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{make_nice_event_DT} +\alias{make_nice_event_DT} +\title{make_nice_event_DT} +\usage{ +make_nice_event_DT(e) +} +\arguments{ +\item{e}{data frame with POV} +} +\description{ +Issue is that DT::renderdatatable cannot display lists correctly, so cut them out. 
+ +Removes columns that do not need to be displayed +} diff --git a/man/make_subsets.Rd b/man/make_subsets.Rd new file mode 100644 index 0000000..11a478c --- /dev/null +++ b/man/make_subsets.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{make_subsets} +\alias{make_subsets} +\title{make_subsets} +\usage{ +make_subsets(d, n) +} +\arguments{ +\item{d}{data frame with occurrences or events} + +\item{n}{number of buckets} +} +\value{ +list of smaller data frames +} +\description{ +this function is used to split up the threads into n ~equal buckets +} diff --git a/man/movingWindowCorrelation.Rd b/man/movingWindowCorrelation.Rd new file mode 100644 index 0000000..7eef9cd --- /dev/null +++ b/man/movingWindowCorrelation.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Graphics.R +\name{movingWindowCorrelation} +\alias{movingWindowCorrelation} +\title{movingWindowCorrelation} +\usage{ +movingWindowCorrelation(trace) +} +\arguments{ +\item{trace}{list of (x,y) coordinates} +} +\value{ +plotly scatter plot +} +\description{ +Creates plotly diagram showing correlation of moving windows across time +In this version, the moving window can overlap with itself as it slides +} diff --git a/man/newColName.Rd b/man/newColName.Rd new file mode 100644 index 0000000..3f765ac --- /dev/null +++ b/man/newColName.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{newColName} +\alias{newColName} +\title{newColName} +\usage{ +newColName(CF_list) +} +\arguments{ +\item{CF_list}{list of context factors to define events} +} +\value{ +column name +} +\description{ +returns new combined column name from a list of CFs +} diff --git a/man/ng_bar_chart.Rd b/man/ng_bar_chart.Rd index 851a3af..bf9772c 100644 --- a/man/ng_bar_chart.Rd +++ b/man/ng_bar_chart.Rd @@ -23,13 +23,3 @@ plotly object 
\description{ Shows the n-grams within a set of threads (but not splitting across threads). This provides a visual indication of how repetitive the threads are. } -\seealso{ -Other ThreadNet_Graphics: \code{\link{CF_multi_pie}}, - \code{\link{Comparison_Plots}}, - \code{\link{circleVisNetwork}}, - \code{\link{eventNetwork}}, - \code{\link{filter_network_edges}}, - \code{\link{forceNetworkD3}}, - \code{\link{normalNetwork}}, \code{\link{role_map}}, - \code{\link{threadMap}}, \code{\link{threadTrajectory}} -} diff --git a/man/normalNetwork.Rd b/man/normalNetwork.Rd new file mode 100644 index 0000000..1d54e02 --- /dev/null +++ b/man/normalNetwork.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Graphics.R +\name{normalNetwork} +\alias{normalNetwork} +\title{normalNetwork} +\usage{ +normalNetwork(e, o, cf) +} +\arguments{ +\item{e}{event data frame} + +\item{o}{occurrence data frame} + +\item{cf}{context factor for the graph} +} +\value{ +visnetwork object +} +\description{ +Produces a network of co-occurrences for any given CF and displays it in a visNetwork circle layout +} diff --git a/man/numThreads.Rd b/man/numThreads.Rd index 03d6126..9b2398c 100644 --- a/man/numThreads.Rd +++ b/man/numThreads.Rd @@ -17,12 +17,3 @@ number of threads \description{ Threads must have unique thred numbers for this function to work } -\seealso{ -Other ThreadNet_Misc: \code{\link{cfnames}}, - \code{\link{combineContextFactors}}, - \code{\link{convert_TN_to_TramineR}}, - \code{\link{get_CF_levels}}, - \code{\link{get_moving_window}}, - \code{\link{get_threadList}}, - \code{\link{threadSizeTable}} -} diff --git a/man/plot_entropy.Rd b/man/plot_entropy.Rd new file mode 100644 index 0000000..e75d3fa --- /dev/null +++ b/man/plot_entropy.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Metrics.R +\name{plot_entropy} +\alias{plot_entropy} +\title{plot entropy as a function 
of zoom_level} +\usage{ +plot_entropy(e) +} +\arguments{ +\item{e}{data frame of events with zoom levels} +} +\value{ +regular R plot +} +\description{ +Gets the zoom levels, grep out the 'Z_' column names... +} diff --git a/man/role_map.Rd b/man/role_map.Rd new file mode 100644 index 0000000..b9e28b4 --- /dev/null +++ b/man/role_map.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Graphics.R +\name{role_map} +\alias{role_map} +\title{role_map} +\usage{ +role_map(e, o, cfs) +} +\arguments{ +\item{e}{event data frame} + +\item{o}{occurrence data frame} + +\item{cfs}{context factors for the graph} +} +\value{ +plotly heat map +} +\description{ +A role map (like a heat map) that will show "who does what" for any set of events +} diff --git a/man/routineness_metric.Rd b/man/routineness_metric.Rd index 332176e..11d0af5 100644 --- a/man/routineness_metric.Rd +++ b/man/routineness_metric.Rd @@ -24,9 +24,3 @@ number, index of routineness. Computes the fraction of observed behavior that conforms to an observed pattern. 
Current version uses ngrams, but it would be good to use spmf pattern mining to avoid including duplicate patterns (e.g., a-b-c and b-c-d) } -\seealso{ -Other ThreadNet_Metrics: \code{\link{compression_index}}, - \code{\link{compute_entropy}}, - \code{\link{estimate_network_complexity}}, - \code{\link{estimate_task_complexity_index}} -} diff --git a/man/store_POV.Rd b/man/store_POV.Rd new file mode 100644 index 0000000..049b56f --- /dev/null +++ b/man/store_POV.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Event_Mappings.R +\name{store_POV} +\alias{store_POV} +\title{store_POV} +\usage{ +store_POV(EventMapName, e, thread_CF, event_CF) +} +\arguments{ +\item{EventMapName}{name of map attempting to be created} + +\item{e}{data frame with POV to be stored} + +\item{thread_CF}{List of CFs to be stored} + +\item{event_CF}{List of CFs to be stored} +} +\value{ +None, updates global variables +} +\description{ +Stores the POV and context factors +} diff --git a/man/support_level.Rd b/man/support_level.Rd new file mode 100644 index 0000000..e59ef95 --- /dev/null +++ b/man/support_level.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Core.R +\name{support_level} +\alias{support_level} +\title{support_level} +\usage{ +support_level(tv, ng) +} +\arguments{ +\item{tv}{text vector of threads} + +\item{ng}{ngram to be located in the threads} +} +\value{ +percentage of threads containing the ngram +} +\description{ +Counts what fraction of the threads a particular ngram appears in +} diff --git a/man/threadMap.Rd b/man/threadMap.Rd index 046b370..0d82a31 100644 --- a/man/threadMap.Rd +++ b/man/threadMap.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ThreadNet_Graphics.R \name{threadMap} \alias{threadMap} -\title{Shows threads in a horizontal layout} +\title{threadMap shows threads in a horizontal layout} \usage{ threadMap(or, TN, timescale, CF, shape) } @@ 
-23,13 +23,3 @@ plotly object \description{ Creates a plotly chart of threads in either clock time or event time, depending on the timescale parameter. } -\seealso{ -Other ThreadNet_Graphics: \code{\link{CF_multi_pie}}, - \code{\link{Comparison_Plots}}, - \code{\link{circleVisNetwork}}, - \code{\link{eventNetwork}}, - \code{\link{filter_network_edges}}, - \code{\link{forceNetworkD3}}, \code{\link{ng_bar_chart}}, - \code{\link{normalNetwork}}, \code{\link{role_map}}, - \code{\link{threadTrajectory}} -} diff --git a/man/threadSizeTable.Rd b/man/threadSizeTable.Rd index 7e231cf..556d102 100644 --- a/man/threadSizeTable.Rd +++ b/man/threadSizeTable.Rd @@ -18,11 +18,3 @@ data frame with table of thread lengths This function should work on either ocurrences or events. it returns length and duration of each thread.It requires tStamp field to compute duration. } -\seealso{ -Other ThreadNet_Misc: \code{\link{cfnames}}, - \code{\link{combineContextFactors}}, - \code{\link{convert_TN_to_TramineR}}, - \code{\link{get_CF_levels}}, - \code{\link{get_moving_window}}, - \code{\link{get_threadList}}, \code{\link{numThreads}} -} diff --git a/man/threadTrajectory.Rd b/man/threadTrajectory.Rd new file mode 100644 index 0000000..7db6480 --- /dev/null +++ b/man/threadTrajectory.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Graphics.R +\name{threadTrajectory} +\alias{threadTrajectory} +\title{threadTrajectory} +\usage{ +threadTrajectory(or) +} +\arguments{ +\item{or}{event data frame} +} +\value{ +plotly scatter plot +} +\description{ +Create a plotly diagram showing relative time versus sequential time. 
+Inspired by Gergen and Danner-Schroeder +} diff --git a/man/thread_text_vector.Rd b/man/thread_text_vector.Rd new file mode 100644 index 0000000..d1a47d1 --- /dev/null +++ b/man/thread_text_vector.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Core.R +\name{thread_text_vector} +\alias{thread_text_vector} +\title{thread_text_vector} +\usage{ +thread_text_vector(o, TN, CF, delimiter) +} +\arguments{ +\item{o}{a dataframe of events or occurrences} + +\item{TN}{= threadNum} + +\item{CF}{= CF or column to include} + +\item{delimiter}{usually comma or blank} +} +\value{ +vector of threads as delimited character strings +} +\description{ +Create a vector of threads +} diff --git a/man/threads_to_network_original.Rd b/man/threads_to_network_original.Rd index 90821be..f022283 100644 --- a/man/threads_to_network_original.Rd +++ b/man/threads_to_network_original.Rd @@ -22,9 +22,3 @@ a list containing two dataframes, one for the nodes (nodeDF) and one for the edg Converts a sequentially ordered streams of ;events (threads) and creates a unimodal, unidimensional network. Sequentially adjacent pairs of events become edges in the resulting network. 
} -\seealso{ -Other ThreadNet_Core: \code{\link{OccToEvents3}}, - \code{\link{OccToEvents_By_Chunk}}, - \code{\link{ThreadOccByPOV}}, - \code{\link{clusterEvents}}, \code{\link{count_ngrams}} -} diff --git a/man/threshold_slider_max.Rd b/man/threshold_slider_max.Rd new file mode 100644 index 0000000..43f9d5e --- /dev/null +++ b/man/threshold_slider_max.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{threshold_slider_max} +\alias{threshold_slider_max} +\title{threshold_slider_max} +\usage{ +threshold_slider_max(o) +} +\arguments{ +\item{o}{dataframe of occurrences} +} +\value{ +slider max +} +\description{ +These were used on the occ-to-event tab to configure the slider +} diff --git a/man/threshold_slider_min.Rd b/man/threshold_slider_min.Rd new file mode 100644 index 0000000..a96f139 --- /dev/null +++ b/man/threshold_slider_min.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{threshold_slider_min} +\alias{threshold_slider_min} +\title{threshold_slider_min} +\usage{ +threshold_slider_min(o) +} +\arguments{ +\item{o}{dataframe of occurrences} +} +\value{ +slider min +} +\description{ +These were used on the occ-to-event tab to configure the slider +} diff --git a/man/threshold_slider_selected.Rd b/man/threshold_slider_selected.Rd new file mode 100644 index 0000000..3ade129 --- /dev/null +++ b/man/threshold_slider_selected.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{threshold_slider_selected} +\alias{threshold_slider_selected} +\title{threshold_slider_selected} +\usage{ +threshold_slider_selected(o) +} +\arguments{ +\item{o}{dataframe of occurrences} +} +\value{ +selected value = min +} +\description{ +These were used on the occ-to-event tab to configure the slider +} diff --git a/man/window_correlation.Rd b/man/window_correlation.Rd new 
file mode 100644 index 0000000..309e422 --- /dev/null +++ b/man/window_correlation.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{window_correlation} +\alias{window_correlation} +\title{window_correlation} +\usage{ +window_correlation(e, w, s = 1, n = 2) +} +\arguments{ +\item{e}{data frame for POV} + +\item{w}{width of moving window} + +\item{s}{step - how far to move window in each increment (default is 1)} + +\item{n}{number of windows (default is 2)} +} +\description{ +Correlation between moving windows +} diff --git a/man/zoomColumn.Rd b/man/zoomColumn.Rd new file mode 100644 index 0000000..f6472ef --- /dev/null +++ b/man/zoomColumn.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{zoomColumn} +\alias{zoomColumn} +\title{zoomColumn} +\usage{ +zoomColumn(z) +} +\arguments{ +\item{z}{integer for the zoom level} +} +\value{ +column name for that zoom level ('ZM_n') +} +\description{ +This function takes a slider value and returns a valid column name for zooming +if the argument is null, then use ZM_1 +} diff --git a/man/zoom_upper_limit.Rd b/man/zoom_upper_limit.Rd new file mode 100644 index 0000000..1b822b9 --- /dev/null +++ b/man/zoom_upper_limit.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ThreadNet_Misc.R +\name{zoom_upper_limit} +\alias{zoom_upper_limit} +\title{zoom_upper_limit} +\usage{ +zoom_upper_limit(e) +} +\arguments{ +\item{e}{data frame} +} +\value{ +biggest zoom level +} +\description{ +find the biggest column with ZM_, and then get the number that goes with that. + +Used to set upper limit on sliders for zooming +}