diff --git a/ArchiveServer.go b/ArchiveServer.go index 93216eb..7454bcc 100644 --- a/ArchiveServer.go +++ b/ArchiveServer.go @@ -18,7 +18,7 @@ type ArchiveServer struct { cfg *Config startup time.Time - rxViewRoot, rxViewPage, rxSearch, rxSearchRx *regexp.Regexp + rxViewRoot, rxViewPage, rxSearch, rxSearchRx, rxStats *regexp.Regexp } func NewArchiveServer(cfg *Config) (*ArchiveServer, error) { @@ -49,6 +49,7 @@ func NewArchiveServer(cfg *Config) (*ArchiveServer, error) { rxViewPage: regexp.MustCompile(`^/([^/]+)/(\d+)/(\d+)/(?:page-)?(\d+)$`), rxSearch: regexp.MustCompile(`^/([^/]+)/search/(.*)$`), rxSearchRx: regexp.MustCompile(`^/([^/]+)/rx/(.*)$`), + rxStats: regexp.MustCompile((`^/([^/]+)/stats/?$`)), }, nil } diff --git a/ArchiveState.go b/ArchiveState.go index 248aa67..ceb3aab 100644 --- a/ArchiveState.go +++ b/ArchiveState.go @@ -2,6 +2,7 @@ package archive import ( "bufio" + "errors" "fmt" "html" "io/ioutil" @@ -10,7 +11,9 @@ import ( "net/url" "os" "regexp" + "sort" "strings" + "time" ) const ( @@ -23,6 +26,7 @@ type ArchiveState struct { logBestSlug string query string queryIsRegex bool + isStats bool ym YearMonth page int highestPage int @@ -35,13 +39,21 @@ func NewArchiveState(svr *ArchiveServer) *ArchiveState { } } +func (this *ArchiveState) showPageURLs() bool { + return this.log != nil && len(this.query) == 0 && !this.isStats +} + func (this *ArchiveState) URL() string { - if len(this.query) > 0 { + if this.isStats { + return fmt.Sprintf(`/%s/stats`, this.logBestSlug) + + } else if len(this.query) > 0 { if this.queryIsRegex { return fmt.Sprintf(`/%s/rx/%s`, this.logBestSlug, url.QueryEscape(this.query)) } else { return fmt.Sprintf(`/%s/search/%s`, this.logBestSlug, url.QueryEscape(this.query)) } + } else { if this.page == pageNotSet { return fmt.Sprintf(`/%s/%s/%s`, this.logBestSlug, this.ym.Year, this.ym.Month) @@ -101,6 +113,130 @@ func (this *ArchiveState) renderView(w http.ResponseWriter) { this.renderTemplate(w, []byte(output)) } +// parseStatsFor loads user post statistics for a single yearMonth in a single log source. +func (this *ArchiveState) parseStatsFor(ls *LogSource, ym YearMonth, into map[string]int) error { + + fname, err := this.svr.LogFile(ls, ym) + if err != nil { + return err + } + + fc, err := ioutil.ReadFile(fname) + if err != nil { + return err + } + + rxUser := regexp.MustCompile(`(?ms)^[^<\r\n]*<([^>\r\n]+)>.+?$`) + matches := rxUser.FindAllSubmatch(fc, -1) + if matches == nil || len(matches) == 0 { + return errors.New("No matches") + } + + for _, match := range matches { + username := string(match[1]) + + if ct, ok := into[username]; ok { + into[username] = ct + 1 + } else { + into[username] = 1 + } + } + + return nil +} + +func (this *ArchiveState) renderStats(w http.ResponseWriter) { + // Lines per year + // Users / posts/year + + startTime := time.Now() + + yearsToUsersToPostCount := make(map[int]map[string]int, 0) + + totalErrors := 0 + var lastError error = nil + + ym := this.log.EarliestDate() + orderedYears := make([]int, 0) + for { + usersToPostCount, ok := yearsToUsersToPostCount[ym.Year] + if !ok { + usersToPostCount = make(map[string]int) + orderedYears = append(orderedYears, ym.Year) + } + + err := this.parseStatsFor(this.log, ym, usersToPostCount) + if err != nil { + //log.Printf("Stats(%s): %s", this.logBestSlug, err.Error()) + totalErrors += 1 + lastError = err + } + + //fmt.Printf("%#v\n", usersToPostCount) + + yearsToUsersToPostCount[ym.Year] = usersToPostCount + + if ym.Equals(this.log.LatestDate()) { + break + } + + ym = ym.Next() + } + + ret := make([]byte, 0) + + if lastError != nil { + ret = append(ret, []byte(fmt.Sprintf("Got %d errors, including: '%s'\n\n", totalErrors, lastError.Error()))...) + } + + // + + allUsersExistence := make(map[string]struct{}) + for _, usersMap := range yearsToUsersToPostCount { + for username, _ := range usersMap { + allUsersExistence[username] = struct{}{} + } + } + allUsernames := make([]string, 0, len(allUsersExistence)) + for username, _ := range allUsersExistence { + allUsernames = append(allUsernames, username) + } + sort.Strings(allUsernames) + + // + + ret = append(ret, []byte(``)...) + for _, year := range orderedYears { + ret = append(ret, []byte(fmt.Sprintf(``, year))...) + } + ret = append(ret, []byte("\n")...) + for _, username := range allUsernames { + + ret = append(ret, []byte(fmt.Sprintf(``, html.EscapeString(username)))...) + for _, year := range orderedYears { + usersMap := yearsToUsersToPostCount[year] + posts, _ /*ok*/ := usersMap[username] + ret = append(ret, []byte(fmt.Sprintf(``, posts))...) + } + ret = append(ret, []byte("\n")...) + } + ret = append(ret, []byte(``)...) + + for _, year := range orderedYears { + postsTotalForYear := 0 + for _, userPostCount := range yearsToUsersToPostCount[year] { + postsTotalForYear += userPostCount + } + ret = append(ret, []byte(fmt.Sprintf(``, postsTotalForYear))...) + } + + duration := time.Now().Sub(startTime) + + ret = append(ret, []byte(fmt.Sprintf("
 %d
%s%d
TOTAL:%d
\n\n%d total users\nStatistics generated in %s", len(allUsernames), duration.String()))...) + + this.renderTemplate(w, ret) +} + // renderSearch renders the search results. // - Mandatory: log, query, queryIsRegex func (this *ArchiveState) renderSearch(w http.ResponseWriter) { @@ -191,12 +327,11 @@ func (this *ArchiveState) renderTemplateHead(w http.ResponseWriter) { title = this.log.Description + ` Archives` } - showPageURLs := (this.log != nil && len(this.query) == 0) - latestUrl := `/` if this.log != nil { latestUrl = fmt.Sprintf(`/%s/%d/%d`, url.PathEscape(this.logBestSlug), this.log.LatestDate().Year, this.log.LatestDate().Month) } + statsUrl := fmt.Sprintf(`/%s/stats`, url.PathEscape(this.logBestSlug)) w.Write([]byte(` @@ -214,6 +349,7 @@ func (this *ArchiveState) renderTemplateHead(w http.ResponseWriter) {
Latest + Statistics Font increase Font decrease Download backup @@ -251,7 +387,7 @@ func (this *ArchiveState) renderTemplateHead(w http.ResponseWriter) { `)) - if showPageURLs { + if this.showPageURLs() { w.Write([]byte(`