mirror of
https://github.com/indentlabs/notebook.git
synced 2025-10-26 11:19:22 +00:00
use 3rd party lib for word counts
This commit is contained in:
parent
0b2a5a6e6c
commit
eae59ef451
1
Gemfile
1
Gemfile
@ -136,6 +136,7 @@ group :worker do
|
||||
# Document understanding
|
||||
gem 'htmlentities'
|
||||
gem 'birch', git: 'https://github.com/billthompson/birch.git', branch: 'birch-ruby22'
|
||||
gem 'word_count_analyzer'
|
||||
|
||||
gem 'engtagger'
|
||||
gem 'ibm_watson'
|
||||
|
||||
@ -1545,6 +1545,8 @@ GEM
|
||||
websocket-driver (0.7.5)
|
||||
websocket-extensions (>= 0.1.0)
|
||||
websocket-extensions (0.1.5)
|
||||
word_count_analyzer (1.0.1)
|
||||
engtagger
|
||||
zeitwerk (2.4.2)
|
||||
|
||||
PLATFORMS
|
||||
@ -1623,6 +1625,7 @@ DEPENDENCIES
|
||||
uglifier (>= 1.3.0)
|
||||
web-console
|
||||
webpacker
|
||||
word_count_analyzer
|
||||
|
||||
RUBY VERSION
|
||||
ruby 2.7.2p137
|
||||
|
||||
@ -81,7 +81,24 @@ class Document < ApplicationRecord
|
||||
end
|
||||
|
||||
# Counts the words in this document's body using the word_count_analyzer gem.
#
# Returns 0 when the body is nil or fails `present?`; otherwise returns the
# value reported by WordCountAnalyzer::Counter#count for the body text.
def computed_word_count
  # Safe navigation short-circuits on a nil body, so `present?` is only
  # ever sent to an actual value.
  return 0 unless body&.present?

  # NOTE(review): the meaning of each option value is defined by the
  # word_count_analyzer gem; they are passed through as configured here.
  # Confirm against the gem's documentation before changing any of them.
  WordCountAnalyzer::Counter.new(
    ellipsis: 'no_special_treatment',
    hyperlink: 'no_special_treatment',
    contraction: 'count_as_multiple',
    hyphenated_word: 'count_as_multiple',
    date: 'count_as_one',
    number: 'ignore',
    numbered_list: 'ignore',
    xhtml: 'keep',
    forward_slash: 'count_as_multiple',
    backslash: 'count_as_multiple',
    dotted_line: 'count',
    dashed_line: 'count',
    underscore: 'count',
    stray_punctuation: 'count'
  ).count(body)
end
|
||||
|
||||
def reading_estimate
|
||||
|
||||
Loading…
Reference in New Issue
Block a user