# One-off maintenance tasks that populate cached/derived columns on existing rows.
namespace :backfill do
  # Options handed to WordCountAnalyzer::Counter by every attribute word-count
  # task below. Defined once so the tasks cannot drift apart.
  word_count_options = {
    ellipsis: 'no_special_treatment',
    hyperlink: 'count_as_one',
    contraction: 'count_as_one',
    hyphenated_word: 'count_as_one',
    date: 'no_special_treatment',
    number: 'count',
    numbered_list: 'ignore',
    xhtml: 'remove',
    forward_slash: 'count_as_multiple_except_dates',
    backslash: 'count_as_one',
    dotted_line: 'ignore',
    dashed_line: 'ignore',
    underscore: 'ignore',
    stray_punctuation: 'ignore'
  }.freeze

  # Returns the word count of +text+ under the shared options. A new Counter is
  # built for each call so no state is carried between rows.
  count_words = lambda do |text|
    WordCountAnalyzer::Counter.new(**word_count_options).count(text)
  end

  desc "Backfill cached word counts on all attributes"
  task attribute_word_count_caches: :environment do
    # Only rows with no cached count and a value other than "", " ", "." or NULL.
    Attribute.where(word_count_cache: nil)
             .where.not(value: ["", " ", ".", nil])
             .find_each do |attribute|
      attribute.update_column(:word_count_cache, count_words.call(attribute.value))
    end
  end

  desc "Backfill cached word counts for the 500 most frequent uncached attribute values"
  task most_used_attribute_word_counts: :environment do
    # word count => every distinct value that produced that count
    values_by_word_count = Hash.new { |hash, key| hash[key] = [] }

    # Grouped count yields { value => number of rows holding that value },
    # limited to the 500 values with the most uncached rows.
    Attribute.where(word_count_cache: nil)
             .group(:value)
             .order('count_id DESC')
             .limit(500)
             .count(:id)
             .each do |value, count|
      word_count = count_words.call(value)
      values_by_word_count[word_count] << value
      puts "#{value} x #{count}: #{word_count} words"
    end

    # One bulk UPDATE per distinct word count instead of one write per row.
    values_by_word_count.each do |word_count, values|
      Attribute.where(word_count_cache: nil, value: values).update_all(word_count_cache: word_count)
    end
  end

  desc "Backfill cached word counts on all documents"
  task document_word_count_caches: :environment do
    # Includes soft-deleted documents (with_deleted); skips NULL or empty bodies.
    Document.with_deleted
            .where(cached_word_count: nil)
            .where.not(body: [nil, ""])
            .find_each(batch_size: 500) do |document|
      document.update_column(:cached_word_count, document.computed_word_count)
      puts document.id
    end
  end

  desc "Start working through old categories/fields without position set"
  task sortables_positions: :environment do
    # Prints how many categories and fields still have no position.
    report_backlog = lambda do
      puts "Empty position backlog:\n\t* #{AttributeCategory.where(position: nil).count} categories\n\t* #{AttributeField.where(position: nil).count} fields"
    end

    # Random sample of up to 500 unpositioned categories; random order lowers
    # (but does not remove) the chance that parallel workers pick the same rows.
    categories_to_position = AttributeCategory.where(position: nil).order("RANDOM()").limit(500).to_a
    report_backlog.call

    while categories_to_position.any?
      category = categories_to_position.pop

      # Backfill all the positioning for this category's page's categories
      category.backfill_categories_ordering!

      # Report progress on roughly one iteration in five.
      # NOTE(review): an earlier comment here said this step re-fetched a recent
      # copy of updates for multi-worker runs, but the in-memory list is never
      # refreshed -- only the backlog counts are re-queried for display.
      report_backlog.call if rand(100) < 20
    end

    puts "Done!"
  end
end