mirror of
https://github.com/indentlabs/notebook.git
synced 2025-10-26 11:19:22 +00:00
71 lines
1.8 KiB
Ruby
71 lines
1.8 KiB
Ruby
namespace :machine_learning do
|
|
task roleplaying: :environment do
|
|
output_file = 'roleplay-training.txt'
|
|
roleplay_boards = [
|
|
{
|
|
slug: 'roleplaying',
|
|
genre: 'none'
|
|
},
|
|
{
|
|
slug: 'fandom-world',
|
|
genre: 'fandom'
|
|
},
|
|
{
|
|
slug: 'limited',
|
|
genre: 'none'
|
|
},
|
|
{
|
|
slug: 'character-chats',
|
|
genre: 'character-chat'
|
|
},
|
|
{
|
|
slug: 'fantasy-tavern',
|
|
genre: 'fantasy'
|
|
},
|
|
{
|
|
slug: 'futuristic',
|
|
genre: 'sci-fi'
|
|
},
|
|
{
|
|
slug: 'beginner-s-lounge',
|
|
genre: 'none'
|
|
},
|
|
{
|
|
slug: 'realistic-modern',
|
|
genre: 'modern'
|
|
}
|
|
]
|
|
|
|
old_logger = ActiveRecord::Base.logger
|
|
ActiveRecord::Base.logger = nil
|
|
|
|
file = File.open(output_file, "w")
|
|
roleplay_boards.each do |board_data|
|
|
board = Thredded::Messageboard.find_by(slug: board_data[:slug])
|
|
raise "Board with slug #{board_data[:slug]} not found!" unless board.present?
|
|
|
|
topics = board.topics.where(moderation_state: 'approved')
|
|
puts "Starting on board #{board.name} (#{topics.count} topics)"
|
|
topics.find_each do |topic|
|
|
posts = topic.posts.where(moderation_state: 'approved')
|
|
puts topic.title + ' - ' + posts.count.to_s + ' posts'
|
|
|
|
file.write("[NEW TOPIC]\n\n")
|
|
|
|
posts.find_each.with_index do |post, index|
|
|
file.write("[user_id: #{post.user_id}][genre: #{board_data[:genre]}][thread_id: #{topic.id}][post_index: #{index}][thread_title: #{topic.title}]\n")
|
|
file.write(post.content)
|
|
file.write("\n\n" + ('-' * 67) + "\n\n")
|
|
end
|
|
|
|
file.write("[END TOPIC]\n\n")
|
|
end
|
|
|
|
end
|
|
file.close
|
|
puts "Training data written to #{output_file}"
|
|
|
|
# Re-enable console logger
|
|
ActiveRecord::Base.logger = old_logger
|
|
end
|
|
end |