commit 06c33cafaaa2dd1e64682e4bd7ced7f4931fff24 Author: Jason Staten Date: Wed May 26 23:07:36 2021 -0600 Initial brief service diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..a0a01b8 --- /dev/null +++ b/Gemfile @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +source "https://rubygems.org" + +git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } + +# gem "rails" + +gem "mechanize", "~> 2.8" + +gem "sinatra", "~> 2.1" diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..03d53a7 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,61 @@ +GEM + remote: https://rubygems.org/ + specs: + addressable (2.7.0) + public_suffix (>= 2.0.2, < 5.0) + connection_pool (2.2.5) + domain_name (0.5.20190701) + unf (>= 0.0.5, < 1.0.0) + http-cookie (1.0.3) + domain_name (~> 0.5) + mechanize (2.8.1) + addressable (~> 2.7) + domain_name (~> 0.5, >= 0.5.20190701) + http-cookie (~> 1.0, >= 1.0.3) + mime-types (~> 3.0) + net-http-digest_auth (~> 1.4, >= 1.4.1) + net-http-persistent (>= 2.5.2, < 5.0.dev) + nokogiri (~> 1.11, >= 1.11.2) + rubyntlm (~> 0.6, >= 0.6.3) + webrick (~> 1.7) + webrobots (~> 0.1.2) + mime-types (3.3.1) + mime-types-data (~> 3.2015) + mime-types-data (3.2021.0225) + mini_portile2 (2.5.1) + mustermann (1.1.1) + ruby2_keywords (~> 0.0.1) + net-http-digest_auth (1.4.1) + net-http-persistent (4.0.1) + connection_pool (~> 2.2) + nokogiri (1.11.6) + mini_portile2 (~> 2.5.0) + racc (~> 1.4) + public_suffix (4.0.6) + racc (1.5.2) + rack (2.2.3) + rack-protection (2.1.0) + rack + ruby2_keywords (0.0.4) + rubyntlm (0.6.3) + sinatra (2.1.0) + mustermann (~> 1.0) + rack (~> 2.2) + rack-protection (= 2.1.0) + tilt (~> 2.0) + tilt (2.0.10) + unf (0.1.4) + unf_ext + unf_ext (0.0.7.7) + webrick (1.7.0) + webrobots (0.1.2) + +PLATFORMS + ruby + +DEPENDENCIES + mechanize (~> 2.8) + sinatra (~> 2.1) + +BUNDLED WITH + 1.17.2 diff --git a/README.md b/README.md new file mode 100644 index 0000000..76f5188 --- /dev/null +++ b/README.md @@ 
-0,0 +1,22 @@ +# brief + +Simple ruby service to obtain a title and description for a given url + + +# Setup + +``` +bundle install +``` + +# Running + +``` +ruby app.rb +``` + +And visit the `/summary` route of `localhost:4567` + +``` +http://localhost:4567/summary?url=https://www.nytimes.com/article/best-movies-netflix.html +``` diff --git a/app.rb b/app.rb new file mode 100644 index 0000000..e6c5587 --- /dev/null +++ b/app.rb @@ -0,0 +1,32 @@ +require 'sinatra' +require 'mechanize' + +# Find a description in the page's meta tags, preferring og:description, then twitter:description, then the plain description tag; evaluates to that tag's content attribute value, or nil when no such tag or attribute is found +def meta_description(page) + description_meta_tag = + page.search("meta[property='og:description']")[0] || + page.search("meta[property='twitter:description']")[0] || + page.search("meta[name='description']")[0] + + description_meta_tag&.attribute("content")&.value +end + +# Truncate the text of the first
# <article> element on the page, for use when no description meta tag exists.
#
# @param page [#search] parsed page; the /summary route passes the result of
#   Mechanize#get
# @return [String, nil] the leading characters (indices 0..500) of the
#   article's text, or nil when the page has no <article> element
def article_description(page)
  article = page.search("article").first
  return unless article

  article.text[0..500]
end

# GET /summary?url=<absolute http(s) URL>
#
# Fetches the page at +url+ and responds with a JSON object holding the
# requested url, the page title, and a description taken from the page's meta
# tags or, failing that, from the start of its first <article> element.
#
# Error responses are JSON objects with an "error" key:
# * 400 - the url parameter is missing or is not an http(s) URL
# * 502 - the remote page could not be fetched
# * 422 - the remote resource was fetched but is not an HTML page
get '/summary' do
  content_type :json

  url = params[:url].to_s.strip

  # Only http(s) is accepted. Mechanize also understands file:// URIs, so
  # passing the parameter through unchecked would let callers read local files.
  # NOTE(review): this does not stop requests to internal http hosts (SSRF);
  # add an allow/deny list if the service is exposed beyond trusted clients.
  unless url.match?(%r{\Ahttps?://}i)
    halt 400, { url: url, error: "url must be an absolute http or https URL" }.to_json
  end

  begin
    page = Mechanize.new.get(url)
  rescue Mechanize::Error, Net::HTTP::Persistent::Error, OpenSSL::SSL::SSLError,
         SocketError, SystemCallError, Timeout::Error => e
    # Upstream failures (bad status, DNS, refused connection, timeout, TLS) are
    # reported to the caller instead of surfacing as an unhandled 500.
    halt 502, { url: url, error: "could not fetch url: #{e.message}" }.to_json
  end

  # Non-HTML responses (images, PDFs, ...) are not Mechanize::Page instances
  # and do not respond to #title or #search.
  unless page.is_a?(Mechanize::Page)
    halt 422, { url: url, error: "url did not return an HTML page" }.to_json
  end

  title = page.title
  description = meta_description(page) || article_description(page)

  { url: url, title: title, description: description }.to_json
end