From 2056c318668c6dbc5395f382524a95760ed321ed Mon Sep 17 00:00:00 2001 From: Rob Dawson Date: Wed, 16 May 2012 12:03:02 +1000 Subject: [PATCH] Made detect language support underlying HTML mode. --- ext/cld/thunk.cc | 3 +-- lib/cld.rb | 6 +++--- spec/cld_spec.rb | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/ext/cld/thunk.cc b/ext/cld/thunk.cc index f69d6bc..d329d27 100644 --- a/ext/cld/thunk.cc +++ b/ext/cld/thunk.cc @@ -12,8 +12,7 @@ typedef struct { } RESULT; extern "C" { - RESULT detectLanguageThunkInt(const char * src) { - bool is_plain_text = true; + RESULT detectLanguageThunkInt(const char * src, bool is_plain_text) { bool do_allow_extended_languages = true; bool do_pick_summary_language = false; bool do_remove_weak_matches = false; diff --git a/lib/cld.rb b/lib/cld.rb index de70144..2c75777 100644 --- a/lib/cld.rb +++ b/lib/cld.rb @@ -4,8 +4,8 @@ module CLD extend FFI::Library - def self.detect_language(text) - result = detect_language_ext(text.to_s) + def self.detect_language(text, is_plain_text=true) + result = detect_language_ext(text.to_s, is_plain_text) Hash[ result.members.map {|member| [member.to_sym, result[member]]} ] end @@ -17,5 +17,5 @@ class ReturnValue < FFI::Struct GEM_ROOT = File.expand_path("../../", __FILE__) ffi_lib "#{GEM_ROOT}/ext/cld/cld.so" - attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in], ReturnValue.by_value + attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in, :bool], ReturnValue.by_value end diff --git a/spec/cld_spec.rb b/spec/cld_spec.rb index 4e89011..1e42f51 100644 --- a/spec/cld_spec.rb +++ b/spec/cld_spec.rb @@ -19,6 +19,21 @@ it { subject[:reliable].should be_true } end + context "French in HTML - using CLD html " do + subject { CLD.detect_language("

plus ça change, plus c'est la même chose

", false) } + + it { subject[:name].should eq("FRENCH") } + it { subject[:code].should eq("fr") } + + end + context "French in HTML - using CLD text " do + subject { CLD.detect_language("

plus ça change, plus c'est la même chose

", true) } + + it { subject[:name].should eq("ENGLISH") } + it { subject[:code].should eq("en") } + + end + context "Simplified Chinese text" do subject { CLD.detect_language("你好吗箭体") }