PhraseQuery matches phrases like "the quick brown fox". Most people are familiar with phrase queries having used them in most internet search engines.
Ferret's phrase queries are slightly more advanced. You can match phrases with a slop, i.e. the match isn't exact but it is good enough. The slop is basically the word edit distance of the phrase. For example, "the quick brown fox" with a slop of 1 would match "the quick little brown fox". With a slop of 2 it would match "the brown quick fox".
query = PhraseQuery.new(:content)
query << "the" << "quick" << "brown" << "fox"
# matches => "the quick brown fox"
query.slop = 1
# matches => "the quick little brown fox"
|__1__^
query.slop = 2
# matches => "the brown quick _____ fox"
^_____2_____|
Phrase queries can also have multiple terms in a single position. Let's say, for example, that we want to match synonyms for quick like "fast" and "speedy". You could build the query like this:
query = PhraseQuery.new(:content)
query << "the" << ["quick", "fast", "speedy"] << ["brown", "red"] << "fox"
# matches => "the quick red fox"
# matches => "the fast brown fox"
query.slop = 1
# matches => "the speedy little red fox"
You can also leave positions blank. Let's say you wanted to match "the quick <> fox" where "<>" could match anything (but not nothing). You'd build this query like this:
query = PhraseQuery.new(:content)
query.add_term("the").add_term("quick").add_term("fox", 2)
# matches => "the quick yellow fox"
# matches => "the quick alkgdhaskghaskjdh fox"
The second parameter to PhraseQuery#add_term is the position increment for the term. It is one by default, meaning that every time you add a term it is expected to directly follow the previous term. By setting it to 2 or greater you leave empty positions in the phrase.
There are also some tricks you can do by setting the position increment to 0.
With a little help from your analyzer you can actually tag bold or
italic text, for example. If you want more information about this, ask on the mailing list.
Create a new PhraseQuery on the given field. You need to add terms to the query before it will do anything of value. See PhraseQuery#add_term.
/*
 * Initializer for PhraseQuery.
 *
 * argc/argv - Ruby call arguments: one required (the field) and one
 *             optional (the slop), as described by the "11" scan spec.
 * self      - the object being initialized.
 *
 * Returns self.
 *
 * The slop is converted to a C int *before* the query is allocated.
 * FIX2INT may raise on builds where it type/range-checks its argument; if
 * that happened after phq_new() but before the query was attached to self,
 * the freshly allocated query would be left without an owner.
 */
static VALUE
frb_phq_init(int argc, VALUE *argv, VALUE self)
{
    VALUE rfield, rslop;
    Query *q;
    int slop = 0;
    int has_slop;

    rb_scan_args(argc, argv, "11", &rfield, &rslop);

    /* Do every conversion that can raise before allocating anything. */
    has_slop = (argc == 2);
    if (has_slop) {
        slop = FIX2INT(rslop);
    }

    q = phq_new(frb_field(rfield));
    if (has_slop) {
        /* Only override the slop phq_new() set up when one was passed. */
        ((PhraseQuery *)q)->slop = slop;
    }

    /* Attach the query to self, with frb_q_free as its free function. */
    Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
    object_add(q, self);
    return self;
}
Add a term to the phrase query. By default the position_increment is set to 1 so each term you add is expected to come directly after the previous term. By setting position_increment to 2 you are specifying that the term you just added should occur two terms after the previous term. For example;
phrase_query.add_term("big").add_term("house", 2)
# matches => "big brick house"
# matches => "big red house"
# doesn't match => "big house"
/*
 * Appends a term, or a group of terms sharing one position, to the phrase
 * query wrapped by self.
 *
 * argc/argv - Ruby call arguments: the term (a String or a non-empty Array
 *             of Strings) and an optional position increment (default 1).
 * self      - the PhraseQuery object; returned so calls can be chained.
 *
 * Raises ArgumentError for an empty array or for any term that is neither
 * a String nor an Array.
 */
static VALUE
frb_phq_add(int argc, VALUE *argv, VALUE self)
{
    VALUE term_arg, inc_arg;
    int increment = 1;
    int term_type;
    GET_Q();

    if (rb_scan_args(argc, argv, "11", &term_arg, &inc_arg) == 2) {
        increment = FIX2INT(inc_arg);
    }

    term_type = TYPE(term_arg);
    if (term_type == T_STRING) {
        phq_add_term(q, StringValuePtr(term_arg), increment);
    }
    else if (term_type == T_ARRAY) {
        int idx;
        char *word;

        if (RARRAY_LEN(term_arg) < 1) {
            rb_raise(rb_eArgError, "Cannot add empty array to a "
                     "PhraseQuery. You must add either a string or "
                     "an array of strings");
        }

        /* The first element opens the new position ... */
        word = StringValuePtr(RARRAY_PTR(term_arg)[0]);
        phq_add_term(q, word, increment);

        /* ... and each remaining element is appended as a multi-term. */
        idx = 1;
        while (idx < RARRAY_LEN(term_arg)) {
            word = StringValuePtr(RARRAY_PTR(term_arg)[idx]);
            phq_append_multi_term(q, word);
            idx++;
        }
    }
    else {
        rb_raise(rb_eArgError, "You can only add a string or an array of "
                 "strings to a PhraseQuery, not a %s\n",
                 rs2s(rb_obj_as_string(term_arg)));
    }

    return self;
}
Add a term to the phrase query. By default the position_increment is set to 1 so each term you add is expected to come directly after the previous term. By setting position_increment to 2 you are specifying that the term you just added should occur two terms after the previous term. For example;
phrase_query.add_term("big").add_term("house", 2)
# matches => "big brick house"
# matches => "big red house"
# doesn't match => "big house"
/*
 * Appends a term, or a group of terms sharing one position, to the phrase
 * query wrapped by self.
 *
 * argc/argv - Ruby call arguments: the term (a String or a non-empty Array
 *             of Strings) and an optional position increment (default 1).
 * self      - the PhraseQuery object; returned so calls can be chained.
 *
 * Raises ArgumentError for an empty array or for any term that is neither
 * a String nor an Array.
 */
static VALUE
frb_phq_add(int argc, VALUE *argv, VALUE self)
{
    VALUE term_arg, inc_arg;
    int increment = 1;
    int term_type;
    GET_Q();

    if (rb_scan_args(argc, argv, "11", &term_arg, &inc_arg) == 2) {
        increment = FIX2INT(inc_arg);
    }

    term_type = TYPE(term_arg);
    if (term_type == T_STRING) {
        phq_add_term(q, StringValuePtr(term_arg), increment);
    }
    else if (term_type == T_ARRAY) {
        int idx;
        char *word;

        if (RARRAY_LEN(term_arg) < 1) {
            rb_raise(rb_eArgError, "Cannot add empty array to a "
                     "PhraseQuery. You must add either a string or "
                     "an array of strings");
        }

        /* The first element opens the new position ... */
        word = StringValuePtr(RARRAY_PTR(term_arg)[0]);
        phq_add_term(q, word, increment);

        /* ... and each remaining element is appended as a multi-term. */
        idx = 1;
        while (idx < RARRAY_LEN(term_arg)) {
            word = StringValuePtr(RARRAY_PTR(term_arg)[idx]);
            phq_append_multi_term(q, word);
            idx++;
        }
    }
    else {
        rb_raise(rb_eArgError, "You can only add a string or an array of "
                 "strings to a PhraseQuery, not a %s\n",
                 rs2s(rb_obj_as_string(term_arg)));
    }

    return self;
}
Return the slop set for this phrase query. See the PhraseQuery description for more information on slop
/*
 * Reader for the slop of the phrase query wrapped by self.
 *
 * Returns the query's slop field converted to a Ruby Fixnum.
 */
static VALUE
frb_phq_get_slop(VALUE self)
{
    int current_slop;
    GET_Q();

    current_slop = ((PhraseQuery *)q)->slop;
    return INT2FIX(current_slop);
}
Set the slop for this phrase query. See the PhraseQuery description for more information on slop.
/*
 * Writer for the slop of the phrase query wrapped by self.
 *
 * rslop - the new slop as a Ruby integer; converted with FIX2INT.
 *
 * Returns self.
 */
static VALUE
frb_phq_set_slop(VALUE self, VALUE rslop)
{
    PhraseQuery *phrase_q;
    GET_Q();

    phrase_q = (PhraseQuery *)q;
    phrase_q->slop = FIX2INT(rslop);
    return self;
}
Generated with the Darkfish Rdoc Generator 2.