|
/* |
|
Есть события: (время начала, время конца, тег "в метро", айди территории, флаг "вход", флаг "выход"), |
|
заполнены поля могут быть как угодно, очередность событий может быть любая |
|
*/ |
|
case class TelEventRecord(bts: Int, ets: Int, |
|
transport: String, |
|
zid: Int = -1, |
|
enter: Boolean = false, exit: Boolean = false) |
|
/* |
|
Надо обработать последовательность таких событий, чтобы получить последовательность "поездок" |
|
от "входа" до "выхода": (начало поездки, конец, территория входа, выхода, было ли по ходу "метро") |
|
*/ |
|
case class TripRawData(bts: Int, ets: Int, bzid: Int, ezid: Int, metro: Boolean) |
|
/* |
|
Ситуация осложняется тем, что любая последовательность "входов" и/или "выходов" может быть схлопнута в 0 или 1 вход/выход. |
|
Правила схлопывания такие: если в последовательности входов/выходов (промежуточные не учитываем) события отстают от соседей менее чем на 30 минут, |
|
такая последовательность схлопывается. Если число событий четное, то в 0 событий. Если нечетное, то в 1, первое событие. |
|
Смысл этого в том, что, к примеру, при въезде в страну, могут быть зарегистрированы события вход/выход/вход/выход с промежутком 29 минут 59 секунд. |
|
Такая последовательность выбрасывается, считается, что въезда в страну не было. Флуктации. |
|
*/ |
|
/* |
|
Набор тестов для проверки таких условий: |
|
*/ |
|
"rep#10 events2Trips" should "fold (enter,exit) into 1 trip" in { |
|
val events = Seq( |
|
TelEventRecord(1, 2, "", 3, true, false), |
|
TelEventRecord(1801, 1802, "", 4, false, true)) |
|
val expected = Seq(TripRawData(1, 1801, 3, 4, false)) |
|
val trips = events2trips(events) |
|
assert(trips === expected) |
|
} |
|
|
|
"rep#10 events2Trips" should "fold metro (enter,exit) into 1 trip" in { |
|
val events = Seq( |
|
TelEventRecord(1, 2, "metro", 3, true, false), |
|
TelEventRecord(1801, 1802, "", 4, false, true)) |
|
val expected = Seq(TripRawData(1, 1801, 3, 4, true)) |
|
assert(events2trips(events) === expected) |
|
|
|
val events2 = Seq( |
|
TelEventRecord(1, 2, "", 3, true, false), |
|
TelEventRecord(1801, 1802, "metro", 4, false, true)) |
|
assert(events2trips(events2) === expected) |
|
} |
|
|
|
"rep#10 events2Trips" should "fold (enter, ..., exit) into 1 trip" in { |
|
val events = Seq( |
|
TelEventRecord(1, 2, "", 3, true, false), |
|
TelEventRecord(100, 200, "", 5, false, false), |
|
TelEventRecord(300, 400, "", 5, false, false), |
|
TelEventRecord(1801, 1802, "", 4, false, true)) |
|
val expected = Seq(TripRawData(1, 1801, 3, 4, false)) |
|
val trips = events2trips(events) |
|
assert(trips === expected) |
|
|
|
val events2 = Seq( |
|
TelEventRecord(1, 2, "", 3, true, false), |
|
TelEventRecord(100, 200, "", 5, false, false), |
|
TelEventRecord(300, 400, "metro", 5, false, false), |
|
TelEventRecord(1801, 1802, "", 4, false, true)) |
|
assert(events2trips(events2) === Seq(TripRawData(1, 1801, 3, 4, true))) |
|
} |
|
|
|
"rep#10 events2Trips" should "fold (enter, enter, exit) into 1 trip" in { |
|
val events = Seq( |
|
TelEventRecord(1, 2, "", 3, true, false), |
|
TelEventRecord(1801, 1802, "", 4, true, false), |
|
TelEventRecord(18010, 18020, "", 4, false, true)) |
|
val expected = Seq(TripRawData(1, 18010, 3, 4, false)) |
|
val trips = events2trips(events) |
|
assert(trips === expected) |
|
} |
|
|
|
"rep#10 events2Trips" should "fold (enter, exit, exit) into 1 trip" in { |
|
val events = Seq( |
|
TelEventRecord(1, 2, "", 3, true, false), |
|
TelEventRecord(1801, 1802, "", 4, false, true), |
|
TelEventRecord(18010, 18020, "", 4, false, true)) |
|
val expected = Seq(TripRawData(1, 1801, 3, 4, false)) |
|
val trips = events2trips(events) |
|
assert(trips === expected) |
|
} |
|
|
|
"rep#10 events2Trips" should "ignore jiggle" in { |
|
val events = Seq( |
|
TelEventRecord(1, 2, "jgl", 3, true, false), |
|
TelEventRecord(3, 4, "jgl", 3, false, true), |
|
TelEventRecord(5, 6, "jgl", 3, true, false), |
|
TelEventRecord(1805, 1806, "jgl", 4, false, true)) |
|
val expected = Seq(TripRawData(1, 1805, 3, 4, false)) |
|
val trips = events2trips(events) |
|
assert(trips === expected) |
|
|
|
val events2 = Seq( |
|
TelEventRecord(1, 2, "", 3, true, false), |
|
TelEventRecord(1801, 1802, "", 4, false, true), |
|
TelEventRecord(1803, 1804, "", 4, true, false), |
|
TelEventRecord(1805, 1806, "", 4, false, true)) |
|
assert(events2trips(events2) === Seq(TripRawData(1, 1801, 3, 4, false))) |
|
|
|
val events3 = Seq( |
|
TelEventRecord(1, 2, "", 3, true, false), |
|
TelEventRecord(3, 4, "", 3, false, true), |
|
TelEventRecord(1801, 1802, "", 4, false, true)) |
|
assert(events2trips(events3) === Seq.empty[TripRawData]) |
|
|
|
val events4 = Seq( |
|
TelEventRecord(1, 2, "", 3, true, false), |
|
TelEventRecord(1801, 1802, "", 4, false, true), |
|
TelEventRecord(1803, 1804, "", 4, true, false)) |
|
assert(events2trips(events4) === Seq.empty[TripRawData]) |
|
} |
|
/* |
|
Говнокод лобового решения в императивном стиле (циклы, счетчики, буферы, мутабельные переменные) выглядит так: |
|
*/ |
|
/** |
|
* Filter interleaving events: enter, exit, ... |
|
* Copy to enter event 'metro' tag if any intermediate event have it. |
|
* @param srtd events sorted by time |
|
* @return enter, exit, ... sequence |
|
*/ |
|
def normalizeInterleave(srtd: Seq[TelEventRecord]): Seq[TelEventRecord] = { |
|
// only interleaving in/out events |
|
var metro = false |
|
var enter: TelEventRecord = null |
|
var res = Seq.empty[TelEventRecord] |
|
|
|
if (srtd.size < 2) srtd |
|
else { |
|
for (n <- 0 until srtd.size) { |
|
val curr = srtd(n) |
|
metro = (curr.transport == transName) |
|
if (curr.enter) { |
|
if (enter == null) enter = curr |
|
} |
|
else if (curr.exit) { |
|
if (enter != null) { |
|
if (metro) enter = enter.copy(transport = transName) |
|
res = (res :+ enter) :+ curr |
|
enter = null |
|
} |
|
} |
|
else { |
|
if (metro && enter != null) enter = enter.copy(transport = transName) |
|
} |
|
} |
|
if (enter == null) res else res :+ enter |
|
} |
|
} |
|
|
|
/** |
|
* Fold jiggle crossing: ignore short movements |
|
* @param srtd sorted by time events sequence |
|
* @return enter,exit, ... sequence w/o short trips |
|
*/ |
|
def normalizeJiggle(srtd: Seq[TelEventRecord]): Seq[TelEventRecord] = { |
|
// jiggle: if 2.ts - 1.ts < 30 min |
|
var temp = Seq.empty[TelEventRecord] |
|
var res = Seq.empty[TelEventRecord] |
|
|
|
if (srtd.size < 2) srtd |
|
else { |
|
var prev = srtd(0) |
|
temp = temp :+ prev |
|
for (n <- 1 until srtd.size) { |
|
val curr = srtd(n) |
|
if ((curr.bts - prev.bts) < jiggleLimit) { |
|
temp = temp :+ curr |
|
} |
|
else { |
|
val folded = if (temp.size % 2 == 0) Seq() else Seq(temp(0)) |
|
res = res ++ folded |
|
temp = Seq(curr) |
|
} |
|
prev = curr |
|
} |
|
val folded = if (temp.size % 2 == 0) Seq() else Seq(temp(0)) |
|
res ++ folded |
|
} |
|
} |
|
|
|
/** |
|
* Combine events sequence into trips sequence: |
|
* normalize in/out pairs, remove short trips |
|
* @param events events sequence |
|
* @return trips sequence |
|
*/ |
|
def events2trips(events: Seq[TelEventRecord]): Seq[TripRawData] = { |
|
var trips = Seq.empty[TripRawData] |
|
|
|
// remove cross-border repetitions: time to next cb event < 30 min |
|
def normalize(srtd: Seq[TelEventRecord]): Seq[TelEventRecord] = { |
|
val interleaved = normalizeInterleave(srtd) |
|
normalizeJiggle(interleaved) |
|
} |
|
|
|
// collect in,out pairs into trips |
|
var prev: TelEventRecord = null |
|
def checkEvent(n: Int, srtd: Seq[TelEventRecord]): Unit = { |
|
val curr = srtd(n) |
|
if (curr.enter) { if (prev == null) prev = curr } |
|
else if (curr.exit) { |
|
if (prev != null) { |
|
val metro = (prev.transport == transName || curr.transport == transName) |
|
val trip = TripRawData(prev.bts, curr.bts, prev.zid, curr.zid, metro) |
|
trips = trips :+ trip |
|
prev = null |
|
} |
|
} |
|
} |
|
|
|
val srtd = normalize(events.sortBy(e => e.bts)) |
|
if (srtd.size > 1) { |
|
for (n <- 0 until srtd.size) checkEvent(n, srtd) |
|
} |
|
|
|
trips.toVector |
|
} |
|
/* |
|
Задача: написать понятное, эффективное и красивое решение в функциональном стиле. |
|
*/ |