summaryrefslogtreecommitdiffstats
path: root/drivers/tty
diff options
context:
space:
mode:
authorJan Kundrát <jan.kundrat@cesnet.cz>2017-12-08 23:51:33 +0100
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2017-12-19 09:59:02 +0100
commit2258761213cb239e5e6c11b4ec9b1700fcb4fdcd (patch)
tree3ad97776d3aad8056f8e7e144584968f74444d32 /drivers/tty
parent2b4bac48c10848ccffd484e7cd025dc085c1bd32 (diff)
downloadop-kernel-dev-2258761213cb239e5e6c11b4ec9b1700fcb4fdcd.zip
op-kernel-dev-2258761213cb239e5e6c11b4ec9b1700fcb4fdcd.tar.gz
serial: max310x: Reduce RX work starvation
Prior to this patch, the code would happily trigger TX on some ports before having a chance of reading the RX buffer from the rest of them. When no flow control was used, this led to RX buffer overruns and therefore lost data under certain circumstances. I was able to reproduce this with MAX14830 (that's a quad channel one) and a simple daisy-chain of RX and TX ports on the eval board: - TX0 -> RX1 - TX1 -> RX2 - TX2 -> RX3 - TX3 -> RX0 I was testing this by transferring 2MB of data at 115200 baud via each port. I used a Solidrun Clearfog Base (Armada 388) which was talking to the UART over an SPI bus clocked at 26MHz (the chip's maximum). Without this patch, I would always get a "Possible RX FIFO overrun" in dmesg, and fewer-than-expected amount of bytes received over ttyMAX0. Results on ttyMAX{1,2,3} tended to be correct all the time, even without the previous patches in this series and with PIO SPI transfers ("indirect mode" as the Marvell datasheet calls it), so I assume that heavy congestion is needed in order to reproduce this. A drawback of this patch is that the throughput gets reduced "a bit". Previously, a 115200 baud resulted in about 11.2kBps throughput as reported by a simple `pv`. With this patch, the throughput of four parallel streams is roughly 7kBps each, and 9kBps for three streams. There is no slowdown for one or two parallel streams. Situation is worse if bytes are being read one-by-one (such as if the userspace wants to perform parity/framing/break checking) and therefore without the batched reads. With just this patch and no other modifications on top of 4.14, I was only getting roughly 3.6kBps with four parallel streams. The single-stream performance was the same, and I was seeing about 7.2kBps with two parallel streams. `perf top` said that a substantial amount of time was spent in `finish_task_switch`, `_raw_spin_unlock_irqrestore` and `__timer_delay`. Signed-off-by: Jan Kundrát <jan.kundrat@cesnet.cz> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/tty')
-rw-r--r--drivers/tty/serial/max310x.c23
1 files changed, 10 insertions, 13 deletions
diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index dc0e1ad..97576ff 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -753,6 +753,14 @@ static void max310x_handle_tx(struct uart_port *port)
uart_write_wakeup(port);
}
+static void max310x_start_tx(struct uart_port *port)
+{
+ struct max310x_one *one = container_of(port, struct max310x_one, port);
+
+ if (!work_pending(&one->tx_work))
+ schedule_work(&one->tx_work);
+}
+
static irqreturn_t max310x_port_irq(struct max310x_port *s, int portno)
{
struct uart_port *port = &s->p[portno].port;
@@ -776,11 +784,8 @@ static irqreturn_t max310x_port_irq(struct max310x_port *s, int portno)
}
if (rxlen)
max310x_handle_rx(port, rxlen);
- if (ists & MAX310X_IRQ_TXEMPTY_BIT) {
- mutex_lock(&s->mutex);
- max310x_handle_tx(port);
- mutex_unlock(&s->mutex);
- }
+ if (ists & MAX310X_IRQ_TXEMPTY_BIT)
+ max310x_start_tx(port);
} while (1);
return res;
}
@@ -820,14 +825,6 @@ static void max310x_wq_proc(struct work_struct *ws)
mutex_unlock(&s->mutex);
}
-static void max310x_start_tx(struct uart_port *port)
-{
- struct max310x_one *one = container_of(port, struct max310x_one, port);
-
- if (!work_pending(&one->tx_work))
- schedule_work(&one->tx_work);
-}
-
static unsigned int max310x_tx_empty(struct uart_port *port)
{
unsigned int lvl, sts;
OpenPOWER on IntegriCloud