From 53e38fe73f941291fd20794c15c3bb7b104a4a17 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 16 May 2018 09:35:58 +0200 Subject: pwm: stm32: Add capture support Add support for PWM input mode on pwm-stm32. STM32 timers support period and duty cycle capture as long as they have at least two PWM channels. One capture channel is used for period (rising-edge), one for duty-cycle (falling-edge). When there's only one channel available, only period can be captured. Duty-cycle is simply zeroed in such a case. Capture requires exclusive access (e.g. no pwm output running at the same time, to protect common prescaler). Timer DMA burst mode (from MFD core) is being used, to take two snapshots of capture registers (upon each period rising edge). Signed-off-by: Fabrice Gasnier Reviewed-by: Benjamin Gaignard Acked-by: Thierry Reding Signed-off-by: Lee Jones --- drivers/pwm/pwm-stm32.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) (limited to 'drivers/pwm') diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index 2708212933f7..ed3961b7b938 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -25,6 +25,7 @@ struct stm32_pwm { struct regmap *regmap; u32 max_arr; bool have_complementary_output; + u32 capture[4] ____cacheline_aligned; /* DMA'able buffer */ }; struct stm32_breakinput { @@ -62,6 +63,178 @@ static int write_ccrx(struct stm32_pwm *dev, int ch, u32 value) return -EINVAL; } +#define TIM_CCER_CC12P (TIM_CCER_CC1P | TIM_CCER_CC2P) +#define TIM_CCER_CC12E (TIM_CCER_CC1E | TIM_CCER_CC2E) +#define TIM_CCER_CC34P (TIM_CCER_CC3P | TIM_CCER_CC4P) +#define TIM_CCER_CC34E (TIM_CCER_CC3E | TIM_CCER_CC4E) + +/* + * Capture using PWM input mode: + * ___ ___ + * TI[1, 2, 3 or 4]: ........._| |________| + * ^0 ^1 ^2 + * . . . + * . . XXXXX + * . . XXXXX | + * . XXXXX . | + * XXXXX . . | + * COUNTER: ______XXXXX . . . |_XXX + * start^ . . . ^stop + * . . . . + * v v . 
v + * v + * CCR1/CCR3: tx..........t0...........t2 + * CCR2/CCR4: tx..............t1......... + * + * DMA burst transfer: | | + * v v + * DMA buffer: { t0, tx } { t2, t1 } + * DMA done: ^ + * + * 0: IC1/3 snapshot on rising edge: counter value -> CCR1/CCR3 + * + DMA transfer CCR[1/3] & CCR[2/4] values (t0, tx: don't care) + * 1: IC2/4 snapshot on falling edge: counter value -> CCR2/CCR4 + * 2: IC1/3 snapshot on rising edge: counter value -> CCR1/CCR3 + * + DMA transfer CCR[1/3] & CCR[2/4] values (t2, t1) + * + * DMA done, compute: + * - Period = t2 - t0 + * - Duty cycle = t1 - t0 + */ +static int stm32_pwm_raw_capture(struct stm32_pwm *priv, struct pwm_device *pwm, + unsigned long tmo_ms, u32 *raw_prd, + u32 *raw_dty) +{ + struct device *parent = priv->chip.dev->parent; + enum stm32_timers_dmas dma_id; + u32 ccen, ccr; + int ret; + + /* Ensure registers have been updated, enable counter and capture */ + regmap_update_bits(priv->regmap, TIM_EGR, TIM_EGR_UG, TIM_EGR_UG); + regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN, TIM_CR1_CEN); + + /* Use cc1 or cc3 DMA resp for PWM input channels 1 & 2 or 3 & 4 */ + dma_id = pwm->hwpwm < 2 ? STM32_TIMERS_DMA_CH1 : STM32_TIMERS_DMA_CH3; + ccen = pwm->hwpwm < 2 ? TIM_CCER_CC12E : TIM_CCER_CC34E; + ccr = pwm->hwpwm < 2 ? TIM_CCR1 : TIM_CCR3; + regmap_update_bits(priv->regmap, TIM_CCER, ccen, ccen); + + /* + * Timer DMA burst mode. Request 2 registers, 2 bursts, to get both + * CCR1 & CCR2 (or CCR3 & CCR4) on each capture event. 
+ * We'll get two capture snapchots: { CCR1, CCR2 }, { CCR1, CCR2 } + * or { CCR3, CCR4 }, { CCR3, CCR4 } + */ + ret = stm32_timers_dma_burst_read(parent, priv->capture, dma_id, ccr, 2, + 2, tmo_ms); + if (ret) + goto stop; + + /* Period: t2 - t0 (take care of counter overflow) */ + if (priv->capture[0] <= priv->capture[2]) + *raw_prd = priv->capture[2] - priv->capture[0]; + else + *raw_prd = priv->max_arr - priv->capture[0] + priv->capture[2]; + + /* Duty cycle capture requires at least two capture units */ + if (pwm->chip->npwm < 2) + *raw_dty = 0; + else if (priv->capture[0] <= priv->capture[3]) + *raw_dty = priv->capture[3] - priv->capture[0]; + else + *raw_dty = priv->max_arr - priv->capture[0] + priv->capture[3]; + + if (*raw_dty > *raw_prd) { + /* + * Race beetween PWM input and DMA: it may happen + * falling edge triggers new capture on TI2/4 before DMA + * had a chance to read CCR2/4. It means capture[1] + * contains period + duty_cycle. So, subtract period. + */ + *raw_dty -= *raw_prd; + } + +stop: + regmap_update_bits(priv->regmap, TIM_CCER, ccen, 0); + regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN, 0); + + return ret; +} + +static int stm32_pwm_capture(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_capture *result, unsigned long tmo_ms) +{ + struct stm32_pwm *priv = to_stm32_pwm_dev(chip); + unsigned long long prd, div, dty; + unsigned long rate; + unsigned int psc = 0; + u32 raw_prd, raw_dty; + int ret = 0; + + mutex_lock(&priv->lock); + + if (active_channels(priv)) { + ret = -EBUSY; + goto unlock; + } + + ret = clk_enable(priv->clk); + if (ret) { + dev_err(priv->chip.dev, "failed to enable counter clock\n"); + goto unlock; + } + + rate = clk_get_rate(priv->clk); + if (!rate) { + ret = -EINVAL; + goto clk_dis; + } + + /* prescaler: fit timeout window provided by upper layer */ + div = (unsigned long long)rate * (unsigned long long)tmo_ms; + do_div(div, MSEC_PER_SEC); + prd = div; + while ((div > priv->max_arr) && (psc < 
MAX_TIM_PSC)) { + psc++; + div = prd; + do_div(div, psc + 1); + } + regmap_write(priv->regmap, TIM_ARR, priv->max_arr); + regmap_write(priv->regmap, TIM_PSC, psc); + + /* Map TI1 or TI2 PWM input to IC1 & IC2 (or TI3/4 to IC3 & IC4) */ + regmap_update_bits(priv->regmap, + pwm->hwpwm < 2 ? TIM_CCMR1 : TIM_CCMR2, + TIM_CCMR_CC1S | TIM_CCMR_CC2S, pwm->hwpwm & 0x1 ? + TIM_CCMR_CC1S_TI2 | TIM_CCMR_CC2S_TI2 : + TIM_CCMR_CC1S_TI1 | TIM_CCMR_CC2S_TI1); + + /* Capture period on IC1/3 rising edge, duty cycle on IC2/4 falling. */ + regmap_update_bits(priv->regmap, TIM_CCER, pwm->hwpwm < 2 ? + TIM_CCER_CC12P : TIM_CCER_CC34P, pwm->hwpwm < 2 ? + TIM_CCER_CC2P : TIM_CCER_CC4P); + + ret = stm32_pwm_raw_capture(priv, pwm, tmo_ms, &raw_prd, &raw_dty); + if (ret) + goto stop; + + prd = (unsigned long long)raw_prd * (psc + 1) * NSEC_PER_SEC; + result->period = DIV_ROUND_UP_ULL(prd, rate); + dty = (unsigned long long)raw_dty * (psc + 1) * NSEC_PER_SEC; + result->duty_cycle = DIV_ROUND_UP_ULL(dty, rate); +stop: + regmap_write(priv->regmap, TIM_CCER, 0); + regmap_write(priv->regmap, pwm->hwpwm < 2 ? TIM_CCMR1 : TIM_CCMR2, 0); + regmap_write(priv->regmap, TIM_PSC, 0); +clk_dis: + clk_disable(priv->clk); +unlock: + mutex_unlock(&priv->lock); + + return ret; +} + static int stm32_pwm_config(struct stm32_pwm *priv, int ch, int duty_ns, int period_ns) { @@ -230,6 +403,9 @@ static int stm32_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops stm32pwm_ops = { .owner = THIS_MODULE, .apply = stm32_pwm_apply_locked, +#if IS_ENABLED(CONFIG_DMA_ENGINE) + .capture = stm32_pwm_capture, +#endif }; static int stm32_pwm_set_breakinput(struct stm32_pwm *priv, -- cgit v1.2.3 From d66ffb91c374fc500fe666645184e278774bad38 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 16 May 2018 09:35:59 +0200 Subject: pwm: stm32: Improve capture by tuning counter prescaler Currently, capture is based on timeout window to configure prescaler. 
PWM capture framework provides a 1s window at the time of writing. There is room for improvement, once the input signal has been captured once: - Fine-tune the counter clock prescaler, by using 1st capture result (with arbitrary margin). - Do a 2nd capture, with scaled capture window. This increases accuracy, especially at high rates. Signed-off-by: Fabrice Gasnier Reviewed-by: Benjamin Gaignard Acked-by: Thierry Reding Signed-off-by: Lee Jones --- drivers/pwm/pwm-stm32.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'drivers/pwm') diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index ed3961b7b938..9a50acde1e61 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -168,7 +168,7 @@ static int stm32_pwm_capture(struct pwm_chip *chip, struct pwm_device *pwm, struct stm32_pwm *priv = to_stm32_pwm_dev(chip); unsigned long long prd, div, dty; unsigned long rate; - unsigned int psc = 0; + unsigned int psc = 0, scale; u32 raw_prd, raw_dty; int ret = 0; @@ -219,6 +219,28 @@ static int stm32_pwm_capture(struct pwm_chip *chip, struct pwm_device *pwm, if (ret) goto stop; + /* + * Got a capture. Try to improve accuracy at high rates: + * - decrease counter clock prescaler, scale up to max rate. 
+ */ + if (raw_prd) { + u32 max_arr = priv->max_arr - 0x1000; /* arbitrary margin */ + + scale = max_arr / min(max_arr, raw_prd); + } else { + scale = priv->max_arr; /* bellow resolution, use max scale */ + } + + if (psc && scale > 1) { + /* 2nd measure with new scale */ + psc /= scale; + regmap_write(priv->regmap, TIM_PSC, psc); + ret = stm32_pwm_raw_capture(priv, pwm, tmo_ms, &raw_prd, + &raw_dty); + if (ret) + goto stop; + } + prd = (unsigned long long)raw_prd * (psc + 1) * NSEC_PER_SEC; result->period = DIV_ROUND_UP_ULL(prd, rate); dty = (unsigned long long)raw_dty * (psc + 1) * NSEC_PER_SEC; -- cgit v1.2.3 From ab3a897847834bf3e864fb07b733c444895a24ba Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 16 May 2018 09:36:00 +0200 Subject: pwm: stm32: Use input prescaler to improve period capture Using input prescaler, capture unit will trigger DMA once every configurable /2, /4 or /8 events (rising edge). This helps improve period (only) capture accuracy at high rates. Signed-off-by: Fabrice Gasnier Reviewed-by: Benjamin Gaignard Acked-by: Thierry Reding Signed-off-by: Lee Jones --- drivers/pwm/pwm-stm32.c | 63 ++++++++++++++++++++++++++++++++++++++-- include/linux/mfd/stm32-timers.h | 1 + 2 files changed, 62 insertions(+), 2 deletions(-) (limited to 'drivers/pwm') diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index 9a50acde1e61..60bfc07c4912 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -8,6 +8,7 @@ * pwm-atmel.c from Bo Shen */ +#include #include #include #include @@ -168,7 +169,7 @@ static int stm32_pwm_capture(struct pwm_chip *chip, struct pwm_device *pwm, struct stm32_pwm *priv = to_stm32_pwm_dev(chip); unsigned long long prd, div, dty; unsigned long rate; - unsigned int psc = 0, scale; + unsigned int psc = 0, icpsc, scale; u32 raw_prd, raw_dty; int ret = 0; @@ -222,6 +223,7 @@ static int stm32_pwm_capture(struct pwm_chip *chip, struct pwm_device *pwm, /* * Got a capture. 
Try to improve accuracy at high rates: * - decrease counter clock prescaler, scale up to max rate. + * - use input prescaler, capture once every /2 /4 or /8 edges. */ if (raw_prd) { u32 max_arr = priv->max_arr - 0x1000; /* arbitrary margin */ @@ -241,8 +243,65 @@ static int stm32_pwm_capture(struct pwm_chip *chip, struct pwm_device *pwm, goto stop; } + /* Compute intermediate period not to exceed timeout at low rates */ prd = (unsigned long long)raw_prd * (psc + 1) * NSEC_PER_SEC; - result->period = DIV_ROUND_UP_ULL(prd, rate); + do_div(prd, rate); + + for (icpsc = 0; icpsc < MAX_TIM_ICPSC ; icpsc++) { + /* input prescaler: also keep arbitrary margin */ + if (raw_prd >= (priv->max_arr - 0x1000) >> (icpsc + 1)) + break; + if (prd >= (tmo_ms * NSEC_PER_MSEC) >> (icpsc + 2)) + break; + } + + if (!icpsc) + goto done; + + /* Last chance to improve period accuracy, using input prescaler */ + regmap_update_bits(priv->regmap, + pwm->hwpwm < 2 ? TIM_CCMR1 : TIM_CCMR2, + TIM_CCMR_IC1PSC | TIM_CCMR_IC2PSC, + FIELD_PREP(TIM_CCMR_IC1PSC, icpsc) | + FIELD_PREP(TIM_CCMR_IC2PSC, icpsc)); + + ret = stm32_pwm_raw_capture(priv, pwm, tmo_ms, &raw_prd, &raw_dty); + if (ret) + goto stop; + + if (raw_dty >= (raw_prd >> icpsc)) { + /* + * We may fall here using input prescaler, when input + * capture starts on high side (before falling edge). + * Example with icpsc to capture on each 4 events: + * + * start 1st capture 2nd capture + * v v v + * ___ _____ _____ _____ _____ ____ + * TI1..4 |__| |__| |__| |__| |__| + * v v . . . . . v v + * icpsc1/3: . 0 . 1 . 2 . 3 . 0 + * icpsc2/4: 0 1 2 3 0 + * v v v v + * CCR1/3 ......t0..............................t2 + * CCR2/4 ..t1..............................t1'... + * . . . + * Capture0: .<----------------------------->. + * Capture1: .<-------------------------->. . + * . . . + * Period: .<------> . . + * Low side: .<>. 
+ * + * Result: + * - Period = Capture0 / icpsc + * - Duty = Period - Low side = Period - (Capture0 - Capture1) + */ + raw_dty = (raw_prd >> icpsc) - (raw_prd - raw_dty); + } + +done: + prd = (unsigned long long)raw_prd * (psc + 1) * NSEC_PER_SEC; + result->period = DIV_ROUND_UP_ULL(prd, rate << icpsc); dty = (unsigned long long)raw_dty * (psc + 1) * NSEC_PER_SEC; result->duty_cycle = DIV_ROUND_UP_ULL(dty, rate); stop: diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index d46f5500928e..9da1d7ece079 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -82,6 +82,7 @@ #define TIM_DCR_DBL GENMASK(12, 8) /* DMA burst len */ #define MAX_TIM_PSC 0xFFFF +#define MAX_TIM_ICPSC 0x3 #define TIM_CR2_MMS_SHIFT 4 #define TIM_CR2_MMS2_SHIFT 20 #define TIM_SMCR_TS_SHIFT 4 -- cgit v1.2.3